Merge tag 'drm/panel/for-4.5-rc1' of git://anongit.freedesktop.org/tegra/linux into drm-next

drm/panel: Changes for v4.5-rc1

This set of changes brings in a few more helpers for DSI support as well
as a couple of new drivers and support for some more simple panels.

* tag 'drm/panel/for-4.5-rc1' of git://anongit.freedesktop.org/tegra/linux:
  drm/panel: simple: Add QiaoDian qd43003c0-40
  of: Add vendor prefix for QiaoDian Xianshi
  drm/panel: add kernel doc for size attributes in panel_desc
  drm/panel: simple: Add support for Kyocera TCG121XGLP panel
  devicetree: add vendor prefix for Kyocera Corporation
  drm/bridge: Remove gratuitous blank line
  drm/bridge: dw-hdmi: Use dashes in filenames
  drm/panel: Add Sharp LS043T1LE01 MIPI DSI panel
  dt-bindings: Add Sharp LS043T1LE01 panel binding
  drm/dsi: Add Turn On/Shutdown Peripheral command helpers
  drm/panel: Add Panasonic VVX10F034N00 MIPI DSI panel
  dt-bindings: Add Panasonic VVX10F034N00 panel binding
  drm/panel: simple: Add support for Innolux G121X1-L03
  drm/panel: simple: Add support for BOE TV080WUM-NL0
  dt-bindings: Add BOE TV080WUM-NL0 panel binding
  of: Add vendor prefix for BOE Technology Group
  drm/dsi: Add a helper to get bits per pixel of MIPI DSI pixel format
diff --git a/Documentation/DocBook/gpu.tmpl b/Documentation/DocBook/gpu.tmpl
index 201dcd3..c66d641 100644
--- a/Documentation/DocBook/gpu.tmpl
+++ b/Documentation/DocBook/gpu.tmpl
@@ -124,6 +124,43 @@
     <para>
       [Insert diagram of typical DRM stack here]
     </para>
+  <sect1>
+    <title>Style Guidelines</title>
+    <para>
+      For consistency this documentation uses American English. Abbreviations
+      are written as all-uppercase, for example: DRM, KMS, IOCTL, CRTC, and so
+      on. To aid in reading, documentations make full use of the markup
+      characters kerneldoc provides: @parameter for function parameters, @member
+      for structure members, &amp;structure to reference structures and
+      function() for functions. These all get automatically hyperlinked if
+      kerneldoc for the referenced objects exists. When referencing entries in
+      function vtables please use -&gt;vfunc(). Note that kerneldoc does
+      not support referencing struct members directly, so please add a reference
+      to the vtable struct somewhere in the same paragraph or at least section.
+    </para>
+    <para>
+      Except in special situations (to separate locked from unlocked variants)
+      locking requirements for functions aren't documented in the kerneldoc.
+      Instead locking should be check at runtime using e.g.
+      <code>WARN_ON(!mutex_is_locked(...));</code>. Since it's much easier to
+      ignore documentation than runtime noise this provides more value. And on
+      top of that runtime checks do need to be updated when the locking rules
+      change, increasing the chances that they're correct. Within the
+      documentation the locking rules should be explained in the relevant
+      structures: Either in the comment for the lock explaining what it
+      protects, or data fields need a note about which lock protects them, or
+      both.
+    </para>
+    <para>
+      Functions which have a non-<code>void</code> return value should have a
+      section called "Returns" explaining the expected return values in
+      different cases and their meanings. Currently there's no consensus whether
+      that section name should be all upper-case or not, and whether it should
+      end in a colon or not. Go with the file-local style. Other common section
+      names are "Notes" with information for dangerous or tricky corner cases,
+      and "FIXME" where the interface could be cleaned up.
+    </para>
+  </sect1>
   </chapter>
 
   <!-- Internals -->
@@ -615,18 +652,6 @@
           <function>drm_gem_object_init</function>. Storage for private GEM
           objects must be managed by drivers.
         </para>
-        <para>
-          Drivers that do not need to extend GEM objects with private information
-          can call the <function>drm_gem_object_alloc</function> function to
-          allocate and initialize a struct <structname>drm_gem_object</structname>
-          instance. The GEM core will call the optional driver
-          <methodname>gem_init_object</methodname> operation after initializing
-          the GEM object with <function>drm_gem_object_init</function>.
-          <synopsis>int (*gem_init_object) (struct drm_gem_object *obj);</synopsis>
-        </para>
-        <para>
-          No alloc-and-init function exists for private GEM objects.
-        </para>
       </sect3>
       <sect3>
         <title>GEM Objects Lifetime</title>
@@ -635,10 +660,10 @@
           acquired and release by <function>calling drm_gem_object_reference</function>
           and <function>drm_gem_object_unreference</function> respectively. The
           caller must hold the <structname>drm_device</structname>
-          <structfield>struct_mutex</structfield> lock. As a convenience, GEM
-          provides the <function>drm_gem_object_reference_unlocked</function> and
-          <function>drm_gem_object_unreference_unlocked</function> functions that
-          can be called without holding the lock.
+	  <structfield>struct_mutex</structfield> lock when calling
+	  <function>drm_gem_object_reference</function>. As a convenience, GEM
+	  provides <function>drm_gem_object_unreference_unlocked</function>
+	  functions that can be called without holding the lock.
         </para>
         <para>
           When the last reference to a GEM object is released the GEM core calls
@@ -649,15 +674,9 @@
         </para>
         <para>
           <synopsis>void (*gem_free_object) (struct drm_gem_object *obj);</synopsis>
-          Drivers are responsible for freeing all GEM object resources, including
-          the resources created by the GEM core. If an mmap offset has been
-          created for the object (in which case
-          <structname>drm_gem_object</structname>::<structfield>map_list</structfield>::<structfield>map</structfield>
-          is not NULL) it must be freed by a call to
-          <function>drm_gem_free_mmap_offset</function>. The shmfs backing store
-          must be released by calling <function>drm_gem_object_release</function>
-          (that function can safely be called if no shmfs backing store has been
-          created).
+          Drivers are responsible for freeing all GEM object resources. This includes
+          the resources created by the GEM core, which need to be released with
+          <function>drm_gem_object_release</function>.
         </para>
       </sect3>
       <sect3>
@@ -740,17 +759,10 @@
           DRM identifies the GEM object to be mapped by a fake offset passed
           through the mmap offset argument. Prior to being mapped, a GEM object
           must thus be associated with a fake offset. To do so, drivers must call
-          <function>drm_gem_create_mmap_offset</function> on the object. The
-          function allocates a fake offset range from a pool and stores the
-          offset divided by PAGE_SIZE in
-          <literal>obj-&gt;map_list.hash.key</literal>. Care must be taken not to
-          call <function>drm_gem_create_mmap_offset</function> if a fake offset
-          has already been allocated for the object. This can be tested by
-          <literal>obj-&gt;map_list.map</literal> being non-NULL.
+          <function>drm_gem_create_mmap_offset</function> on the object.
         </para>
         <para>
           Once allocated, the fake offset value
-          (<literal>obj-&gt;map_list.hash.key &lt;&lt; PAGE_SHIFT</literal>)
           must be passed to the application in a driver-specific way and can then
           be used as the mmap offset argument.
         </para>
@@ -836,10 +848,11 @@
           abstracted from the client in libdrm.
         </para>
       </sect3>
-      <sect3>
-        <title>GEM Function Reference</title>
+    </sect2>
+    <sect2>
+      <title>GEM Function Reference</title>
 !Edrivers/gpu/drm/drm_gem.c
-      </sect3>
+!Iinclude/drm/drm_gem.h
     </sect2>
     <sect2>
       <title>VMA Offset Manager</title>
@@ -970,6 +983,7 @@
     <sect2>
       <title>Atomic Mode Setting Function Reference</title>
 !Edrivers/gpu/drm/drm_atomic.c
+!Idrivers/gpu/drm/drm_atomic.c
     </sect2>
     <sect2>
       <title>Frame Buffer Creation</title>
@@ -1197,137 +1211,6 @@
           pointer to CRTC functions.
         </para>
       </sect3>
-      <sect3 id="drm-kms-crtcops">
-        <title>CRTC Operations</title>
-        <sect4>
-          <title>Set Configuration</title>
-          <synopsis>int (*set_config)(struct drm_mode_set *set);</synopsis>
-          <para>
-            Apply a new CRTC configuration to the device. The configuration
-            specifies a CRTC, a frame buffer to scan out from, a (x,y) position in
-            the frame buffer, a display mode and an array of connectors to drive
-            with the CRTC if possible.
-          </para>
-          <para>
-            If the frame buffer specified in the configuration is NULL, the driver
-            must detach all encoders connected to the CRTC and all connectors
-            attached to those encoders and disable them.
-          </para>
-          <para>
-            This operation is called with the mode config lock held.
-          </para>
-          <note><para>
-	    Note that the drm core has no notion of restoring the mode setting
-	    state after resume, since all resume handling is in the full
-	    responsibility of the driver. The common mode setting helper library
-	    though provides a helper which can be used for this:
-	    <function>drm_helper_resume_force_mode</function>.
-          </para></note>
-        </sect4>
-        <sect4>
-          <title>Page Flipping</title>
-          <synopsis>int (*page_flip)(struct drm_crtc *crtc, struct drm_framebuffer *fb,
-                   struct drm_pending_vblank_event *event);</synopsis>
-          <para>
-            Schedule a page flip to the given frame buffer for the CRTC. This
-            operation is called with the mode config mutex held.
-          </para>
-          <para>
-            Page flipping is a synchronization mechanism that replaces the frame
-            buffer being scanned out by the CRTC with a new frame buffer during
-            vertical blanking, avoiding tearing. When an application requests a page
-            flip the DRM core verifies that the new frame buffer is large enough to
-            be scanned out by  the CRTC in the currently configured mode and then
-            calls the CRTC <methodname>page_flip</methodname> operation with a
-            pointer to the new frame buffer.
-          </para>
-          <para>
-            The <methodname>page_flip</methodname> operation schedules a page flip.
-            Once any pending rendering targeting the new frame buffer has
-            completed, the CRTC will be reprogrammed to display that frame buffer
-            after the next vertical refresh. The operation must return immediately
-            without waiting for rendering or page flip to complete and must block
-            any new rendering to the frame buffer until the page flip completes.
-          </para>
-          <para>
-            If a page flip can be successfully scheduled the driver must set the
-            <code>drm_crtc-&gt;fb</code> field to the new framebuffer pointed to
-            by <code>fb</code>. This is important so that the reference counting
-            on framebuffers stays balanced.
-          </para>
-          <para>
-            If a page flip is already pending, the
-            <methodname>page_flip</methodname> operation must return
-            -<errorname>EBUSY</errorname>.
-          </para>
-          <para>
-            To synchronize page flip to vertical blanking the driver will likely
-            need to enable vertical blanking interrupts. It should call
-            <function>drm_vblank_get</function> for that purpose, and call
-            <function>drm_vblank_put</function> after the page flip completes.
-          </para>
-          <para>
-            If the application has requested to be notified when page flip completes
-            the <methodname>page_flip</methodname> operation will be called with a
-            non-NULL <parameter>event</parameter> argument pointing to a
-            <structname>drm_pending_vblank_event</structname> instance. Upon page
-            flip completion the driver must call <methodname>drm_send_vblank_event</methodname>
-            to fill in the event and send to wake up any waiting processes.
-            This can be performed with
-            <programlisting><![CDATA[
-            spin_lock_irqsave(&dev->event_lock, flags);
-            ...
-            drm_send_vblank_event(dev, pipe, event);
-            spin_unlock_irqrestore(&dev->event_lock, flags);
-            ]]></programlisting>
-          </para>
-          <note><para>
-            FIXME: Could drivers that don't need to wait for rendering to complete
-            just add the event to <literal>dev-&gt;vblank_event_list</literal> and
-            let the DRM core handle everything, as for "normal" vertical blanking
-            events?
-          </para></note>
-          <para>
-            While waiting for the page flip to complete, the
-            <literal>event-&gt;base.link</literal> list head can be used freely by
-            the driver to store the pending event in a driver-specific list.
-          </para>
-          <para>
-            If the file handle is closed before the event is signaled, drivers must
-            take care to destroy the event in their
-            <methodname>preclose</methodname> operation (and, if needed, call
-            <function>drm_vblank_put</function>).
-          </para>
-        </sect4>
-        <sect4>
-          <title>Miscellaneous</title>
-          <itemizedlist>
-            <listitem>
-              <synopsis>void (*set_property)(struct drm_crtc *crtc,
-                     struct drm_property *property, uint64_t value);</synopsis>
-              <para>
-                Set the value of the given CRTC property to
-                <parameter>value</parameter>. See <xref linkend="drm-kms-properties"/>
-                for more information about properties.
-              </para>
-            </listitem>
-            <listitem>
-              <synopsis>void (*gamma_set)(struct drm_crtc *crtc, u16 *r, u16 *g, u16 *b,
-                        uint32_t start, uint32_t size);</synopsis>
-              <para>
-                Apply a gamma table to the device. The operation is optional.
-              </para>
-            </listitem>
-            <listitem>
-              <synopsis>void (*destroy)(struct drm_crtc *crtc);</synopsis>
-              <para>
-                Destroy the CRTC when not needed anymore. See
-                <xref linkend="drm-kms-init"/>.
-              </para>
-            </listitem>
-          </itemizedlist>
-        </sect4>
-      </sect3>
     </sect2>
     <sect2>
       <title>Planes (struct <structname>drm_plane</structname>)</title>
@@ -1344,7 +1227,7 @@
         <listitem>
         DRM_PLANE_TYPE_PRIMARY represents a "main" plane for a CRTC.  Primary
         planes are the planes operated upon by CRTC modesetting and flipping
-        operations described in <xref linkend="drm-kms-crtcops"/>.
+	operations described in the page_flip hook in <structname>drm_crtc_funcs</structname>.
         </listitem>
         <listitem>
         DRM_PLANE_TYPE_CURSOR represents a "cursor" plane for a CRTC.  Cursor
@@ -1381,52 +1264,6 @@
           primary plane with standard capabilities.
         </para>
       </sect3>
-      <sect3>
-        <title>Plane Operations</title>
-        <itemizedlist>
-          <listitem>
-            <synopsis>int (*update_plane)(struct drm_plane *plane, struct drm_crtc *crtc,
-                        struct drm_framebuffer *fb, int crtc_x, int crtc_y,
-                        unsigned int crtc_w, unsigned int crtc_h,
-                        uint32_t src_x, uint32_t src_y,
-                        uint32_t src_w, uint32_t src_h);</synopsis>
-            <para>
-              Enable and configure the plane to use the given CRTC and frame buffer.
-            </para>
-            <para>
-              The source rectangle in frame buffer memory coordinates is given by
-              the <parameter>src_x</parameter>, <parameter>src_y</parameter>,
-              <parameter>src_w</parameter> and <parameter>src_h</parameter>
-              parameters (as 16.16 fixed point values). Devices that don't support
-              subpixel plane coordinates can ignore the fractional part.
-            </para>
-            <para>
-              The destination rectangle in CRTC coordinates is given by the
-              <parameter>crtc_x</parameter>, <parameter>crtc_y</parameter>,
-              <parameter>crtc_w</parameter> and <parameter>crtc_h</parameter>
-              parameters (as integer values). Devices scale the source rectangle to
-              the destination rectangle. If scaling is not supported, and the source
-              rectangle size doesn't match the destination rectangle size, the
-              driver must return a -<errorname>EINVAL</errorname> error.
-            </para>
-          </listitem>
-          <listitem>
-            <synopsis>int (*disable_plane)(struct drm_plane *plane);</synopsis>
-            <para>
-              Disable the plane. The DRM core calls this method in response to a
-              DRM_IOCTL_MODE_SETPLANE ioctl call with the frame buffer ID set to 0.
-              Disabled planes must not be processed by the CRTC.
-            </para>
-          </listitem>
-          <listitem>
-            <synopsis>void (*destroy)(struct drm_plane *plane);</synopsis>
-            <para>
-              Destroy the plane when not needed anymore. See
-              <xref linkend="drm-kms-init"/>.
-            </para>
-          </listitem>
-        </itemizedlist>
-      </sect3>
     </sect2>
     <sect2>
       <title>Encoders (struct <structname>drm_encoder</structname>)</title>
@@ -1483,27 +1320,6 @@
           encoders they want to use to a CRTC.
         </para>
       </sect3>
-      <sect3>
-        <title>Encoder Operations</title>
-        <itemizedlist>
-          <listitem>
-            <synopsis>void (*destroy)(struct drm_encoder *encoder);</synopsis>
-            <para>
-              Called to destroy the encoder when not needed anymore. See
-              <xref linkend="drm-kms-init"/>.
-            </para>
-          </listitem>
-          <listitem>
-            <synopsis>void (*set_property)(struct drm_plane *plane,
-                     struct drm_property *property, uint64_t value);</synopsis>
-            <para>
-              Set the value of the given plane property to
-              <parameter>value</parameter>. See <xref linkend="drm-kms-properties"/>
-              for more information about properties.
-            </para>
-          </listitem>
-        </itemizedlist>
-      </sect3>
     </sect2>
     <sect2>
       <title>Connectors (struct <structname>drm_connector</structname>)</title>
@@ -1707,27 +1523,6 @@
             connector_status_unknown.
           </para>
         </sect4>
-        <sect4>
-          <title>Miscellaneous</title>
-          <itemizedlist>
-            <listitem>
-              <synopsis>void (*set_property)(struct drm_connector *connector,
-                     struct drm_property *property, uint64_t value);</synopsis>
-              <para>
-                Set the value of the given connector property to
-                <parameter>value</parameter>. See <xref linkend="drm-kms-properties"/>
-                for more information about properties.
-              </para>
-            </listitem>
-            <listitem>
-              <synopsis>void (*destroy)(struct drm_connector *connector);</synopsis>
-              <para>
-                Destroy the connector when not needed anymore. See
-                <xref linkend="drm-kms-init"/>.
-              </para>
-            </listitem>
-          </itemizedlist>
-        </sect4>
       </sect3>
     </sect2>
     <sect2>
@@ -1854,83 +1649,7 @@
       entities.
     </para>
     <sect2>
-      <title>Helper Functions</title>
-      <itemizedlist>
-        <listitem>
-          <synopsis>int drm_crtc_helper_set_config(struct drm_mode_set *set);</synopsis>
-          <para>
-            The <function>drm_crtc_helper_set_config</function> helper function
-            is a CRTC <methodname>set_config</methodname> implementation. It
-            first tries to locate the best encoder for each connector by calling
-            the connector <methodname>best_encoder</methodname> helper
-            operation.
-          </para>
-          <para>
-            After locating the appropriate encoders, the helper function will
-            call the <methodname>mode_fixup</methodname> encoder and CRTC helper
-            operations to adjust the requested mode, or reject it completely in
-            which case an error will be returned to the application. If the new
-            configuration after mode adjustment is identical to the current
-            configuration the helper function will return without performing any
-            other operation.
-          </para>
-          <para>
-            If the adjusted mode is identical to the current mode but changes to
-            the frame buffer need to be applied, the
-            <function>drm_crtc_helper_set_config</function> function will call
-            the CRTC <methodname>mode_set_base</methodname> helper operation. If
-            the adjusted mode differs from the current mode, or if the
-            <methodname>mode_set_base</methodname> helper operation is not
-            provided, the helper function performs a full mode set sequence by
-            calling the <methodname>prepare</methodname>,
-            <methodname>mode_set</methodname> and
-            <methodname>commit</methodname> CRTC and encoder helper operations,
-            in that order.
-          </para>
-        </listitem>
-        <listitem>
-          <synopsis>void drm_helper_connector_dpms(struct drm_connector *connector, int mode);</synopsis>
-          <para>
-            The <function>drm_helper_connector_dpms</function> helper function
-            is a connector <methodname>dpms</methodname> implementation that
-            tracks power state of connectors. To use the function, drivers must
-            provide <methodname>dpms</methodname> helper operations for CRTCs
-            and encoders to apply the DPMS state to the device.
-          </para>
-          <para>
-            The mid-layer doesn't track the power state of CRTCs and encoders.
-            The <methodname>dpms</methodname> helper operations can thus be
-            called with a mode identical to the currently active mode.
-          </para>
-        </listitem>
-        <listitem>
-          <synopsis>int drm_helper_probe_single_connector_modes(struct drm_connector *connector,
-                                            uint32_t maxX, uint32_t maxY);</synopsis>
-          <para>
-            The <function>drm_helper_probe_single_connector_modes</function> helper
-            function is a connector <methodname>fill_modes</methodname>
-            implementation that updates the connection status for the connector
-            and then retrieves a list of modes by calling the connector
-            <methodname>get_modes</methodname> helper operation.
-          </para>
-         <para>
-            If the helper operation returns no mode, and if the connector status
-            is connector_status_connected, standard VESA DMT modes up to
-            1024x768 are automatically added to the modes list by a call to
-            <function>drm_add_modes_noedid</function>.
-          </para>
-          <para>
-            The function then filters out modes larger than
-            <parameter>max_width</parameter> and <parameter>max_height</parameter>
-            if specified. It finally calls the optional connector
-            <methodname>mode_valid</methodname> helper operation for each mode in
-            the probed list to check whether the mode is valid for the connector.
-          </para>
-        </listitem>
-      </itemizedlist>
-    </sect2>
-    <sect2>
-      <title>CRTC Helper Operations</title>
+      <title>Legacy CRTC Helper Operations</title>
       <itemizedlist>
         <listitem id="drm-helper-crtc-mode-fixup">
           <synopsis>bool (*mode_fixup)(struct drm_crtc *crtc,
@@ -2076,198 +1795,6 @@
             <function>drm_add_edid_modes</function> manually in that case.
           </para>
           <para>
-            When adding modes manually the driver creates each mode with a call to
-            <function>drm_mode_create</function> and must fill the following fields.
-            <itemizedlist>
-              <listitem>
-                <synopsis>__u32 type;</synopsis>
-                <para>
-                  Mode type bitmask, a combination of
-                  <variablelist>
-                    <varlistentry>
-                      <term>DRM_MODE_TYPE_BUILTIN</term>
-                      <listitem><para>not used?</para></listitem>
-                    </varlistentry>
-                    <varlistentry>
-                      <term>DRM_MODE_TYPE_CLOCK_C</term>
-                      <listitem><para>not used?</para></listitem>
-                    </varlistentry>
-                    <varlistentry>
-                      <term>DRM_MODE_TYPE_CRTC_C</term>
-                      <listitem><para>not used?</para></listitem>
-                    </varlistentry>
-                    <varlistentry>
-                      <term>
-        DRM_MODE_TYPE_PREFERRED - The preferred mode for the connector
-                      </term>
-                      <listitem>
-                        <para>not used?</para>
-                      </listitem>
-                    </varlistentry>
-                    <varlistentry>
-                      <term>DRM_MODE_TYPE_DEFAULT</term>
-                      <listitem><para>not used?</para></listitem>
-                    </varlistentry>
-                    <varlistentry>
-                      <term>DRM_MODE_TYPE_USERDEF</term>
-                      <listitem><para>not used?</para></listitem>
-                    </varlistentry>
-                    <varlistentry>
-                      <term>DRM_MODE_TYPE_DRIVER</term>
-                      <listitem>
-                        <para>
-                          The mode has been created by the driver (as opposed to
-                          to user-created modes).
-                        </para>
-                      </listitem>
-                    </varlistentry>
-                  </variablelist>
-                  Drivers must set the DRM_MODE_TYPE_DRIVER bit for all modes they
-                  create, and set the DRM_MODE_TYPE_PREFERRED bit for the preferred
-                  mode.
-                </para>
-              </listitem>
-              <listitem>
-                <synopsis>__u32 clock;</synopsis>
-                <para>Pixel clock frequency in kHz unit</para>
-              </listitem>
-              <listitem>
-                <synopsis>__u16 hdisplay, hsync_start, hsync_end, htotal;
-    __u16 vdisplay, vsync_start, vsync_end, vtotal;</synopsis>
-                <para>Horizontal and vertical timing information</para>
-                <screen><![CDATA[
-             Active                 Front           Sync           Back
-             Region                 Porch                          Porch
-    <-----------------------><----------------><-------------><-------------->
-
-      //////////////////////|
-     ////////////////////// |
-    //////////////////////  |..................               ................
-                                               _______________
-
-    <----- [hv]display ----->
-    <------------- [hv]sync_start ------------>
-    <--------------------- [hv]sync_end --------------------->
-    <-------------------------------- [hv]total ----------------------------->
-]]></screen>
-              </listitem>
-              <listitem>
-                <synopsis>__u16 hskew;
-    __u16 vscan;</synopsis>
-                <para>Unknown</para>
-              </listitem>
-              <listitem>
-                <synopsis>__u32 flags;</synopsis>
-                <para>
-                  Mode flags, a combination of
-                  <variablelist>
-                    <varlistentry>
-                      <term>DRM_MODE_FLAG_PHSYNC</term>
-                      <listitem><para>
-                        Horizontal sync is active high
-                      </para></listitem>
-                    </varlistentry>
-                    <varlistentry>
-                      <term>DRM_MODE_FLAG_NHSYNC</term>
-                      <listitem><para>
-                        Horizontal sync is active low
-                      </para></listitem>
-                    </varlistentry>
-                    <varlistentry>
-                      <term>DRM_MODE_FLAG_PVSYNC</term>
-                      <listitem><para>
-                        Vertical sync is active high
-                      </para></listitem>
-                    </varlistentry>
-                    <varlistentry>
-                      <term>DRM_MODE_FLAG_NVSYNC</term>
-                      <listitem><para>
-                        Vertical sync is active low
-                      </para></listitem>
-                    </varlistentry>
-                    <varlistentry>
-                      <term>DRM_MODE_FLAG_INTERLACE</term>
-                      <listitem><para>
-                        Mode is interlaced
-                      </para></listitem>
-                    </varlistentry>
-                    <varlistentry>
-                      <term>DRM_MODE_FLAG_DBLSCAN</term>
-                      <listitem><para>
-                        Mode uses doublescan
-                      </para></listitem>
-                    </varlistentry>
-                    <varlistentry>
-                      <term>DRM_MODE_FLAG_CSYNC</term>
-                      <listitem><para>
-                        Mode uses composite sync
-                      </para></listitem>
-                    </varlistentry>
-                    <varlistentry>
-                      <term>DRM_MODE_FLAG_PCSYNC</term>
-                      <listitem><para>
-                        Composite sync is active high
-                      </para></listitem>
-                    </varlistentry>
-                    <varlistentry>
-                      <term>DRM_MODE_FLAG_NCSYNC</term>
-                      <listitem><para>
-                        Composite sync is active low
-                      </para></listitem>
-                    </varlistentry>
-                    <varlistentry>
-                      <term>DRM_MODE_FLAG_HSKEW</term>
-                      <listitem><para>
-                        hskew provided (not used?)
-                      </para></listitem>
-                    </varlistentry>
-                    <varlistentry>
-                      <term>DRM_MODE_FLAG_BCAST</term>
-                      <listitem><para>
-                        not used?
-                      </para></listitem>
-                    </varlistentry>
-                    <varlistentry>
-                      <term>DRM_MODE_FLAG_PIXMUX</term>
-                      <listitem><para>
-                        not used?
-                      </para></listitem>
-                    </varlistentry>
-                    <varlistentry>
-                      <term>DRM_MODE_FLAG_DBLCLK</term>
-                      <listitem><para>
-                        not used?
-                      </para></listitem>
-                    </varlistentry>
-                    <varlistentry>
-                      <term>DRM_MODE_FLAG_CLKDIV2</term>
-                      <listitem><para>
-                        ?
-                      </para></listitem>
-                    </varlistentry>
-                  </variablelist>
-                </para>
-                <para>
-                  Note that modes marked with the INTERLACE or DBLSCAN flags will be
-                  filtered out by
-                  <function>drm_helper_probe_single_connector_modes</function> if
-                  the connector's <structfield>interlace_allowed</structfield> or
-                  <structfield>doublescan_allowed</structfield> field is set to 0.
-                </para>
-              </listitem>
-              <listitem>
-                <synopsis>char name[DRM_DISPLAY_MODE_LEN];</synopsis>
-                <para>
-                  Mode name. The driver must call
-                  <function>drm_mode_set_name</function> to fill the mode name from
-                  <structfield>hdisplay</structfield>,
-                  <structfield>vdisplay</structfield> and interlace flag after
-                  filling the corresponding fields.
-                </para>
-              </listitem>
-            </itemizedlist>
-          </para>
-          <para>
             The <structfield>vrefresh</structfield> value is computed by
             <function>drm_helper_probe_single_connector_modes</function>.
           </para>
@@ -2327,8 +1854,12 @@
 !Edrivers/gpu/drm/drm_atomic_helper.c
     </sect2>
     <sect2>
-      <title>Modeset Helper Functions Reference</title>
-!Iinclude/drm/drm_crtc_helper.h
+      <title>Modeset Helper Reference for Common Vtables</title>
+!Iinclude/drm/drm_modeset_helper_vtables.h
+!Pinclude/drm/drm_modeset_helper_vtables.h overview
+    </sect2>
+    <sect2>
+      <title>Legacy CRTC/Modeset Helper Functions Reference</title>
 !Edrivers/gpu/drm/drm_crtc_helper.c
 !Pdrivers/gpu/drm/drm_crtc_helper.c overview
     </sect2>
@@ -4201,17 +3732,21 @@
       </sect2>
     </sect1>
     <sect1>
-      <title>GuC-based Command Submission</title>
+      <title>GuC</title>
       <sect2>
-        <title>GuC</title>
+        <title>GuC-specific firmware loader</title>
 !Pdrivers/gpu/drm/i915/intel_guc_loader.c GuC-specific firmware loader
 !Idrivers/gpu/drm/i915/intel_guc_loader.c
       </sect2>
       <sect2>
-        <title>GuC Client</title>
-!Pdrivers/gpu/drm/i915/i915_guc_submission.c GuC-based command submissison
+        <title>GuC-based command submission</title>
+!Pdrivers/gpu/drm/i915/i915_guc_submission.c GuC-based command submission
 !Idrivers/gpu/drm/i915/i915_guc_submission.c
       </sect2>
+      <sect2>
+        <title>GuC Firmware Layout</title>
+!Pdrivers/gpu/drm/i915/intel_guc_fwif.h GuC Firmware Layout
+      </sect2>
     </sect1>
 
     <sect1>
diff --git a/Documentation/IPMI.txt b/Documentation/IPMI.txt
index 31d1d65..c0d8788 100644
--- a/Documentation/IPMI.txt
+++ b/Documentation/IPMI.txt
@@ -587,7 +587,7 @@
 
   modprobe ipmi_watchdog timeout=<t> pretimeout=<t> action=<action type>
       preaction=<preaction type> preop=<preop type> start_now=x
-      nowayout=x ifnum_to_use=n
+      nowayout=x ifnum_to_use=n panic_wdt_timeout=<t>
 
 ifnum_to_use specifies which interface the watchdog timer should use.
 The default is -1, which means to pick the first one registered.
@@ -597,7 +597,9 @@
 occur (if pretimeout is zero, then pretimeout will not be enabled).  Note
 that the pretimeout is the time before the final timeout.  So if the
 timeout is 50 seconds and the pretimeout is 10 seconds, then the pretimeout
-will occur in 40 second (10 seconds before the timeout).
+will occur in 40 second (10 seconds before the timeout). The panic_wdt_timeout
+is the value of timeout which is set on kernel panic, in order to let actions
+such as kdump to occur during panic.
 
 The action may be "reset", "power_cycle", or "power_off", and
 specifies what to do when the timer times out, and defaults to
@@ -634,6 +636,7 @@
 	ipmi_watchdog.preop=<preop type>
 	ipmi_watchdog.start_now=x
 	ipmi_watchdog.nowayout=x
+	ipmi_watchdog.panic_wdt_timeout=<t>
 
 The options are the same as the module parameter options.
 
diff --git a/Documentation/arm/keystone/Overview.txt b/Documentation/arm/keystone/Overview.txt
index f17bc4c..400c0c2 100644
--- a/Documentation/arm/keystone/Overview.txt
+++ b/Documentation/arm/keystone/Overview.txt
@@ -49,24 +49,6 @@
 The device tree documentation for the keystone machines are located at
         Documentation/devicetree/bindings/arm/keystone/keystone.txt
 
-Known issues & workaround
--------------------------
-
-Some of the device drivers used on keystone are re-used from that from
-DaVinci and other TI SoCs. These device drivers may use clock APIs directly.
-Some of the keystone specific drivers such as netcp uses run time power
-management API instead to enable clock. As this API has limitations on
-keystone, following workaround is needed to boot Linux.
-
-   Add 'clk_ignore_unused' to the bootargs env variable in u-boot. Otherwise
-   clock frameworks will try to disable clocks that are unused and disable
-   the hardware. This is because netcp related power domain and clock
-   domains are enabled in u-boot as run time power management API currently
-   doesn't enable clocks for netcp due to a limitation. This workaround is
-   expected to be removed in the future when proper API support becomes
-   available. Until then, this work around is needed.
-
-
 Document Author
 ---------------
 Murali Karicheri <m-karicheri2@ti.com>
diff --git a/Documentation/block/null_blk.txt b/Documentation/block/null_blk.txt
index 2f6c6ff..d8880ca 100644
--- a/Documentation/block/null_blk.txt
+++ b/Documentation/block/null_blk.txt
@@ -70,3 +70,6 @@
      parameter.
   1: The multi-queue block layer is instantiated with a hardware dispatch
      queue for each CPU node in the system.
+
+use_lightnvm=[0/1]: Default: 0
+  Register device with LightNVM. Requires blk-mq to be used.
diff --git a/Documentation/devicetree/bindings/display/exynos/exynos_dp.txt b/Documentation/devicetree/bindings/display/exynos/exynos_dp.txt
index 64693f2..fe4a7a2 100644
--- a/Documentation/devicetree/bindings/display/exynos/exynos_dp.txt
+++ b/Documentation/devicetree/bindings/display/exynos/exynos_dp.txt
@@ -1,3 +1,20 @@
+Device-Tree bindings for Samsung Exynos Embedded DisplayPort Transmitter(eDP)
+
+DisplayPort is industry standard to accommodate the growing board adoption
+of digital display technology within the PC and CE industries.
+It consolidates the internal and external connection methods to reduce device
+complexity and cost. It also supports necessary features for important cross
+industry applications and provides performance scalability to enable the next
+generation of displays that feature higher color depths, refresh rates, and
+display resolutions.
+
+eDP (embedded display port) device is compliant with Embedded DisplayPort
+standard as follows,
+- DisplayPort standard 1.1a for Exynos5250 and Exynos5260.
+- DisplayPort standard 1.3 for Exynos5422s and Exynos5800.
+
+eDP resides between FIMD and panel or FIMD and bridge such as LVDS.
+
 The Exynos display port interface should be configured based on
 the type of panel connected to it.
 
@@ -66,8 +83,15 @@
 		Hotplug detect GPIO.
 			Indicates which GPIO should be used for hotplug
 			detection
-	-video interfaces: Device node can contain video interface port
-			    nodes according to [1].
+Video interfaces:
+  Device node can contain video interface port nodes according to [1].
+  The following are properties specific to those nodes:
+
+  endpoint node connected to bridge or panel node:
+   - remote-endpoint: specifies the endpoint in panel or bridge node.
+		      This node is required in all kinds of exynos dp
+		      to represent the connection between dp and bridge
+		      or dp and panel.
 
 [1]: Documentation/devicetree/bindings/media/video-interfaces.txt
 
@@ -111,9 +135,18 @@
 		};
 
 		ports {
-			port@0 {
+			port {
 				dp_out: endpoint {
-					remote-endpoint = <&bridge_in>;
+					remote-endpoint = <&dp_in>;
+				};
+			};
+		};
+
+		panel {
+			...
+			port {
+				dp_in: endpoint {
+					remote-endpoint = <&dp_out>;
 				};
 			};
 		};
diff --git a/Documentation/devicetree/bindings/display/msm/dsi.txt b/Documentation/devicetree/bindings/display/msm/dsi.txt
index f344b9e..e7423be 100644
--- a/Documentation/devicetree/bindings/display/msm/dsi.txt
+++ b/Documentation/devicetree/bindings/display/msm/dsi.txt
@@ -14,17 +14,20 @@
 - clocks: device clocks
   See Documentation/devicetree/bindings/clocks/clock-bindings.txt for details.
 - clock-names: the following clocks are required:
-  * "bus_clk"
-  * "byte_clk"
-  * "core_clk"
-  * "core_mmss_clk"
-  * "iface_clk"
   * "mdp_core_clk"
+  * "iface_clk"
+  * "bus_clk"
+  * "core_mmss_clk"
+  * "byte_clk"
   * "pixel_clk"
+  * "core_clk"
+  For DSIv2, we need an additional clock:
+   * "src_clk"
 - vdd-supply: phandle to vdd regulator device node
 - vddio-supply: phandle to vdd-io regulator device node
 - vdda-supply: phandle to vdda regulator device node
 - qcom,dsi-phy: phandle to DSI PHY device node
+- syscon-sfpb: A phandle to mmss_sfpb syscon node (only for DSIv2)
 
 Optional properties:
 - panel@0: Node of panel connected to this DSI controller.
@@ -51,6 +54,7 @@
   * "qcom,dsi-phy-28nm-hpm"
   * "qcom,dsi-phy-28nm-lp"
   * "qcom,dsi-phy-20nm"
+  * "qcom,dsi-phy-28nm-8960"
 - reg: Physical base address and length of the registers of PLL, PHY and PHY
   regulator
 - reg-names: The names of register regions. The following regions are required:
diff --git a/Documentation/devicetree/bindings/display/msm/mdp.txt b/Documentation/devicetree/bindings/display/msm/mdp.txt
index 0833eda..a214f6c 100644
--- a/Documentation/devicetree/bindings/display/msm/mdp.txt
+++ b/Documentation/devicetree/bindings/display/msm/mdp.txt
@@ -2,18 +2,28 @@
 
 Required properties:
 - compatible:
-  * "qcom,mdp" - mdp4
+  * "qcom,mdp4" - mdp4
+  * "qcom,mdp5" - mdp5
 - reg: Physical base address and length of the controller's registers.
 - interrupts: The interrupt signal from the display controller.
 - connectors: array of phandles for output device(s)
 - clocks: device clocks
   See ../clocks/clock-bindings.txt for details.
-- clock-names: the following clocks are required:
-  * "core_clk"
-  * "iface_clk"
-  * "src_clk"
-  * "hdmi_clk"
-  * "mpd_clk"
+- clock-names: the following clocks are required.
+  For MDP4:
+   * "core_clk"
+   * "iface_clk"
+   * "lut_clk"
+   * "src_clk"
+   * "hdmi_clk"
+   * "mdp_clk"
+  For MDP5:
+   * "bus_clk"
+   * "iface_clk"
+   * "core_clk_src"
+   * "core_clk"
+   * "lut_clk" (some MDP5 versions may not need this)
+   * "vsync_clk"
 
 Optional properties:
 - gpus: phandle for gpu device
@@ -26,7 +36,7 @@
 	...
 
 	mdp: qcom,mdp@5100000 {
-		compatible = "qcom,mdp";
+		compatible = "qcom,mdp4";
 		reg = <0x05100000 0xf0000>;
 		interrupts = <GIC_SPI 75 0>;
 		connectors = <&hdmi>;
diff --git a/Documentation/devicetree/bindings/media/exynos5-gsc.txt b/Documentation/devicetree/bindings/media/exynos5-gsc.txt
index 0604d42..5fe9372 100644
--- a/Documentation/devicetree/bindings/media/exynos5-gsc.txt
+++ b/Documentation/devicetree/bindings/media/exynos5-gsc.txt
@@ -7,6 +7,10 @@
 - reg: should contain G-Scaler physical address location and length.
 - interrupts: should contain G-Scaler interrupt number
 
+Optional properties:
+- samsung,sysreg: handle to syscon used to control the system registers to
+  set writeback input and destination
+
 Example:
 
 gsc_0:  gsc@0x13e00000 {
diff --git a/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt b/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt
index f5a8ca2..aeea50c 100644
--- a/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt
+++ b/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt
@@ -8,6 +8,11 @@
 - phy-mode: See ethernet.txt file in the same directory
 - clocks: a pointer to the reference clock for this device.
 
+Optional properties:
+- tx-csum-limit: maximum mtu supported by port that allow TX checksum.
+  Value is presented in bytes. If not used, by default 1600B is set for
+  "marvell,armada-370-neta" and 9800B for others.
+
 Example:
 
 ethernet@d0070000 {
@@ -15,6 +20,7 @@
 	reg = <0xd0070000 0x2500>;
 	interrupts = <8>;
 	clocks = <&gate_clk 4>;
+	tx-csum-limit = <9800>
 	status = "okay";
 	phy = <&phy0>;
 	phy-mode = "rgmii-id";
diff --git a/Documentation/devicetree/bindings/thermal/rockchip-thermal.txt b/Documentation/devicetree/bindings/thermal/rockchip-thermal.txt
index b38200d..0dfa60d 100644
--- a/Documentation/devicetree/bindings/thermal/rockchip-thermal.txt
+++ b/Documentation/devicetree/bindings/thermal/rockchip-thermal.txt
@@ -1,7 +1,9 @@
 * Temperature Sensor ADC (TSADC) on rockchip SoCs
 
 Required properties:
-- compatible : "rockchip,rk3288-tsadc"
+- compatible : should be "rockchip,<name>-tsadc"
+   "rockchip,rk3288-tsadc": found on RK3288 SoCs
+   "rockchip,rk3368-tsadc": found on RK3368 SoCs
 - reg : physical base address of the controller and length of memory mapped
 	region.
 - interrupts : The interrupt number to the cpu. The interrupt specifier format
diff --git a/Documentation/i2c/busses/i2c-i801 b/Documentation/i2c/busses/i2c-i801
index 6a4b1af..1bba38d 100644
--- a/Documentation/i2c/busses/i2c-i801
+++ b/Documentation/i2c/busses/i2c-i801
@@ -32,6 +32,7 @@
   * Intel Sunrise Point-LP (PCH)
   * Intel DNV (SOC)
   * Intel Broxton (SOC)
+  * Intel Lewisburg (PCH)
    Datasheets: Publicly available at the Intel website
 
 On Intel Patsburg and later chipsets, both the normal host SMBus controller
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index f8aae63..742f69d 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1583,9 +1583,6 @@
 		hwp_only
 			Only load intel_pstate on systems which support
 			hardware P state control (HWP) if available.
-		no_acpi
-			Don't use ACPI processor performance control objects
-			_PSS and _PPC specified limits.
 
 	intremap=	[X86-64, Intel-IOMMU]
 			on	enable Interrupt Remapping (default)
diff --git a/MAINTAINERS b/MAINTAINERS
index e9caa4b..69c8a9c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -318,7 +318,7 @@
 L:	linux-acpi@vger.kernel.org
 W:	https://01.org/linux-acpi
 S:	Supported
-F:	drivers/acpi/video.c
+F:	drivers/acpi/acpi_video.c
 
 ACPI WMI DRIVER
 L:	platform-driver-x86@vger.kernel.org
@@ -1847,7 +1847,7 @@
 F:	drivers/net/wireless/ath/ath6kl/
 
 WILOCITY WIL6210 WIRELESS DRIVER
-M:	Vladimir Kondratiev <qca_vkondrat@qca.qualcomm.com>
+M:	Maya Erez <qca_merez@qca.qualcomm.com>
 L:	linux-wireless@vger.kernel.org
 L:	wil6210@qca.qualcomm.com
 S:	Supported
@@ -1931,7 +1931,7 @@
 F:	drivers/i2c/busses/i2c-at91.c
 
 ATMEL ISI DRIVER
-M:	Josh Wu <josh.wu@atmel.com>
+M:	Ludovic Desroches <ludovic.desroches@atmel.com>
 L:	linux-media@vger.kernel.org
 S:	Supported
 F:	drivers/media/platform/soc_camera/atmel-isi.c
@@ -1950,7 +1950,8 @@
 F:	drivers/net/ethernet/cadence/
 
 ATMEL NAND DRIVER
-M:	Josh Wu <josh.wu@atmel.com>
+M:	Wenyou Yang <wenyou.yang@atmel.com>
+M:	Josh Wu <rainyfeeling@outlook.com>
 L:	linux-mtd@lists.infradead.org
 S:	Supported
 F:	drivers/mtd/nand/atmel_nand*
@@ -2449,7 +2450,9 @@
 
 BROADCOM STB NAND FLASH DRIVER
 M:	Brian Norris <computersforpeace@gmail.com>
+M:	Kamal Dasu <kdasu.kdev@gmail.com>
 L:	linux-mtd@lists.infradead.org
+L:	bcm-kernel-feedback-list@broadcom.com
 S:	Maintained
 F:	drivers/mtd/nand/brcmnand/
 
@@ -2546,7 +2549,7 @@
 
 CACHEFILES: FS-CACHE BACKEND FOR CACHING ON MOUNTED FILESYSTEMS
 M:	David Howells <dhowells@redhat.com>
-L:	linux-cachefs@redhat.com
+L:	linux-cachefs@redhat.com (moderated for non-subscribers)
 S:	Supported
 F:	Documentation/filesystems/caching/cachefiles.txt
 F:	fs/cachefiles/
@@ -2929,10 +2932,9 @@
 F:	drivers/platform/x86/compal-laptop.c
 
 CONEXANT ACCESSRUNNER USB DRIVER
-M:	Simon Arlott <cxacru@fire.lp0.eu>
 L:	accessrunner-general@lists.sourceforge.net
 W:	http://accessrunner.sourceforge.net/
-S:	Maintained
+S:	Orphan
 F:	drivers/usb/atm/cxacru.c
 
 CONFIGFS
@@ -4409,6 +4411,7 @@
 
 FPGA MANAGER FRAMEWORK
 M:	Alan Tull <atull@opensource.altera.com>
+R:	Moritz Fischer <moritz.fischer@ettus.com>
 S:	Maintained
 F:	drivers/fpga/
 F:	include/linux/fpga/fpga-mgr.h
@@ -4559,7 +4562,7 @@
 
 FS-CACHE: LOCAL CACHING FOR NETWORK FILESYSTEMS
 M:	David Howells <dhowells@redhat.com>
-L:	linux-cachefs@redhat.com
+L:	linux-cachefs@redhat.com (moderated for non-subscribers)
 S:	Supported
 F:	Documentation/filesystems/caching/
 F:	fs/fscache/
@@ -5711,13 +5714,6 @@
 S:	Maintained
 F:	net/ipv4/netfilter/ipt_MASQUERADE.c
 
-IP1000A 10/100/1000 GIGABIT ETHERNET DRIVER
-M:	Francois Romieu <romieu@fr.zoreil.com>
-M:	Sorbica Shieh <sorbica@icplus.com.tw>
-L:	netdev@vger.kernel.org
-S:	Maintained
-F:	drivers/net/ethernet/icplus/ipg.*
-
 IPATH DRIVER
 M:	Mike Marciniszyn <infinipath@intel.com>
 L:	linux-rdma@vger.kernel.org
@@ -6371,6 +6367,7 @@
 LIGHTNVM PLATFORM SUPPORT
 M:	Matias Bjorling <mb@lightnvm.io>
 W:	http://github/OpenChannelSSD
+L:	linux-block@vger.kernel.org
 S:	Maintained
 F:	drivers/lightnvm/
 F:	include/linux/lightnvm.h
@@ -6923,13 +6920,21 @@
 F:	drivers/scsi/megaraid/
 
 MELLANOX ETHERNET DRIVER (mlx4_en)
-M:	Amir Vadai <amirv@mellanox.com>
+M: 	Eugenia Emantayev <eugenia@mellanox.com>
 L:	netdev@vger.kernel.org
 S:	Supported
 W:	http://www.mellanox.com
 Q:	http://patchwork.ozlabs.org/project/netdev/list/
 F:	drivers/net/ethernet/mellanox/mlx4/en_*
 
+MELLANOX ETHERNET DRIVER (mlx5e)
+M:	Saeed Mahameed <saeedm@mellanox.com>
+L:	netdev@vger.kernel.org
+S:	Supported
+W:	http://www.mellanox.com
+Q:	http://patchwork.ozlabs.org/project/netdev/list/
+F:	drivers/net/ethernet/mellanox/mlx5/core/en_*
+
 MELLANOX ETHERNET SWITCH DRIVERS
 M:	Jiri Pirko <jiri@mellanox.com>
 M:	Ido Schimmel <idosch@mellanox.com>
@@ -7901,6 +7906,18 @@
 F:	net/openvswitch/
 F:	include/uapi/linux/openvswitch.h
 
+OPERATING PERFORMANCE POINTS (OPP)
+M:	Viresh Kumar <vireshk@kernel.org>
+M:	Nishanth Menon <nm@ti.com>
+M:	Stephen Boyd <sboyd@codeaurora.org>
+L:	linux-pm@vger.kernel.org
+S:	Maintained
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/vireshk/pm.git
+F:	drivers/base/power/opp/
+F:	include/linux/pm_opp.h
+F:	Documentation/power/opp.txt
+F:	Documentation/devicetree/bindings/opp/
+
 OPL4 DRIVER
 M:	Clemens Ladisch <clemens@ladisch.de>
 L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
@@ -9314,7 +9331,6 @@
 F:	include/linux/platform_data/i2c-designware.h
 
 SYNOPSYS DESIGNWARE MMC/SD/SDIO DRIVER
-M:	Seungwon Jeon <tgih.jun@samsung.com>
 M:	Jaehoon Chung <jh80.chung@samsung.com>
 L:	linux-mmc@vger.kernel.org
 S:	Maintained
@@ -9411,8 +9427,10 @@
 
 SCSI SUBSYSTEM
 M:	"James E.J. Bottomley" <JBottomley@odin.com>
-L:	linux-scsi@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi.git
+M:	"Martin K. Petersen" <martin.petersen@oracle.com>
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mkp/scsi.git
+L:	linux-scsi@vger.kernel.org
 S:	Maintained
 F:	drivers/scsi/
 F:	include/scsi/
@@ -10887,9 +10905,9 @@
 F:	drivers/media/tuners/tua9001*
 
 TULIP NETWORK DRIVERS
-M:	Grant Grundler <grundler@parisc-linux.org>
 L:	netdev@vger.kernel.org
-S:	Maintained
+L:	linux-parisc@vger.kernel.org
+S:	Orphan
 F:	drivers/net/ethernet/dec/tulip/
 
 TUN/TAP driver
diff --git a/Makefile b/Makefile
index 3a0234f..d644f6e 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 4
 SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc4
 NAME = Blurry Fish Butt
 
 # *DOCUMENTATION*
diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig
index c92c0ef..f1ac981 100644
--- a/arch/arc/configs/axs101_defconfig
+++ b/arch/arc/configs/axs101_defconfig
@@ -1,4 +1,4 @@
-CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_CROSS_COMPILE="arc-linux-"
 CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 # CONFIG_SWAP is not set
 CONFIG_SYSVIPC=y
diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig
index cfac24e..323486d 100644
--- a/arch/arc/configs/axs103_defconfig
+++ b/arch/arc/configs/axs103_defconfig
@@ -1,4 +1,4 @@
-CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_CROSS_COMPILE="arc-linux-"
 CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 # CONFIG_SWAP is not set
 CONFIG_SYSVIPC=y
diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig
index 9922a11..66191cd 100644
--- a/arch/arc/configs/axs103_smp_defconfig
+++ b/arch/arc/configs/axs103_smp_defconfig
@@ -1,4 +1,4 @@
-CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_CROSS_COMPILE="arc-linux-"
 CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 # CONFIG_SWAP is not set
 CONFIG_SYSVIPC=y
diff --git a/arch/arc/configs/nsim_hs_defconfig b/arch/arc/configs/nsim_hs_defconfig
index f761a7c..f68838e 100644
--- a/arch/arc/configs/nsim_hs_defconfig
+++ b/arch/arc/configs/nsim_hs_defconfig
@@ -1,4 +1,4 @@
-CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_CROSS_COMPILE="arc-linux-"
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 # CONFIG_SWAP is not set
diff --git a/arch/arc/configs/nsim_hs_smp_defconfig b/arch/arc/configs/nsim_hs_smp_defconfig
index dc6f74f..96bd1c2 100644
--- a/arch/arc/configs/nsim_hs_smp_defconfig
+++ b/arch/arc/configs/nsim_hs_smp_defconfig
@@ -1,4 +1,4 @@
-CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_CROSS_COMPILE="arc-linux-"
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 # CONFIG_SWAP is not set
diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig
index 3fef0a2..fcae666 100644
--- a/arch/arc/configs/nsimosci_hs_defconfig
+++ b/arch/arc/configs/nsimosci_hs_defconfig
@@ -1,4 +1,4 @@
-CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_CROSS_COMPILE="arc-linux-"
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 # CONFIG_SWAP is not set
diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig
index 5178483..b01b659 100644
--- a/arch/arc/configs/nsimosci_hs_smp_defconfig
+++ b/arch/arc/configs/nsimosci_hs_smp_defconfig
@@ -1,4 +1,4 @@
-CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_CROSS_COMPILE="arc-linux-"
 CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 # CONFIG_SWAP is not set
 CONFIG_SYSVIPC=y
diff --git a/arch/arc/configs/vdk_hs38_defconfig b/arch/arc/configs/vdk_hs38_defconfig
index ef35ef3..a07f20d 100644
--- a/arch/arc/configs/vdk_hs38_defconfig
+++ b/arch/arc/configs/vdk_hs38_defconfig
@@ -1,4 +1,4 @@
-CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_CROSS_COMPILE="arc-linux-"
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 # CONFIG_CROSS_MEMORY_ATTACH is not set
diff --git a/arch/arc/configs/vdk_hs38_smp_defconfig b/arch/arc/configs/vdk_hs38_smp_defconfig
index 634509e..f36c047 100644
--- a/arch/arc/configs/vdk_hs38_smp_defconfig
+++ b/arch/arc/configs/vdk_hs38_smp_defconfig
@@ -1,4 +1,4 @@
-CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
+CONFIG_CROSS_COMPILE="arc-linux-"
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 # CONFIG_CROSS_MEMORY_ATTACH is not set
diff --git a/arch/arc/include/asm/irqflags-arcv2.h b/arch/arc/include/asm/irqflags-arcv2.h
index ad481c2..258b0e5 100644
--- a/arch/arc/include/asm/irqflags-arcv2.h
+++ b/arch/arc/include/asm/irqflags-arcv2.h
@@ -37,6 +37,9 @@
 #define ISA_INIT_STATUS_BITS	(STATUS_IE_MASK | STATUS_AD_MASK | \
 					(ARCV2_IRQ_DEF_PRIO << 1))
 
+/* SLEEP needs default irq priority (<=) which can interrupt the doze */
+#define ISA_SLEEP_ARG		(0x10 | ARCV2_IRQ_DEF_PRIO)
+
 #ifndef __ASSEMBLY__
 
 /*
diff --git a/arch/arc/include/asm/irqflags-compact.h b/arch/arc/include/asm/irqflags-compact.h
index d8c6081..c1d3645 100644
--- a/arch/arc/include/asm/irqflags-compact.h
+++ b/arch/arc/include/asm/irqflags-compact.h
@@ -43,6 +43,8 @@
 
 #define ISA_INIT_STATUS_BITS	STATUS_IE_MASK
 
+#define ISA_SLEEP_ARG		0x3
+
 #ifndef __ASSEMBLY__
 
 /******************************************************************
diff --git a/arch/arc/kernel/ctx_sw.c b/arch/arc/kernel/ctx_sw.c
index c14a5be..5d446df 100644
--- a/arch/arc/kernel/ctx_sw.c
+++ b/arch/arc/kernel/ctx_sw.c
@@ -58,8 +58,6 @@
 		"st      sp, [r24]       \n\t"
 #endif
 
-		"sync   \n\t"
-
 		/*
 		 * setup _current_task with incoming tsk.
 		 * optionally, set r25 to that as well
diff --git a/arch/arc/kernel/ctx_sw_asm.S b/arch/arc/kernel/ctx_sw_asm.S
index e248594..e6890b1 100644
--- a/arch/arc/kernel/ctx_sw_asm.S
+++ b/arch/arc/kernel/ctx_sw_asm.S
@@ -44,9 +44,6 @@
 	* don't need to do anything special to return it
 	*/
 
-	/* hardware memory barrier */
-	sync
-
 	/*
 	 * switch to new task, contained in r1
 	 * Temp reg r3 is required to get the ptr to store val
diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c
index 91d5a0f..a3f750e 100644
--- a/arch/arc/kernel/process.c
+++ b/arch/arc/kernel/process.c
@@ -44,11 +44,10 @@
 void arch_cpu_idle(void)
 {
 	/* sleep, but enable all interrupts before committing */
-	if (is_isa_arcompact()) {
-		__asm__("sleep 0x3");
-	} else {
-		__asm__("sleep 0x10");
-	}
+	__asm__ __volatile__(
+		"sleep %0	\n"
+		:
+		:"I"(ISA_SLEEP_ARG)); /* can't be "r" has to be embedded const */
 }
 
 asmlinkage void ret_from_fork(void);
diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c
index 93c6ea5..7352475 100644
--- a/arch/arc/kernel/unwind.c
+++ b/arch/arc/kernel/unwind.c
@@ -986,42 +986,13 @@
 							    (const u8 *)(fde +
 									 1) +
 							    *fde, ptrType);
-				if (pc >= endLoc)
+				if (pc >= endLoc) {
 					fde = NULL;
-			} else
-				fde = NULL;
-		}
-		if (fde == NULL) {
-			for (fde = table->address, tableSize = table->size;
-			     cie = NULL, tableSize > sizeof(*fde)
-			     && tableSize - sizeof(*fde) >= *fde;
-			     tableSize -= sizeof(*fde) + *fde,
-			     fde += 1 + *fde / sizeof(*fde)) {
-				cie = cie_for_fde(fde, table);
-				if (cie == &bad_cie) {
 					cie = NULL;
-					break;
 				}
-				if (cie == NULL
-				    || cie == &not_fde
-				    || (ptrType = fde_pointer_type(cie)) < 0)
-					continue;
-				ptr = (const u8 *)(fde + 2);
-				startLoc = read_pointer(&ptr,
-							(const u8 *)(fde + 1) +
-							*fde, ptrType);
-				if (!startLoc)
-					continue;
-				if (!(ptrType & DW_EH_PE_indirect))
-					ptrType &=
-					    DW_EH_PE_FORM | DW_EH_PE_signed;
-				endLoc =
-				    startLoc + read_pointer(&ptr,
-							    (const u8 *)(fde +
-									 1) +
-							    *fde, ptrType);
-				if (pc >= startLoc && pc < endLoc)
-					break;
+			} else {
+				fde = NULL;
+				cie = NULL;
 			}
 		}
 	}
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index 0ee7398..daf2bf5 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -619,10 +619,10 @@
 
 		int dirty = !test_and_set_bit(PG_dc_clean, &page->flags);
 		if (dirty) {
-			/* wback + inv dcache lines */
+			/* wback + inv dcache lines (K-mapping) */
 			__flush_dcache_page(paddr, paddr);
 
-			/* invalidate any existing icache lines */
+			/* invalidate any existing icache lines (U-mapping) */
 			if (vma->vm_flags & VM_EXEC)
 				__inv_icache_page(paddr, vaddr);
 		}
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 0365cbb..34e1569 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -76,6 +76,8 @@
 	select IRQ_FORCED_THREADING
 	select MODULES_USE_ELF_REL
 	select NO_BOOTMEM
+	select OF_EARLY_FLATTREE if OF
+	select OF_RESERVED_MEM if OF
 	select OLD_SIGACTION
 	select OLD_SIGSUSPEND3
 	select PERF_USE_VMALLOC
@@ -1822,8 +1824,6 @@
 	bool "Flattened Device Tree support"
 	select IRQ_DOMAIN
 	select OF
-	select OF_EARLY_FLATTREE
-	select OF_RESERVED_MEM
 	help
 	  Include support for flattened device tree machine descriptions.
 
diff --git a/arch/arm/boot/dts/am57xx-beagle-x15.dts b/arch/arm/boot/dts/am57xx-beagle-x15.dts
index d9ba6b8..00352e7 100644
--- a/arch/arm/boot/dts/am57xx-beagle-x15.dts
+++ b/arch/arm/boot/dts/am57xx-beagle-x15.dts
@@ -604,6 +604,7 @@
 		reg = <0x6f>;
 		interrupts-extended = <&crossbar_mpu GIC_SPI 2 IRQ_TYPE_EDGE_RISING>,
 				      <&dra7_pmx_core 0x424>;
+		interrupt-names = "irq", "wakeup";
 
 		pinctrl-names = "default";
 		pinctrl-0 = <&mcp79410_pins_default>;
diff --git a/arch/arm/boot/dts/animeo_ip.dts b/arch/arm/boot/dts/animeo_ip.dts
index 4e0ad3b..0962f2f 100644
--- a/arch/arm/boot/dts/animeo_ip.dts
+++ b/arch/arm/boot/dts/animeo_ip.dts
@@ -155,21 +155,21 @@
 			label = "keyswitch_in";
 			gpios = <&pioB 1 GPIO_ACTIVE_HIGH>;
 			linux,code = <28>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 
 		error_in {
 			label = "error_in";
 			gpios = <&pioB 2 GPIO_ACTIVE_HIGH>;
 			linux,code = <29>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 
 		btn {
 			label = "btn";
 			gpios = <&pioC 23 GPIO_ACTIVE_HIGH>;
 			linux,code = <31>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 	};
 };
diff --git a/arch/arm/boot/dts/armada-38x.dtsi b/arch/arm/boot/dts/armada-38x.dtsi
index c6a0e9d..e8b7f67 100644
--- a/arch/arm/boot/dts/armada-38x.dtsi
+++ b/arch/arm/boot/dts/armada-38x.dtsi
@@ -498,6 +498,7 @@
 				reg = <0x70000 0x4000>;
 				interrupts-extended = <&mpic 8>;
 				clocks = <&gateclk 4>;
+				tx-csum-limit = <9800>;
 				status = "disabled";
 			};
 
diff --git a/arch/arm/boot/dts/at91-foxg20.dts b/arch/arm/boot/dts/at91-foxg20.dts
index f89598a..6bf873e 100644
--- a/arch/arm/boot/dts/at91-foxg20.dts
+++ b/arch/arm/boot/dts/at91-foxg20.dts
@@ -159,7 +159,7 @@
 			label = "Button";
 			gpios = <&pioC 4 GPIO_ACTIVE_LOW>;
 			linux,code = <0x103>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 	};
 };
diff --git a/arch/arm/boot/dts/at91-kizbox.dts b/arch/arm/boot/dts/at91-kizbox.dts
index bf18ece..229e989 100644
--- a/arch/arm/boot/dts/at91-kizbox.dts
+++ b/arch/arm/boot/dts/at91-kizbox.dts
@@ -24,15 +24,6 @@
 	};
 
 	clocks {
-		#address-cells = <1>;
-		#size-cells = <1>;
-		ranges;
-
-		main_clock: clock@0 {
-			compatible = "atmel,osc", "fixed-clock";
-			clock-frequency = <18432000>;
-		};
-
 		main_xtal {
 			clock-frequency = <18432000>;
 		};
@@ -94,14 +85,14 @@
 			label = "PB_RST";
 			gpios = <&pioB 30 GPIO_ACTIVE_HIGH>;
 			linux,code = <0x100>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 
 		user {
 			label = "PB_USER";
 			gpios = <&pioB 31 GPIO_ACTIVE_HIGH>;
 			linux,code = <0x101>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 	};
 
diff --git a/arch/arm/boot/dts/at91-kizbox2.dts b/arch/arm/boot/dts/at91-kizbox2.dts
index f0b1563..50a1456 100644
--- a/arch/arm/boot/dts/at91-kizbox2.dts
+++ b/arch/arm/boot/dts/at91-kizbox2.dts
@@ -171,21 +171,21 @@
 			label = "PB_PROG";
 			gpios = <&pioE 27 GPIO_ACTIVE_LOW>;
 			linux,code = <0x102>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 
 		reset {
 			label = "PB_RST";
 			gpios = <&pioE 29 GPIO_ACTIVE_LOW>;
 			linux,code = <0x100>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 
 		user {
 			label = "PB_USER";
 			gpios = <&pioE 31 GPIO_ACTIVE_HIGH>;
 			linux,code = <0x101>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 	};
 
diff --git a/arch/arm/boot/dts/at91-kizboxmini.dts b/arch/arm/boot/dts/at91-kizboxmini.dts
index 9f72b49..9682d10 100644
--- a/arch/arm/boot/dts/at91-kizboxmini.dts
+++ b/arch/arm/boot/dts/at91-kizboxmini.dts
@@ -98,14 +98,14 @@
 			label = "PB_PROG";
 			gpios = <&pioC 17 GPIO_ACTIVE_LOW>;
 			linux,code = <0x102>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 
 		reset {
 			label = "PB_RST";
 			gpios = <&pioC 16 GPIO_ACTIVE_LOW>;
 			linux,code = <0x100>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 	};
 
diff --git a/arch/arm/boot/dts/at91-qil_a9260.dts b/arch/arm/boot/dts/at91-qil_a9260.dts
index a9aef53..4f2eebf 100644
--- a/arch/arm/boot/dts/at91-qil_a9260.dts
+++ b/arch/arm/boot/dts/at91-qil_a9260.dts
@@ -183,7 +183,7 @@
 			label = "user_pb";
 			gpios = <&pioB 10 GPIO_ACTIVE_LOW>;
 			linux,code = <28>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 	};
 
diff --git a/arch/arm/boot/dts/at91-sama5d2_xplained.dts b/arch/arm/boot/dts/at91-sama5d2_xplained.dts
index e07c2b2..ad6de73 100644
--- a/arch/arm/boot/dts/at91-sama5d2_xplained.dts
+++ b/arch/arm/boot/dts/at91-sama5d2_xplained.dts
@@ -45,6 +45,7 @@
 /dts-v1/;
 #include "sama5d2.dtsi"
 #include "sama5d2-pinfunc.h"
+#include <dt-bindings/mfd/atmel-flexcom.h>
 
 / {
 	model = "Atmel SAMA5D2 Xplained";
@@ -59,15 +60,6 @@
 	};
 
 	clocks {
-		#address-cells = <1>;
-		#size-cells = <1>;
-		ranges;
-
-		main_clock: clock@0 {
-			compatible = "atmel,osc", "fixed-clock";
-			clock-frequency = <12000000>;
-		};
-
 		slow_xtal {
 			clock-frequency = <32768>;
 		};
@@ -91,6 +83,22 @@
 			status = "okay";
 		};
 
+		sdmmc0: sdio-host@a0000000 {
+			bus-width = <8>;
+			pinctrl-names = "default";
+			pinctrl-0 = <&pinctrl_sdmmc0_default>;
+			non-removable;
+			mmc-ddr-1_8v;
+			status = "okay";
+		};
+
+		sdmmc1: sdio-host@b0000000 {
+			bus-width = <4>;
+			pinctrl-names = "default";
+			pinctrl-0 = <&pinctrl_sdmmc1_default>;
+			status = "okay"; /* conflict with qspi0 */
+		};
+
 		apb {
 			spi0: spi@f8000000 {
 				pinctrl-names = "default";
@@ -181,12 +189,49 @@
 				};
 			};
 
+			flx0: flexcom@f8034000 {
+				atmel,flexcom-mode = <ATMEL_FLEXCOM_MODE_USART>;
+				status = "disabled"; /* conflict with ISC_D2 & ISC_D3 data pins */
+
+				uart5: serial@200 {
+					compatible = "atmel,at91sam9260-usart";
+					reg = <0x200 0x200>;
+					interrupts = <19 IRQ_TYPE_LEVEL_HIGH 7>;
+					clocks = <&flx0_clk>;
+					clock-names = "usart";
+					pinctrl-names = "default";
+					pinctrl-0 = <&pinctrl_flx0_default>;
+					atmel,fifo-size = <32>;
+					status = "okay";
+				};
+			};
+
 			uart3: serial@fc008000 {
 				pinctrl-names = "default";
 				pinctrl-0 = <&pinctrl_uart3_default>;
 				status = "okay";
 			};
 
+			flx4: flexcom@fc018000 {
+				atmel,flexcom-mode = <ATMEL_FLEXCOM_MODE_TWI>;
+				status = "okay";
+
+				i2c2: i2c@600 {
+					compatible = "atmel,sama5d2-i2c";
+					reg = <0x600 0x200>;
+					interrupts = <23 IRQ_TYPE_LEVEL_HIGH 7>;
+					dmas = <0>, <0>;
+					dma-names = "tx", "rx";
+					#address-cells = <1>;
+					#size-cells = <0>;
+					clocks = <&flx4_clk>;
+					pinctrl-names = "default";
+					pinctrl-0 = <&pinctrl_flx4_default>;
+					atmel,fifo-size = <16>;
+					status = "okay";
+				};
+			};
+
 			i2c1: i2c@fc028000 {
 				dmas = <0>, <0>;
 				pinctrl-names = "default";
@@ -201,6 +246,18 @@
 			};
 
 			pinctrl@fc038000 {
+				pinctrl_flx0_default: flx0_default {
+					pinmux = <PIN_PB28__FLEXCOM0_IO0>,
+						 <PIN_PB29__FLEXCOM0_IO1>;
+					bias-disable;
+				};
+
+				pinctrl_flx4_default: flx4_default {
+					pinmux = <PIN_PD12__FLEXCOM4_IO0>,
+						 <PIN_PD13__FLEXCOM4_IO1>;
+					bias-disable;
+				};
+
 				pinctrl_i2c0_default: i2c0_default {
 					pinmux = <PIN_PD21__TWD0>,
 						 <PIN_PD22__TWCK0>;
@@ -227,6 +284,46 @@
 					bias-disable;
 				};
 
+				pinctrl_sdmmc0_default: sdmmc0_default {
+					cmd_data {
+						pinmux = <PIN_PA1__SDMMC0_CMD>,
+							 <PIN_PA2__SDMMC0_DAT0>,
+							 <PIN_PA3__SDMMC0_DAT1>,
+							 <PIN_PA4__SDMMC0_DAT2>,
+							 <PIN_PA5__SDMMC0_DAT3>,
+							 <PIN_PA6__SDMMC0_DAT4>,
+							 <PIN_PA7__SDMMC0_DAT5>,
+							 <PIN_PA8__SDMMC0_DAT6>,
+							 <PIN_PA9__SDMMC0_DAT7>;
+						bias-pull-up;
+					};
+
+					ck_cd_rstn_vddsel {
+						pinmux = <PIN_PA0__SDMMC0_CK>,
+							 <PIN_PA10__SDMMC0_RSTN>,
+							 <PIN_PA11__SDMMC0_VDDSEL>,
+							 <PIN_PA13__SDMMC0_CD>;
+						bias-disable;
+					};
+				};
+
+				pinctrl_sdmmc1_default: sdmmc1_default {
+					cmd_data {
+						pinmux = <PIN_PA28__SDMMC1_CMD>,
+							 <PIN_PA18__SDMMC1_DAT0>,
+							 <PIN_PA19__SDMMC1_DAT1>,
+							 <PIN_PA20__SDMMC1_DAT2>,
+							 <PIN_PA21__SDMMC1_DAT3>;
+						bias-pull-up;
+					};
+
+					conf-ck_cd {
+						pinmux = <PIN_PA22__SDMMC1_CK>,
+							 <PIN_PA30__SDMMC1_CD>;
+						bias-disable;
+					};
+				};
+
 				pinctrl_spi0_default: spi0_default {
 					pinmux = <PIN_PA14__SPI0_SPCK>,
 						 <PIN_PA15__SPI0_MOSI>,
diff --git a/arch/arm/boot/dts/at91-sama5d3_xplained.dts b/arch/arm/boot/dts/at91-sama5d3_xplained.dts
index 8488ac5..ff888d2 100644
--- a/arch/arm/boot/dts/at91-sama5d3_xplained.dts
+++ b/arch/arm/boot/dts/at91-sama5d3_xplained.dts
@@ -315,7 +315,7 @@
 			label = "PB_USER";
 			gpios = <&pioE 29 GPIO_ACTIVE_LOW>;
 			linux,code = <0x104>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 	};
 
diff --git a/arch/arm/boot/dts/at91-sama5d4_xplained.dts b/arch/arm/boot/dts/at91-sama5d4_xplained.dts
index 45371a1..131614f 100644
--- a/arch/arm/boot/dts/at91-sama5d4_xplained.dts
+++ b/arch/arm/boot/dts/at91-sama5d4_xplained.dts
@@ -50,7 +50,6 @@
 	compatible = "atmel,sama5d4-xplained", "atmel,sama5d4", "atmel,sama5";
 
 	chosen {
-		bootargs = "ignore_loglevel earlyprintk";
 		stdout-path = "serial0:115200n8";
 	};
 
@@ -59,15 +58,6 @@
 	};
 
 	clocks {
-		#address-cells = <1>;
-		#size-cells = <1>;
-		ranges;
-
-		main_clock: clock@0 {
-			compatible = "atmel,osc", "fixed-clock";
-			clock-frequency = <12000000>;
-		};
-
 		slow_xtal {
 			clock-frequency = <32768>;
 		};
@@ -235,7 +225,7 @@
 			label = "pb_user1";
 			gpios = <&pioE 8 GPIO_ACTIVE_HIGH>;
 			linux,code = <0x100>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 	};
 
diff --git a/arch/arm/boot/dts/at91-sama5d4ek.dts b/arch/arm/boot/dts/at91-sama5d4ek.dts
index 6d272c0..2d4a331 100644
--- a/arch/arm/boot/dts/at91-sama5d4ek.dts
+++ b/arch/arm/boot/dts/at91-sama5d4ek.dts
@@ -50,7 +50,6 @@
 	compatible = "atmel,sama5d4ek", "atmel,sama5d4", "atmel,sama5";
 
 	chosen {
-		bootargs = "ignore_loglevel earlyprintk";
 		stdout-path = "serial0:115200n8";
 	};
 
@@ -59,15 +58,6 @@
 	};
 
 	clocks {
-		#address-cells = <1>;
-		#size-cells = <1>;
-		ranges;
-
-		main_clock: clock@0 {
-			compatible = "atmel,osc", "fixed-clock";
-			clock-frequency = <12000000>;
-		};
-
 		slow_xtal {
 			clock-frequency = <32768>;
 		};
@@ -304,7 +294,7 @@
 			label = "pb_user1";
 			gpios = <&pioE 13 GPIO_ACTIVE_HIGH>;
 			linux,code = <0x100>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 	};
 
diff --git a/arch/arm/boot/dts/at91rm9200ek.dts b/arch/arm/boot/dts/at91rm9200ek.dts
index 8dab4b7..f90e1c2 100644
--- a/arch/arm/boot/dts/at91rm9200ek.dts
+++ b/arch/arm/boot/dts/at91rm9200ek.dts
@@ -21,15 +21,6 @@
 	};
 
 	clocks {
-		#address-cells = <1>;
-		#size-cells = <1>;
-		ranges;
-
-		main_clock: clock@0 {
-			compatible = "atmel,osc", "fixed-clock";
-			clock-frequency = <18432000>;
-		};
-
 		slow_xtal {
 			clock-frequency = <32768>;
 		};
diff --git a/arch/arm/boot/dts/at91sam9261ek.dts b/arch/arm/boot/dts/at91sam9261ek.dts
index 2e92ac0..55bd51f 100644
--- a/arch/arm/boot/dts/at91sam9261ek.dts
+++ b/arch/arm/boot/dts/at91sam9261ek.dts
@@ -22,15 +22,6 @@
 	};
 
 	clocks {
-		#address-cells = <1>;
-		#size-cells = <1>;
-		ranges;
-
-		main_clock: clock@0 {
-			compatible = "atmel,osc", "fixed-clock";
-			clock-frequency = <18432000>;
-		};
-
 		slow_xtal {
 			clock-frequency = <32768>;
 		};
@@ -149,7 +140,7 @@
 					ti,debounce-tol = /bits/ 16 <65535>;
 					ti,debounce-max = /bits/ 16 <1>;
 
-					linux,wakeup;
+					wakeup-source;
 				};
 			};
 
@@ -193,28 +184,28 @@
 			label = "button_0";
 			gpios = <&pioA 27 GPIO_ACTIVE_LOW>;
 			linux,code = <256>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 
 		button_1 {
 			label = "button_1";
 			gpios = <&pioA 26 GPIO_ACTIVE_LOW>;
 			linux,code = <257>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 
 		button_2 {
 			label = "button_2";
 			gpios = <&pioA 25 GPIO_ACTIVE_LOW>;
 			linux,code = <258>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 
 		button_3 {
 			label = "button_3";
 			gpios = <&pioA 24 GPIO_ACTIVE_LOW>;
 			linux,code = <259>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 	};
 };
diff --git a/arch/arm/boot/dts/at91sam9263ek.dts b/arch/arm/boot/dts/at91sam9263ek.dts
index 2338127..59df9d7 100644
--- a/arch/arm/boot/dts/at91sam9263ek.dts
+++ b/arch/arm/boot/dts/at91sam9263ek.dts
@@ -22,15 +22,6 @@
 	};
 
 	clocks {
-		#address-cells = <1>;
-		#size-cells = <1>;
-		ranges;
-
-		main_clock: clock@0 {
-			compatible = "atmel,osc", "fixed-clock";
-			clock-frequency = <16367660>;
-		};
-
 		slow_xtal {
 			clock-frequency = <32768>;
 		};
@@ -213,14 +204,14 @@
 			label = "left_click";
 			gpios = <&pioC 5 GPIO_ACTIVE_LOW>;
 			linux,code = <272>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 
 		right_click {
 			label = "right_click";
 			gpios = <&pioC 4 GPIO_ACTIVE_LOW>;
 			linux,code = <273>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 	};
 
diff --git a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi
index 57548a2..e9cc99b 100644
--- a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi
+++ b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi
@@ -19,15 +19,6 @@
 	};
 
 	clocks {
-		#address-cells = <1>;
-		#size-cells = <1>;
-		ranges;
-
-		main_clock: clock@0 {
-			compatible = "atmel,osc", "fixed-clock";
-			clock-frequency = <18432000>;
-		};
-
 		slow_xtal {
 			clock-frequency = <32768>;
 		};
@@ -206,14 +197,14 @@
 			label = "Button 3";
 			gpios = <&pioA 30 GPIO_ACTIVE_LOW>;
 			linux,code = <0x103>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 
 		btn4 {
 			label = "Button 4";
 			gpios = <&pioA 31 GPIO_ACTIVE_LOW>;
 			linux,code = <0x104>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 	};
 
diff --git a/arch/arm/boot/dts/at91sam9m10g45ek.dts b/arch/arm/boot/dts/at91sam9m10g45ek.dts
index 9d16ef8..2400c99 100644
--- a/arch/arm/boot/dts/at91sam9m10g45ek.dts
+++ b/arch/arm/boot/dts/at91sam9m10g45ek.dts
@@ -24,15 +24,6 @@
 	};
 
 	clocks {
-		#address-cells = <1>;
-		#size-cells = <1>;
-		ranges;
-
-		main_clock: clock@0 {
-			compatible = "atmel,osc", "fixed-clock";
-			clock-frequency = <12000000>;
-		};
-
 		slow_xtal {
 		      clock-frequency = <32768>;
 		};
@@ -323,14 +314,14 @@
 			label = "left_click";
 			gpios = <&pioB 6 GPIO_ACTIVE_LOW>;
 			linux,code = <272>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 
 		right_click {
 			label = "right_click";
 			gpios = <&pioB 7 GPIO_ACTIVE_LOW>;
 			linux,code = <273>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 
 		left {
diff --git a/arch/arm/boot/dts/at91sam9n12ek.dts b/arch/arm/boot/dts/at91sam9n12ek.dts
index acf3451..ca4ddf8 100644
--- a/arch/arm/boot/dts/at91sam9n12ek.dts
+++ b/arch/arm/boot/dts/at91sam9n12ek.dts
@@ -23,15 +23,6 @@
 	};
 
 	clocks {
-		#address-cells = <1>;
-		#size-cells = <1>;
-		ranges;
-
-		main_clock: clock@0 {
-			compatible = "atmel,osc", "fixed-clock";
-			clock-frequency = <16000000>;
-		};
-
 		slow_xtal {
 			clock-frequency = <32768>;
 		};
@@ -219,7 +210,7 @@
 			label = "Enter";
 			gpios = <&pioB 3 GPIO_ACTIVE_LOW>;
 			linux,code = <28>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 	};
 
diff --git a/arch/arm/boot/dts/at91sam9rlek.dts b/arch/arm/boot/dts/at91sam9rlek.dts
index 558c9f2..f10566f 100644
--- a/arch/arm/boot/dts/at91sam9rlek.dts
+++ b/arch/arm/boot/dts/at91sam9rlek.dts
@@ -22,15 +22,6 @@
 	};
 
 	clocks {
-		#address-cells = <1>;
-		#size-cells = <1>;
-		ranges;
-
-		main_clock: clock {
-			compatible = "atmel,osc", "fixed-clock";
-			clock-frequency = <12000000>;
-		};
-
 		slow_xtal {
 			clock-frequency = <32768>;
 		};
@@ -225,14 +216,14 @@
 			label = "right_click";
 			gpios = <&pioB 0 GPIO_ACTIVE_LOW>;
 			linux,code = <273>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 
 		left_click {
 			label = "left_click";
 			gpios = <&pioB 1 GPIO_ACTIVE_LOW>;
 			linux,code = <272>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 	};
 
diff --git a/arch/arm/boot/dts/at91sam9x5cm.dtsi b/arch/arm/boot/dts/at91sam9x5cm.dtsi
index 26112eb..b098ad8 100644
--- a/arch/arm/boot/dts/at91sam9x5cm.dtsi
+++ b/arch/arm/boot/dts/at91sam9x5cm.dtsi
@@ -13,17 +13,6 @@
 	};
 
 	clocks {
-		#address-cells = <1>;
-		#size-cells = <1>;
-		ranges;
-
-		main_clock: clock@0 {
-			compatible = "atmel,osc", "fixed-clock";
-			clock-frequency = <12000000>;
-		};
-	};
-
-	clocks {
 		slow_xtal {
 			clock-frequency = <32768>;
 		};
diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index bc672fb..fe99231 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -1459,8 +1459,8 @@
 			interrupt-names = "tx", "rx";
 			dmas = <&sdma_xbar 133>, <&sdma_xbar 132>;
 			dma-names = "tx", "rx";
-			clocks = <&mcasp3_ahclkx_mux>;
-			clock-names = "fck";
+			clocks = <&mcasp3_aux_gfclk_mux>, <&mcasp3_ahclkx_mux>;
+			clock-names = "fck", "ahclkx";
 			status = "disabled";
 		};
 
diff --git a/arch/arm/boot/dts/exynos5800-peach-pi.dts b/arch/arm/boot/dts/exynos5800-peach-pi.dts
index 49a4f43..1cc2e95 100644
--- a/arch/arm/boot/dts/exynos5800-peach-pi.dts
+++ b/arch/arm/boot/dts/exynos5800-peach-pi.dts
@@ -122,6 +122,12 @@
 		compatible = "auo,b133htn01";
 		power-supply = <&tps65090_fet6>;
 		backlight = <&backlight>;
+
+		port {
+			panel_in: endpoint {
+				remote-endpoint = <&dp_out>;
+			};
+		};
 	};
 
 	mmc1_pwrseq: mmc1_pwrseq {
@@ -148,7 +154,14 @@
 	samsung,link-rate = <0x0a>;
 	samsung,lane-count = <2>;
 	samsung,hpd-gpio = <&gpx2 6 GPIO_ACTIVE_HIGH>;
-	panel = <&panel>;
+
+	ports {
+		port {
+			dp_out: endpoint {
+				remote-endpoint = <&panel_in>;
+			};
+		};
+	};
 };
 
 &fimd {
diff --git a/arch/arm/boot/dts/imx27.dtsi b/arch/arm/boot/dts/imx27.dtsi
index feb9d34..f818ea4 100644
--- a/arch/arm/boot/dts/imx27.dtsi
+++ b/arch/arm/boot/dts/imx27.dtsi
@@ -486,7 +486,10 @@
 				compatible = "fsl,imx27-usb";
 				reg = <0x10024000 0x200>;
 				interrupts = <56>;
-				clocks = <&clks IMX27_CLK_USB_IPG_GATE>;
+				clocks = <&clks IMX27_CLK_USB_IPG_GATE>,
+					<&clks IMX27_CLK_USB_AHB_GATE>,
+					<&clks IMX27_CLK_USB_DIV>;
+				clock-names = "ipg", "ahb", "per";
 				fsl,usbmisc = <&usbmisc 0>;
 				status = "disabled";
 			};
@@ -495,7 +498,10 @@
 				compatible = "fsl,imx27-usb";
 				reg = <0x10024200 0x200>;
 				interrupts = <54>;
-				clocks = <&clks IMX27_CLK_USB_IPG_GATE>;
+				clocks = <&clks IMX27_CLK_USB_IPG_GATE>,
+					<&clks IMX27_CLK_USB_AHB_GATE>,
+					<&clks IMX27_CLK_USB_DIV>;
+				clock-names = "ipg", "ahb", "per";
 				fsl,usbmisc = <&usbmisc 1>;
 				dr_mode = "host";
 				status = "disabled";
@@ -505,7 +511,10 @@
 				compatible = "fsl,imx27-usb";
 				reg = <0x10024400 0x200>;
 				interrupts = <55>;
-				clocks = <&clks IMX27_CLK_USB_IPG_GATE>;
+				clocks = <&clks IMX27_CLK_USB_IPG_GATE>,
+					<&clks IMX27_CLK_USB_AHB_GATE>,
+					<&clks IMX27_CLK_USB_DIV>;
+				clock-names = "ipg", "ahb", "per";
 				fsl,usbmisc = <&usbmisc 2>;
 				dr_mode = "host";
 				status = "disabled";
@@ -515,7 +524,6 @@
 				#index-cells = <1>;
 				compatible = "fsl,imx27-usbmisc";
 				reg = <0x10024600 0x200>;
-				clocks = <&clks IMX27_CLK_USB_AHB_GATE>;
 			};
 
 			sahara2: sahara@10025000 {
diff --git a/arch/arm/boot/dts/k2l-netcp.dtsi b/arch/arm/boot/dts/k2l-netcp.dtsi
index 01aef23..5acbd0d 100644
--- a/arch/arm/boot/dts/k2l-netcp.dtsi
+++ b/arch/arm/boot/dts/k2l-netcp.dtsi
@@ -137,7 +137,7 @@
 	/* NetCP address range */
 	ranges = <0 0x26000000 0x1000000>;
 
-	clocks = <&papllclk>, <&clkcpgmac>, <&chipclk12>;
+	clocks = <&clkosr>, <&papllclk>, <&clkcpgmac>, <&chipclk12>;
 	dma-coherent;
 
 	ti,navigator-dmas = <&dma_gbe 0>,
diff --git a/arch/arm/boot/dts/kirkwood-ts219.dtsi b/arch/arm/boot/dts/kirkwood-ts219.dtsi
index c56ab6b..0e46560 100644
--- a/arch/arm/boot/dts/kirkwood-ts219.dtsi
+++ b/arch/arm/boot/dts/kirkwood-ts219.dtsi
@@ -40,7 +40,7 @@
 		};
 		poweroff@12100 {
 			compatible = "qnap,power-off";
-			reg = <0x12000 0x100>;
+			reg = <0x12100 0x100>;
 			clocks = <&gate_clk 7>;
 		};
 		spi@10600 {
diff --git a/arch/arm/boot/dts/rk3288-veyron-minnie.dts b/arch/arm/boot/dts/rk3288-veyron-minnie.dts
index 8fd8ef2..85f0373 100644
--- a/arch/arm/boot/dts/rk3288-veyron-minnie.dts
+++ b/arch/arm/boot/dts/rk3288-veyron-minnie.dts
@@ -86,6 +86,10 @@
 	};
 };
 
+&emmc {
+	/delete-property/mmc-hs200-1_8v;
+};
+
 &gpio_keys {
 	pinctrl-0 = <&pwr_key_l &ap_lid_int_l &volum_down_l &volum_up_l>;
 
diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi
index 6a79c9c..04ea209 100644
--- a/arch/arm/boot/dts/rk3288.dtsi
+++ b/arch/arm/boot/dts/rk3288.dtsi
@@ -452,8 +452,10 @@
 		clock-names = "tsadc", "apb_pclk";
 		resets = <&cru SRST_TSADC>;
 		reset-names = "tsadc-apb";
-		pinctrl-names = "default";
-		pinctrl-0 = <&otp_out>;
+		pinctrl-names = "init", "default", "sleep";
+		pinctrl-0 = <&otp_gpio>;
+		pinctrl-1 = <&otp_out>;
+		pinctrl-2 = <&otp_gpio>;
 		#thermal-sensor-cells = <1>;
 		rockchip,hw-tshut-temp = <95000>;
 		status = "disabled";
@@ -1395,6 +1397,10 @@
 		};
 
 		tsadc {
+			otp_gpio: otp-gpio {
+				rockchip,pins = <0 10 RK_FUNC_GPIO &pcfg_pull_none>;
+			};
+
 			otp_out: otp-out {
 				rockchip,pins = <0 10 RK_FUNC_1 &pcfg_pull_none>;
 			};
diff --git a/arch/arm/boot/dts/sama5d35ek.dts b/arch/arm/boot/dts/sama5d35ek.dts
index d9a9aca..e812f5c 100644
--- a/arch/arm/boot/dts/sama5d35ek.dts
+++ b/arch/arm/boot/dts/sama5d35ek.dts
@@ -49,7 +49,7 @@
 			label = "pb_user1";
 			gpios = <&pioE 27 GPIO_ACTIVE_HIGH>;
 			linux,code = <0x100>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 	};
 };
diff --git a/arch/arm/boot/dts/sama5d4.dtsi b/arch/arm/boot/dts/sama5d4.dtsi
index 15bbaf6..2193637 100644
--- a/arch/arm/boot/dts/sama5d4.dtsi
+++ b/arch/arm/boot/dts/sama5d4.dtsi
@@ -1300,7 +1300,7 @@
 			};
 
 			watchdog@fc068640 {
-				compatible = "atmel,at91sam9260-wdt";
+				compatible = "atmel,sama5d4-wdt";
 				reg = <0xfc068640 0x10>;
 				clocks = <&clk32k>;
 				status = "disabled";
diff --git a/arch/arm/boot/dts/usb_a9260_common.dtsi b/arch/arm/boot/dts/usb_a9260_common.dtsi
index 12edafe..9beea89 100644
--- a/arch/arm/boot/dts/usb_a9260_common.dtsi
+++ b/arch/arm/boot/dts/usb_a9260_common.dtsi
@@ -115,7 +115,7 @@
 			label = "user_pb";
 			gpios = <&pioB 10 GPIO_ACTIVE_LOW>;
 			linux,code = <28>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 	};
 
diff --git a/arch/arm/boot/dts/usb_a9263.dts b/arch/arm/boot/dts/usb_a9263.dts
index 68c0de3..8cc6edb 100644
--- a/arch/arm/boot/dts/usb_a9263.dts
+++ b/arch/arm/boot/dts/usb_a9263.dts
@@ -143,7 +143,7 @@
 			label = "user_pb";
 			gpios = <&pioB 10 GPIO_ACTIVE_LOW>;
 			linux,code = <28>;
-			gpio-key,wakeup;
+			wakeup-source;
 		};
 	};
 
diff --git a/arch/arm/boot/dts/vfxxx.dtsi b/arch/arm/boot/dts/vfxxx.dtsi
index 6736bae..0d5acc2 100644
--- a/arch/arm/boot/dts/vfxxx.dtsi
+++ b/arch/arm/boot/dts/vfxxx.dtsi
@@ -158,7 +158,7 @@
 				interrupts = <67 IRQ_TYPE_LEVEL_HIGH>;
 				clocks = <&clks VF610_CLK_DSPI0>;
 				clock-names = "dspi";
-				spi-num-chipselects = <5>;
+				spi-num-chipselects = <6>;
 				status = "disabled";
 			};
 
@@ -170,7 +170,7 @@
 				interrupts = <68 IRQ_TYPE_LEVEL_HIGH>;
 				clocks = <&clks VF610_CLK_DSPI1>;
 				clock-names = "dspi";
-				spi-num-chipselects = <5>;
+				spi-num-chipselects = <4>;
 				status = "disabled";
 			};
 
@@ -461,6 +461,8 @@
 				clock-names = "adc";
 				#io-channel-cells = <1>;
 				status = "disabled";
+				fsl,adck-max-frequency = <30000000>, <40000000>,
+							<20000000>;
 			};
 
 			esdhc0: esdhc@400b1000 {
@@ -472,8 +474,6 @@
 					<&clks VF610_CLK_ESDHC0>;
 				clock-names = "ipg", "ahb", "per";
 				status = "disabled";
-				fsl,adck-max-frequency = <30000000>, <40000000>,
-							<20000000>;
 			};
 
 			esdhc1: esdhc@400b2000 {
diff --git a/arch/arm/configs/at91_dt_defconfig b/arch/arm/configs/at91_dt_defconfig
index 1b1e5ac..e4b1be6 100644
--- a/arch/arm/configs/at91_dt_defconfig
+++ b/arch/arm/configs/at91_dt_defconfig
@@ -125,7 +125,6 @@
 # CONFIG_HWMON is not set
 CONFIG_WATCHDOG=y
 CONFIG_AT91SAM9X_WATCHDOG=y
-CONFIG_SSB=m
 CONFIG_MFD_ATMEL_HLCDC=y
 CONFIG_REGULATOR=y
 CONFIG_REGULATOR_FIXED_VOLTAGE=y
diff --git a/arch/arm/configs/sama5_defconfig b/arch/arm/configs/sama5_defconfig
index a0c57ac..63f7e6c 100644
--- a/arch/arm/configs/sama5_defconfig
+++ b/arch/arm/configs/sama5_defconfig
@@ -129,7 +129,6 @@
 CONFIG_POWER_SUPPLY=y
 CONFIG_POWER_RESET=y
 # CONFIG_HWMON is not set
-CONFIG_SSB=m
 CONFIG_MFD_ATMEL_FLEXCOM=y
 CONFIG_REGULATOR=y
 CONFIG_REGULATOR_FIXED_VOLTAGE=y
diff --git a/arch/arm/include/asm/irq.h b/arch/arm/include/asm/irq.h
index be1d07d..1bd9510de 100644
--- a/arch/arm/include/asm/irq.h
+++ b/arch/arm/include/asm/irq.h
@@ -40,6 +40,11 @@
 #define arch_trigger_all_cpu_backtrace(x) arch_trigger_all_cpu_backtrace(x)
 #endif
 
+static inline int nr_legacy_irqs(void)
+{
+	return NR_IRQS_LEGACY;
+}
+
 #endif
 
 #endif
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index a9c80a2..3095df0 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -28,6 +28,18 @@
 unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num);
 unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu);
 
+static inline unsigned long vcpu_get_reg(struct kvm_vcpu *vcpu,
+					 u8 reg_num)
+{
+	return *vcpu_reg(vcpu, reg_num);
+}
+
+static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num,
+				unsigned long val)
+{
+	*vcpu_reg(vcpu, reg_num) = val;
+}
+
 bool kvm_condition_valid(struct kvm_vcpu *vcpu);
 void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr);
 void kvm_inject_undefined(struct kvm_vcpu *vcpu);
diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h
index 7a2a32a1..ede692f 100644
--- a/arch/arm/include/uapi/asm/unistd.h
+++ b/arch/arm/include/uapi/asm/unistd.h
@@ -416,6 +416,7 @@
 #define __NR_execveat			(__NR_SYSCALL_BASE+387)
 #define __NR_userfaultfd		(__NR_SYSCALL_BASE+388)
 #define __NR_membarrier			(__NR_SYSCALL_BASE+389)
+#define __NR_mlock2			(__NR_SYSCALL_BASE+390)
 
 /*
  * The following SWIs are ARM private.
diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c
index 6551d28..066f7f9 100644
--- a/arch/arm/kernel/bios32.c
+++ b/arch/arm/kernel/bios32.c
@@ -17,11 +17,6 @@
 #include <asm/mach/pci.h>
 
 static int debug_pci;
-static resource_size_t (*align_resource)(struct pci_dev *dev,
-		  const struct resource *res,
-		  resource_size_t start,
-		  resource_size_t size,
-		  resource_size_t align) = NULL;
 
 /*
  * We can't use pci_get_device() here since we are
@@ -461,7 +456,6 @@
 		sys->busnr   = busnr;
 		sys->swizzle = hw->swizzle;
 		sys->map_irq = hw->map_irq;
-		align_resource = hw->align_resource;
 		INIT_LIST_HEAD(&sys->resources);
 
 		if (hw->private_data)
@@ -470,6 +464,8 @@
 		ret = hw->setup(nr, sys);
 
 		if (ret > 0) {
+			struct pci_host_bridge *host_bridge;
+
 			ret = pcibios_init_resources(nr, sys);
 			if (ret)  {
 				kfree(sys);
@@ -491,6 +487,9 @@
 			busnr = sys->bus->busn_res.end + 1;
 
 			list_add(&sys->node, head);
+
+			host_bridge = pci_find_host_bridge(sys->bus);
+			host_bridge->align_resource = hw->align_resource;
 		} else {
 			kfree(sys);
 			if (ret < 0)
@@ -578,14 +577,18 @@
 {
 	struct pci_dev *dev = data;
 	resource_size_t start = res->start;
+	struct pci_host_bridge *host_bridge;
 
 	if (res->flags & IORESOURCE_IO && start & 0x300)
 		start = (start + 0x3ff) & ~0x3ff;
 
 	start = (start + align - 1) & ~(align - 1);
 
-	if (align_resource)
-		return align_resource(dev, res, start, size, align);
+	host_bridge = pci_find_host_bridge(dev->bus);
+
+	if (host_bridge->align_resource)
+		return host_bridge->align_resource(dev, res,
+				start, size, align);
 
 	return start;
 }
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index fde6c88..ac368bb 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -399,6 +399,7 @@
 		CALL(sys_execveat)
 		CALL(sys_userfaultfd)
 		CALL(sys_membarrier)
+		CALL(sys_mlock2)
 #ifndef syscalls_counted
 .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
 #define syscalls_counted
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index eab83b2..e06fd29 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -564,17 +564,12 @@
 			vcpu_sleep(vcpu);
 
 		/*
-		 * Disarming the background timer must be done in a
-		 * preemptible context, as this call may sleep.
-		 */
-		kvm_timer_flush_hwstate(vcpu);
-
-		/*
 		 * Preparing the interrupts to be injected also
 		 * involves poking the GIC, which must be done in a
 		 * non-preemptible context.
 		 */
 		preempt_disable();
+		kvm_timer_flush_hwstate(vcpu);
 		kvm_vgic_flush_hwstate(vcpu);
 
 		local_irq_disable();
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index 974b1c6..3a10c9f 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -115,7 +115,7 @@
 		trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
 			       data);
 		data = vcpu_data_host_to_guest(vcpu, data, len);
-		*vcpu_reg(vcpu, vcpu->arch.mmio_decode.rt) = data;
+		vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data);
 	}
 
 	return 0;
@@ -186,7 +186,8 @@
 	rt = vcpu->arch.mmio_decode.rt;
 
 	if (is_write) {
-		data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), len);
+		data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt),
+					       len);
 
 		trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data);
 		mmio_write_buf(data_buf, len, data);
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 6984342..61d96a6 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -98,6 +98,11 @@
 	__kvm_flush_dcache_pud(pud);
 }
 
+static bool kvm_is_device_pfn(unsigned long pfn)
+{
+	return !pfn_valid(pfn);
+}
+
 /**
  * stage2_dissolve_pmd() - clear and flush huge PMD entry
  * @kvm:	pointer to kvm structure.
@@ -213,7 +218,7 @@
 			kvm_tlb_flush_vmid_ipa(kvm, addr);
 
 			/* No need to invalidate the cache for device mappings */
-			if ((pte_val(old_pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE)
+			if (!kvm_is_device_pfn(pte_pfn(old_pte)))
 				kvm_flush_dcache_pte(old_pte);
 
 			put_page(virt_to_page(pte));
@@ -305,8 +310,7 @@
 
 	pte = pte_offset_kernel(pmd, addr);
 	do {
-		if (!pte_none(*pte) &&
-		    (pte_val(*pte) & PAGE_S2_DEVICE) != PAGE_S2_DEVICE)
+		if (!pte_none(*pte) && !kvm_is_device_pfn(pte_pfn(*pte)))
 			kvm_flush_dcache_pte(*pte);
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 }
@@ -1037,11 +1041,6 @@
 	return kvm_vcpu_dabt_iswrite(vcpu);
 }
 
-static bool kvm_is_device_pfn(unsigned long pfn)
-{
-	return !pfn_valid(pfn);
-}
-
 /**
  * stage2_wp_ptes - write protect PMD range
  * @pmd:	pointer to pmd entry
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 0b55696..a9b3b90 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -75,7 +75,7 @@
 	unsigned long context_id;
 	phys_addr_t target_pc;
 
-	cpu_id = *vcpu_reg(source_vcpu, 1) & MPIDR_HWID_BITMASK;
+	cpu_id = vcpu_get_reg(source_vcpu, 1) & MPIDR_HWID_BITMASK;
 	if (vcpu_mode_is_32bit(source_vcpu))
 		cpu_id &= ~((u32) 0);
 
@@ -94,8 +94,8 @@
 			return PSCI_RET_INVALID_PARAMS;
 	}
 
-	target_pc = *vcpu_reg(source_vcpu, 2);
-	context_id = *vcpu_reg(source_vcpu, 3);
+	target_pc = vcpu_get_reg(source_vcpu, 2);
+	context_id = vcpu_get_reg(source_vcpu, 3);
 
 	kvm_reset_vcpu(vcpu);
 
@@ -114,7 +114,7 @@
 	 * NOTE: We always update r0 (or x0) because for PSCI v0.1
 	 * the general puspose registers are undefined upon CPU_ON.
 	 */
-	*vcpu_reg(vcpu, 0) = context_id;
+	vcpu_set_reg(vcpu, 0, context_id);
 	vcpu->arch.power_off = false;
 	smp_mb();		/* Make sure the above is visible */
 
@@ -134,8 +134,8 @@
 	struct kvm *kvm = vcpu->kvm;
 	struct kvm_vcpu *tmp;
 
-	target_affinity = *vcpu_reg(vcpu, 1);
-	lowest_affinity_level = *vcpu_reg(vcpu, 2);
+	target_affinity = vcpu_get_reg(vcpu, 1);
+	lowest_affinity_level = vcpu_get_reg(vcpu, 2);
 
 	/* Determine target affinity mask */
 	target_affinity_mask = psci_affinity_mask(lowest_affinity_level);
@@ -209,7 +209,7 @@
 static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
 {
 	int ret = 1;
-	unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0);
+	unsigned long psci_fn = vcpu_get_reg(vcpu, 0) & ~((u32) 0);
 	unsigned long val;
 
 	switch (psci_fn) {
@@ -273,13 +273,13 @@
 		break;
 	}
 
-	*vcpu_reg(vcpu, 0) = val;
+	vcpu_set_reg(vcpu, 0, val);
 	return ret;
 }
 
 static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
 {
-	unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0);
+	unsigned long psci_fn = vcpu_get_reg(vcpu, 0) & ~((u32) 0);
 	unsigned long val;
 
 	switch (psci_fn) {
@@ -295,7 +295,7 @@
 		break;
 	}
 
-	*vcpu_reg(vcpu, 0) = val;
+	vcpu_set_reg(vcpu, 0, val);
 	return 1;
 }
 
diff --git a/arch/arm/mach-dove/include/mach/entry-macro.S b/arch/arm/mach-dove/include/mach/entry-macro.S
index 72d622b..df1d44b 100644
--- a/arch/arm/mach-dove/include/mach/entry-macro.S
+++ b/arch/arm/mach-dove/include/mach/entry-macro.S
@@ -18,13 +18,13 @@
 	@ check low interrupts
 	ldr	\irqstat, [\base, #IRQ_CAUSE_LOW_OFF]
 	ldr	\tmp, [\base, #IRQ_MASK_LOW_OFF]
-	mov	\irqnr, #31
+	mov	\irqnr, #32
 	ands	\irqstat, \irqstat, \tmp
 
 	@ if no low interrupts set, check high interrupts
 	ldreq	\irqstat, [\base, #IRQ_CAUSE_HIGH_OFF]
 	ldreq	\tmp, [\base, #IRQ_MASK_HIGH_OFF]
-	moveq	\irqnr, #63
+	moveq	\irqnr, #64
 	andeqs	\irqstat, \irqstat, \tmp
 
 	@ find first active interrupt source
diff --git a/arch/arm/mach-imx/gpc.c b/arch/arm/mach-imx/gpc.c
index 8e7976a..cfc696b 100644
--- a/arch/arm/mach-imx/gpc.c
+++ b/arch/arm/mach-imx/gpc.c
@@ -177,6 +177,7 @@
 	.irq_unmask		= imx_gpc_irq_unmask,
 	.irq_retrigger		= irq_chip_retrigger_hierarchy,
 	.irq_set_wake		= imx_gpc_irq_set_wake,
+	.irq_set_type           = irq_chip_set_type_parent,
 #ifdef CONFIG_SMP
 	.irq_set_affinity	= irq_chip_set_affinity_parent,
 #endif
diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c
index 5305ec7..79e1f87 100644
--- a/arch/arm/mach-omap2/omap-smp.c
+++ b/arch/arm/mach-omap2/omap-smp.c
@@ -143,9 +143,9 @@
 		 * Ensure that CPU power state is set to ON to avoid CPU
 		 * powerdomain transition on wfi
 		 */
-		clkdm_wakeup(cpu1_clkdm);
-		omap_set_pwrdm_state(cpu1_pwrdm, PWRDM_POWER_ON);
-		clkdm_allow_idle(cpu1_clkdm);
+		clkdm_wakeup_nolock(cpu1_clkdm);
+		pwrdm_set_next_pwrst(cpu1_pwrdm, PWRDM_POWER_ON);
+		clkdm_allow_idle_nolock(cpu1_clkdm);
 
 		if (IS_PM44XX_ERRATUM(PM_OMAP4_ROM_SMP_BOOT_ERRATUM_GICD)) {
 			while (gic_dist_disabled()) {
diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c
index cc8a9871..48495ad 100644
--- a/arch/arm/mach-omap2/omap_hwmod.c
+++ b/arch/arm/mach-omap2/omap_hwmod.c
@@ -890,6 +890,36 @@
 	return ret;
 }
 
+static void _enable_optional_clocks(struct omap_hwmod *oh)
+{
+	struct omap_hwmod_opt_clk *oc;
+	int i;
+
+	pr_debug("omap_hwmod: %s: enabling optional clocks\n", oh->name);
+
+	for (i = oh->opt_clks_cnt, oc = oh->opt_clks; i > 0; i--, oc++)
+		if (oc->_clk) {
+			pr_debug("omap_hwmod: enable %s:%s\n", oc->role,
+				 __clk_get_name(oc->_clk));
+			clk_enable(oc->_clk);
+		}
+}
+
+static void _disable_optional_clocks(struct omap_hwmod *oh)
+{
+	struct omap_hwmod_opt_clk *oc;
+	int i;
+
+	pr_debug("omap_hwmod: %s: disabling optional clocks\n", oh->name);
+
+	for (i = oh->opt_clks_cnt, oc = oh->opt_clks; i > 0; i--, oc++)
+		if (oc->_clk) {
+			pr_debug("omap_hwmod: disable %s:%s\n", oc->role,
+				 __clk_get_name(oc->_clk));
+			clk_disable(oc->_clk);
+		}
+}
+
 /**
  * _enable_clocks - enable hwmod main clock and interface clocks
  * @oh: struct omap_hwmod *
@@ -917,6 +947,9 @@
 			clk_enable(os->_clk);
 	}
 
+	if (oh->flags & HWMOD_OPT_CLKS_NEEDED)
+		_enable_optional_clocks(oh);
+
 	/* The opt clocks are controlled by the device driver. */
 
 	return 0;
@@ -948,41 +981,14 @@
 			clk_disable(os->_clk);
 	}
 
+	if (oh->flags & HWMOD_OPT_CLKS_NEEDED)
+		_disable_optional_clocks(oh);
+
 	/* The opt clocks are controlled by the device driver. */
 
 	return 0;
 }
 
-static void _enable_optional_clocks(struct omap_hwmod *oh)
-{
-	struct omap_hwmod_opt_clk *oc;
-	int i;
-
-	pr_debug("omap_hwmod: %s: enabling optional clocks\n", oh->name);
-
-	for (i = oh->opt_clks_cnt, oc = oh->opt_clks; i > 0; i--, oc++)
-		if (oc->_clk) {
-			pr_debug("omap_hwmod: enable %s:%s\n", oc->role,
-				 __clk_get_name(oc->_clk));
-			clk_enable(oc->_clk);
-		}
-}
-
-static void _disable_optional_clocks(struct omap_hwmod *oh)
-{
-	struct omap_hwmod_opt_clk *oc;
-	int i;
-
-	pr_debug("omap_hwmod: %s: disabling optional clocks\n", oh->name);
-
-	for (i = oh->opt_clks_cnt, oc = oh->opt_clks; i > 0; i--, oc++)
-		if (oc->_clk) {
-			pr_debug("omap_hwmod: disable %s:%s\n", oc->role,
-				 __clk_get_name(oc->_clk));
-			clk_disable(oc->_clk);
-		}
-}
-
 /**
  * _omap4_enable_module - enable CLKCTRL modulemode on OMAP4
  * @oh: struct omap_hwmod *
diff --git a/arch/arm/mach-omap2/omap_hwmod.h b/arch/arm/mach-omap2/omap_hwmod.h
index ca6df1a..76bce11 100644
--- a/arch/arm/mach-omap2/omap_hwmod.h
+++ b/arch/arm/mach-omap2/omap_hwmod.h
@@ -523,6 +523,8 @@
  * HWMOD_RECONFIG_IO_CHAIN: omap_hwmod code needs to reconfigure wake-up 
  *     events by calling _reconfigure_io_chain() when a device is enabled
  *     or idled.
+ * HWMOD_OPT_CLKS_NEEDED: The optional clocks are needed for the module to
+ *     operate and they need to be handled at the same time as the main_clk.
  */
 #define HWMOD_SWSUP_SIDLE			(1 << 0)
 #define HWMOD_SWSUP_MSTANDBY			(1 << 1)
@@ -538,6 +540,7 @@
 #define HWMOD_FORCE_MSTANDBY			(1 << 11)
 #define HWMOD_SWSUP_SIDLE_ACT			(1 << 12)
 #define HWMOD_RECONFIG_IO_CHAIN			(1 << 13)
+#define HWMOD_OPT_CLKS_NEEDED			(1 << 14)
 
 /*
  * omap_hwmod._int_flags definitions
diff --git a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c
index 51d1ecb..ee4e044 100644
--- a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c
@@ -1298,6 +1298,44 @@
 };
 
 /*
+ * 'mcasp' class
+ *
+ */
+static struct omap_hwmod_class_sysconfig dra7xx_mcasp_sysc = {
+	.sysc_offs	= 0x0004,
+	.sysc_flags	= SYSC_HAS_SIDLEMODE,
+	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART),
+	.sysc_fields	= &omap_hwmod_sysc_type3,
+};
+
+static struct omap_hwmod_class dra7xx_mcasp_hwmod_class = {
+	.name	= "mcasp",
+	.sysc	= &dra7xx_mcasp_sysc,
+};
+
+/* mcasp3 */
+static struct omap_hwmod_opt_clk mcasp3_opt_clks[] = {
+	{ .role = "ahclkx", .clk = "mcasp3_ahclkx_mux" },
+};
+
+static struct omap_hwmod dra7xx_mcasp3_hwmod = {
+	.name		= "mcasp3",
+	.class		= &dra7xx_mcasp_hwmod_class,
+	.clkdm_name	= "l4per2_clkdm",
+	.main_clk	= "mcasp3_aux_gfclk_mux",
+	.flags		= HWMOD_OPT_CLKS_NEEDED,
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L4PER2_MCASP3_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L4PER2_MCASP3_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+	.opt_clks	= mcasp3_opt_clks,
+	.opt_clks_cnt	= ARRAY_SIZE(mcasp3_opt_clks),
+};
+
+/*
  * 'mmc' class
  *
  */
@@ -2566,6 +2604,22 @@
 	.user		= OCP_USER_MPU | OCP_USER_SDMA,
 };
 
+/* l4_per2 -> mcasp3 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per2__mcasp3 = {
+	.master		= &dra7xx_l4_per2_hwmod,
+	.slave		= &dra7xx_mcasp3_hwmod,
+	.clk		= "l4_root_clk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l3_main_1 -> mcasp3 */
+static struct omap_hwmod_ocp_if dra7xx_l3_main_1__mcasp3 = {
+	.master		= &dra7xx_l3_main_1_hwmod,
+	.slave		= &dra7xx_mcasp3_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
 /* l4_per1 -> elm */
 static struct omap_hwmod_ocp_if dra7xx_l4_per1__elm = {
 	.master		= &dra7xx_l4_per1_hwmod,
@@ -3308,6 +3362,8 @@
 	&dra7xx_l4_wkup__dcan1,
 	&dra7xx_l4_per2__dcan2,
 	&dra7xx_l4_per2__cpgmac0,
+	&dra7xx_l4_per2__mcasp3,
+	&dra7xx_l3_main_1__mcasp3,
 	&dra7xx_gmac__mdio,
 	&dra7xx_l4_cfg__dma_system,
 	&dra7xx_l3_main_1__dss,
diff --git a/arch/arm/mach-omap2/omap_hwmod_81xx_data.c b/arch/arm/mach-omap2/omap_hwmod_81xx_data.c
index b1288f5..6256052 100644
--- a/arch/arm/mach-omap2/omap_hwmod_81xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_81xx_data.c
@@ -144,6 +144,7 @@
 	.name		= "l4_ls",
 	.clkdm_name	= "alwon_l3s_clkdm",
 	.class		= &l4_hwmod_class,
+	.flags		= HWMOD_NO_IDLEST,
 };
 
 /*
@@ -155,6 +156,7 @@
 	.name		= "l4_hs",
 	.clkdm_name	= "alwon_l3_med_clkdm",
 	.class		= &l4_hwmod_class,
+	.flags		= HWMOD_NO_IDLEST,
 };
 
 /* L3 slow -> L4 ls peripheral interface running at 125MHz */
@@ -850,6 +852,7 @@
 	.name		= "emac0",
 	.clkdm_name	= "alwon_ethernet_clkdm",
 	.class		= &dm816x_emac_hwmod_class,
+	.flags		= HWMOD_NO_IDLEST,
 };
 
 static struct omap_hwmod_ocp_if dm81xx_l4_hs__emac0 = {
diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c
index 1dfe346..5814477 100644
--- a/arch/arm/mach-omap2/pdata-quirks.c
+++ b/arch/arm/mach-omap2/pdata-quirks.c
@@ -24,9 +24,6 @@
 #include <linux/platform_data/iommu-omap.h>
 #include <linux/platform_data/wkup_m3.h>
 
-#include <asm/siginfo.h>
-#include <asm/signal.h>
-
 #include "common.h"
 #include "common-board-devices.h"
 #include "dss-common.h"
@@ -385,29 +382,6 @@
 }
 #endif /* CONFIG_ARCH_OMAP3 */
 
-#ifdef CONFIG_SOC_TI81XX
-static int fault_fixed_up;
-
-static int t410_abort_handler(unsigned long addr, unsigned int fsr,
-			      struct pt_regs *regs)
-{
-	if ((fsr == 0x406 || fsr == 0xc06) && !fault_fixed_up) {
-		pr_warn("External imprecise Data abort at addr=%#lx, fsr=%#x ignored.\n",
-			addr, fsr);
-		fault_fixed_up = 1;
-		return 0;
-	}
-
-	return 1;
-}
-
-static void __init t410_abort_init(void)
-{
-	hook_fault_code(16 + 6, t410_abort_handler, SIGBUS, BUS_OBJERR,
-			"imprecise external abort");
-}
-#endif
-
 #if defined(CONFIG_ARCH_OMAP4) || defined(CONFIG_SOC_OMAP5)
 static struct iommu_platform_data omap4_iommu_pdata = {
 	.reset_name = "mmu_cache",
@@ -536,9 +510,6 @@
 	{ "openpandora,omap3-pandora-600mhz", omap3_pandora_legacy_init, },
 	{ "openpandora,omap3-pandora-1ghz", omap3_pandora_legacy_init, },
 #endif
-#ifdef CONFIG_SOC_TI81XX
-	{ "hp,t410", t410_abort_init, },
-#endif
 #ifdef CONFIG_SOC_OMAP5
 	{ "ti,omap5-uevm", omap5_uevm_legacy_init, },
 #endif
diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c
index 87b98bf9..2dbd378 100644
--- a/arch/arm/mach-omap2/pm34xx.c
+++ b/arch/arm/mach-omap2/pm34xx.c
@@ -301,11 +301,11 @@
 	if (omap_irq_pending())
 		return;
 
-	trace_cpu_idle(1, smp_processor_id());
+	trace_cpu_idle_rcuidle(1, smp_processor_id());
 
 	omap_sram_idle();
 
-	trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
+	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
 }
 
 #ifdef CONFIG_SUSPEND
diff --git a/arch/arm/mach-orion5x/include/mach/entry-macro.S b/arch/arm/mach-orion5x/include/mach/entry-macro.S
index 79eb502..73919a3 100644
--- a/arch/arm/mach-orion5x/include/mach/entry-macro.S
+++ b/arch/arm/mach-orion5x/include/mach/entry-macro.S
@@ -21,5 +21,5 @@
 	@ find cause bits that are unmasked
 	ands	\irqstat, \irqstat, \tmp	@ clear Z flag if any
 	clzne	\irqnr,	\irqstat		@ calc irqnr
-	rsbne	\irqnr, \irqnr, #31
+	rsbne	\irqnr, \irqnr, #32
 	.endm
diff --git a/arch/arm/mach-pxa/palm27x.c b/arch/arm/mach-pxa/palm27x.c
index 13eba2b..8fbfb10 100644
--- a/arch/arm/mach-pxa/palm27x.c
+++ b/arch/arm/mach-pxa/palm27x.c
@@ -344,7 +344,7 @@
 {
 	palm_bl_power	= bl;
 	palm_lcd_power	= lcd;
-	pwm_add_lookup(palm27x_pwm_lookup, ARRAY_SIZE(palm27x_pwm_lookup));
+	pwm_add_table(palm27x_pwm_lookup, ARRAY_SIZE(palm27x_pwm_lookup));
 	platform_device_register(&palm27x_backlight);
 }
 #endif
diff --git a/arch/arm/mach-pxa/palmtc.c b/arch/arm/mach-pxa/palmtc.c
index aebf6de..0b5c387 100644
--- a/arch/arm/mach-pxa/palmtc.c
+++ b/arch/arm/mach-pxa/palmtc.c
@@ -169,7 +169,7 @@
 #if defined(CONFIG_BACKLIGHT_PWM) || defined(CONFIG_BACKLIGHT_PWM_MODULE)
 static struct pwm_lookup palmtc_pwm_lookup[] = {
 	PWM_LOOKUP("pxa25x-pwm.1", 0, "pwm-backlight.0", NULL, PALMTC_PERIOD_NS,
-		   PWM_PERIOD_NORMAL),
+		   PWM_POLARITY_NORMAL),
 };
 
 static struct platform_pwm_backlight_data palmtc_backlight_data = {
diff --git a/arch/arm/mach-shmobile/setup-r8a7793.c b/arch/arm/mach-shmobile/setup-r8a7793.c
index 1d2825c..5fce87f 100644
--- a/arch/arm/mach-shmobile/setup-r8a7793.c
+++ b/arch/arm/mach-shmobile/setup-r8a7793.c
@@ -19,7 +19,7 @@
 #include "common.h"
 #include "rcar-gen2.h"
 
-static const char *r8a7793_boards_compat_dt[] __initconst = {
+static const char * const r8a7793_boards_compat_dt[] __initconst = {
 	"renesas,r8a7793",
 	NULL,
 };
diff --git a/arch/arm/mach-zx/Kconfig b/arch/arm/mach-zx/Kconfig
index 7fdc5bf..446334a 100644
--- a/arch/arm/mach-zx/Kconfig
+++ b/arch/arm/mach-zx/Kconfig
@@ -13,7 +13,7 @@
 	select ARM_GLOBAL_TIMER
 	select HAVE_ARM_SCU if SMP
 	select HAVE_ARM_TWD if SMP
-	select PM_GENERIC_DOMAINS
+	select PM_GENERIC_DOMAINS if PM
 	help
 	  Support for ZTE ZX296702 SoC which is a dual core CortexA9MP
 endif
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 2f4b14c..591f9db 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -1061,7 +1061,7 @@
 	}
 	build_epilogue(&ctx);
 
-	flush_icache_range((u32)ctx.target, (u32)(ctx.target + ctx.idx));
+	flush_icache_range((u32)header, (u32)(ctx.target + ctx.idx));
 
 #if __LINUX_ARM_ARCH__ < 7
 	if (ctx.imm_count)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9ac16a4..871f217 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -49,7 +49,7 @@
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_BITREVERSE
 	select HAVE_ARCH_JUMP_LABEL
-	select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP
+	select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP && !(ARM64_16K_PAGES && ARM64_VA_BITS_48)
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_TRACEHOOK
@@ -316,6 +316,27 @@
 
 	  If unsure, say Y.
 
+config ARM64_ERRATUM_834220
+	bool "Cortex-A57: 834220: Stage 2 translation fault might be incorrectly reported in presence of a Stage 1 fault"
+	depends on KVM
+	default y
+	help
+	  This option adds an alternative code sequence to work around ARM
+	  erratum 834220 on Cortex-A57 parts up to r1p2.
+
+	  Affected Cortex-A57 parts might report a Stage 2 translation
+	  fault as the result of a Stage 1 fault for load crossing a
+	  page boundary when there is a permission or device memory
+	  alignment fault at Stage 1 and a translation fault at Stage 2.
+
+	  The workaround is to verify that the Stage 1 translation
+	  doesn't generate a fault before handling the Stage 2 fault.
+	  Please note that this does not necessarily enable the workaround,
+	  as it depends on the alternative framework, which will only patch
+	  the kernel if an affected CPU is detected.
+
+	  If unsure, say Y.
+
 config ARM64_ERRATUM_845719
 	bool "Cortex-A53: 845719: a load might read incorrect data"
 	depends on COMPAT
diff --git a/arch/arm64/crypto/aes-ce-cipher.c b/arch/arm64/crypto/aes-ce-cipher.c
index ce47792..f7bd9bf 100644
--- a/arch/arm64/crypto/aes-ce-cipher.c
+++ b/arch/arm64/crypto/aes-ce-cipher.c
@@ -237,7 +237,7 @@
 static struct crypto_alg aes_alg = {
 	.cra_name		= "aes",
 	.cra_driver_name	= "aes-ce",
-	.cra_priority		= 300,
+	.cra_priority		= 250,
 	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
 	.cra_blocksize		= AES_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 624f967..9622eb4 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -64,27 +64,31 @@
 
 #define smp_load_acquire(p)						\
 ({									\
-	typeof(*p) ___p1;						\
+	union { typeof(*p) __val; char __c[1]; } __u;			\
 	compiletime_assert_atomic_type(*p);				\
 	switch (sizeof(*p)) {						\
 	case 1:								\
 		asm volatile ("ldarb %w0, %1"				\
-			: "=r" (___p1) : "Q" (*p) : "memory");		\
+			: "=r" (*(__u8 *)__u.__c)			\
+			: "Q" (*p) : "memory");				\
 		break;							\
 	case 2:								\
 		asm volatile ("ldarh %w0, %1"				\
-			: "=r" (___p1) : "Q" (*p) : "memory");		\
+			: "=r" (*(__u16 *)__u.__c)			\
+			: "Q" (*p) : "memory");				\
 		break;							\
 	case 4:								\
 		asm volatile ("ldar %w0, %1"				\
-			: "=r" (___p1) : "Q" (*p) : "memory");		\
+			: "=r" (*(__u32 *)__u.__c)			\
+			: "Q" (*p) : "memory");				\
 		break;							\
 	case 8:								\
 		asm volatile ("ldar %0, %1"				\
-			: "=r" (___p1) : "Q" (*p) : "memory");		\
+			: "=r" (*(__u64 *)__u.__c)			\
+			: "Q" (*p) : "memory");				\
 		break;							\
 	}								\
-	___p1;								\
+	__u.__val;							\
 })
 
 #define read_barrier_depends()		do { } while(0)
diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index 7fbed69..eb8432b 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -23,7 +23,6 @@
  */
 #include <linux/types.h>
 #include <linux/sched.h>
-#include <linux/ptrace.h>
 
 #define COMPAT_USER_HZ		100
 #ifdef __AARCH64EB__
@@ -234,7 +233,7 @@
 	return (u32)(unsigned long)uptr;
 }
 
-#define compat_user_stack_pointer() (user_stack_pointer(current_pt_regs()))
+#define compat_user_stack_pointer() (user_stack_pointer(task_pt_regs(current)))
 
 static inline void __user *arch_compat_alloc_user_space(long len)
 {
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 11d5bb0f..8f271b8 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -29,8 +29,9 @@
 #define ARM64_HAS_PAN				4
 #define ARM64_HAS_LSE_ATOMICS			5
 #define ARM64_WORKAROUND_CAVIUM_23154		6
+#define ARM64_WORKAROUND_834220			7
 
-#define ARM64_NCAPS				7
+#define ARM64_NCAPS				8
 
 #ifndef __ASSEMBLY__
 
@@ -46,8 +47,12 @@
 #define FTR_STRICT	true	/* SANITY check strict matching required */
 #define FTR_NONSTRICT	false	/* SANITY check ignored */
 
+#define FTR_SIGNED	true	/* Value should be treated as signed */
+#define FTR_UNSIGNED	false	/* Value should be treated as unsigned */
+
 struct arm64_ftr_bits {
-	bool		strict;	  /* CPU Sanity check: strict matching required ? */
+	bool		sign;	/* Value is signed ? */
+	bool		strict;	/* CPU Sanity check: strict matching required ? */
 	enum ftr_type	type;
 	u8		shift;
 	u8		width;
@@ -123,6 +128,18 @@
 	return cpuid_feature_extract_field_width(features, field, 4);
 }
 
+static inline unsigned int __attribute_const__
+cpuid_feature_extract_unsigned_field_width(u64 features, int field, int width)
+{
+	return (u64)(features << (64 - width - field)) >> (64 - width);
+}
+
+static inline unsigned int __attribute_const__
+cpuid_feature_extract_unsigned_field(u64 features, int field)
+{
+	return cpuid_feature_extract_unsigned_field_width(features, field, 4);
+}
+
 static inline u64 arm64_ftr_mask(struct arm64_ftr_bits *ftrp)
 {
 	return (u64)GENMASK(ftrp->shift + ftrp->width - 1, ftrp->shift);
@@ -130,7 +147,9 @@
 
 static inline s64 arm64_ftr_value(struct arm64_ftr_bits *ftrp, u64 val)
 {
-	return cpuid_feature_extract_field_width(val, ftrp->shift, ftrp->width);
+	return ftrp->sign ?
+		cpuid_feature_extract_field_width(val, ftrp->shift, ftrp->width) :
+		cpuid_feature_extract_unsigned_field_width(val, ftrp->shift, ftrp->width);
 }
 
 static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0)
diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
index 54d0ead..61e08f3 100644
--- a/arch/arm64/include/asm/dma-mapping.h
+++ b/arch/arm64/include/asm/dma-mapping.h
@@ -18,7 +18,6 @@
 
 #ifdef __KERNEL__
 
-#include <linux/acpi.h>
 #include <linux/types.h>
 #include <linux/vmalloc.h>
 
@@ -26,22 +25,16 @@
 #include <asm/xen/hypervisor.h>
 
 #define DMA_ERROR_CODE	(~(dma_addr_t)0)
-extern struct dma_map_ops *dma_ops;
 extern struct dma_map_ops dummy_dma_ops;
 
 static inline struct dma_map_ops *__generic_dma_ops(struct device *dev)
 {
-	if (unlikely(!dev))
-		return dma_ops;
-	else if (dev->archdata.dma_ops)
+	if (dev && dev->archdata.dma_ops)
 		return dev->archdata.dma_ops;
-	else if (acpi_disabled)
-		return dma_ops;
 
 	/*
-	 * When ACPI is enabled, if arch_set_dma_ops is not called,
-	 * we will disable device DMA capability by setting it
-	 * to dummy_dma_ops.
+	 * We expect no ISA devices, and all other DMA masters are expected to
+	 * have someone call arch_setup_dma_ops at device creation time.
 	 */
 	return &dummy_dma_ops;
 }
diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h
index e54415e..9732908 100644
--- a/arch/arm64/include/asm/hw_breakpoint.h
+++ b/arch/arm64/include/asm/hw_breakpoint.h
@@ -138,16 +138,18 @@
 /* Determine number of BRP registers available. */
 static inline int get_num_brps(void)
 {
+	u64 dfr0 = read_system_reg(SYS_ID_AA64DFR0_EL1);
 	return 1 +
-		cpuid_feature_extract_field(read_system_reg(SYS_ID_AA64DFR0_EL1),
+		cpuid_feature_extract_unsigned_field(dfr0,
 						ID_AA64DFR0_BRPS_SHIFT);
 }
 
 /* Determine number of WRP registers available. */
 static inline int get_num_wrps(void)
 {
+	u64 dfr0 = read_system_reg(SYS_ID_AA64DFR0_EL1);
 	return 1 +
-		cpuid_feature_extract_field(read_system_reg(SYS_ID_AA64DFR0_EL1),
+		cpuid_feature_extract_unsigned_field(dfr0,
 						ID_AA64DFR0_WRPS_SHIFT);
 }
 
diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
index 23eb450..8e8d306 100644
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -7,4 +7,9 @@
 
 extern void set_handle_irq(void (*handle_irq)(struct pt_regs *));
 
+static inline int nr_legacy_irqs(void)
+{
+	return 0;
+}
+
 #endif
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 17e92f0..25a4021 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -99,12 +99,22 @@
 	*vcpu_cpsr(vcpu) |= COMPAT_PSR_T_BIT;
 }
 
-static inline unsigned long *vcpu_reg(const struct kvm_vcpu *vcpu, u8 reg_num)
+/*
+ * vcpu_get_reg and vcpu_set_reg should always be passed a register number
+ * coming from a read of ESR_EL2. Otherwise, it may give the wrong result on
+ * AArch32 with banked registers.
+ */
+static inline unsigned long vcpu_get_reg(const struct kvm_vcpu *vcpu,
+					 u8 reg_num)
 {
-	if (vcpu_mode_is_32bit(vcpu))
-		return vcpu_reg32(vcpu, reg_num);
+	return (reg_num == 31) ? 0 : vcpu_gp_regs(vcpu)->regs.regs[reg_num];
+}
 
-	return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.regs[reg_num];
+static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num,
+				unsigned long val)
+{
+	if (reg_num != 31)
+		vcpu_gp_regs(vcpu)->regs.regs[reg_num] = val;
 }
 
 /* Get vcpu SPSR for current mode */
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index c0e8789..2416578 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -101,7 +101,7 @@
 #define destroy_context(mm)		do { } while(0)
 void check_and_switch_context(struct mm_struct *mm, unsigned int cpu);
 
-#define init_new_context(tsk,mm)	({ atomic64_set(&mm->context.id, 0); 0; })
+#define init_new_context(tsk,mm)	({ atomic64_set(&(mm)->context.id, 0); 0; })
 
 /*
  * This is called when "tsk" is about to enter lazy TLB mode.
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 9819a94..7e074f9 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -81,6 +81,7 @@
 
 #define PAGE_KERNEL		__pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
 #define PAGE_KERNEL_RO		__pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
+#define PAGE_KERNEL_ROX	__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
 #define PAGE_KERNEL_EXEC	__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
 #define PAGE_KERNEL_EXEC_CONT	__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
 
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 24926f2..feb6b4e 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -75,6 +75,15 @@
 			   (1 << MIDR_VARIANT_SHIFT) | 2),
 	},
 #endif
+#ifdef CONFIG_ARM64_ERRATUM_834220
+	{
+	/* Cortex-A57 r0p0 - r1p2 */
+		.desc = "ARM erratum 834220",
+		.capability = ARM64_WORKAROUND_834220,
+		MIDR_RANGE(MIDR_CORTEX_A57, 0x00,
+			   (1 << MIDR_VARIANT_SHIFT) | 2),
+	},
+#endif
 #ifdef CONFIG_ARM64_ERRATUM_845719
 	{
 	/* Cortex-A53 r0p[01234] */
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index c8cf892..0669c63 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -44,8 +44,9 @@
 
 DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
 
-#define ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
+#define __ARM64_FTR_BITS(SIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
 	{						\
+		.sign = SIGNED,				\
 		.strict = STRICT,			\
 		.type = TYPE,				\
 		.shift = SHIFT,				\
@@ -53,6 +54,14 @@
 		.safe_val = SAFE_VAL,			\
 	}
 
+/* Define a feature with signed values */
+#define ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
+	__ARM64_FTR_BITS(FTR_SIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL)
+
+/* Define a feature with unsigned value */
+#define U_ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
+	__ARM64_FTR_BITS(FTR_UNSIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL)
+
 #define ARM64_FTR_END					\
 	{						\
 		.width = 0,				\
@@ -99,7 +108,7 @@
 	 * Differing PARange is fine as long as all peripherals and memory are mapped
 	 * within the minimum PARange of all CPUs
 	 */
-	ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_PARANGE_SHIFT, 4, 0),
+	U_ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_PARANGE_SHIFT, 4, 0),
 	ARM64_FTR_END,
 };
 
@@ -115,18 +124,18 @@
 };
 
 static struct arm64_ftr_bits ftr_ctr[] = {
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 31, 1, 1),	/* RAO */
+	U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 31, 1, 1),	/* RAO */
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 3, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0),	/* CWG */
-	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0),	/* ERG */
-	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1),	/* DminLine */
+	U_ARM64_FTR_BITS(FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0),	/* CWG */
+	U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0),	/* ERG */
+	U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1),	/* DminLine */
 	/*
 	 * Linux can handle differing I-cache policies. Userspace JITs will
 	 * make use of *minLine
 	 */
-	ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, 14, 2, 0),	/* L1Ip */
+	U_ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, 14, 2, 0),	/* L1Ip */
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 10, 0),	/* RAZ */
-	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),	/* IminLine */
+	U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),	/* IminLine */
 	ARM64_FTR_END,
 };
 
@@ -144,12 +153,12 @@
 
 static struct arm64_ftr_bits ftr_id_aa64dfr0[] = {
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPS_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_TRACEVER_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6),
+	U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0),
+	U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0),
+	U_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPS_SHIFT, 4, 0),
+	U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0),
+	U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_TRACEVER_SHIFT, 4, 0),
+	U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6),
 	ARM64_FTR_END,
 };
 
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 706679d..212ae63 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -30,6 +30,7 @@
 #include <linux/seq_file.h>
 #include <linux/sched.h>
 #include <linux/smp.h>
+#include <linux/delay.h>
 
 /*
  * In case the boot CPU is hotpluggable, we record its initial state and
@@ -112,6 +113,10 @@
 		 */
 		seq_printf(m, "processor\t: %d\n", i);
 
+		seq_printf(m, "BogoMIPS\t: %lu.%02lu\n",
+			   loops_per_jiffy / (500000UL/HZ),
+			   loops_per_jiffy / (5000UL/HZ) % 100);
+
 		/*
 		 * Dump out the common processor features in a single line.
 		 * Userspace should read the hwcaps with getauxval(AT_HWCAP)
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index de46b50..4eeb171 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -127,7 +127,11 @@
 	table_size = sizeof(efi_config_table_64_t) * efi.systab->nr_tables;
 	config_tables = early_memremap(efi_to_phys(efi.systab->tables),
 				       table_size);
-
+	if (config_tables == NULL) {
+		pr_warn("Unable to map EFI config table array.\n");
+		retval = -ENOMEM;
+		goto out;
+	}
 	retval = efi_config_parse_tables(config_tables, efi.systab->nr_tables,
 					 sizeof(efi_config_table_64_t), NULL);
 
@@ -209,6 +213,14 @@
 			 PAGE_ALIGN(params.mmap_size + (params.mmap & ~PAGE_MASK)));
 	memmap.phys_map = params.mmap;
 	memmap.map = early_memremap(params.mmap, params.mmap_size);
+	if (memmap.map == NULL) {
+		/*
+		* If we are booting via UEFI, the UEFI memory map is the only
+		* description of memory we have, so there is little point in
+		* proceeding if we cannot access it.
+		*/
+		panic("Unable to map EFI memory map.\n");
+	}
 	memmap.map_end = memmap.map + params.mmap_size;
 	memmap.desc_size = params.desc_size;
 	memmap.desc_version = params.desc_ver;
@@ -224,8 +236,9 @@
 {
 	efi_memory_desc_t *md;
 
+	init_new_context(NULL, &efi_mm);
+
 	for_each_efi_memory_desc(&memmap, md) {
-		u64 paddr, npages, size;
 		pgprot_t prot;
 
 		if (!(md->attribute & EFI_MEMORY_RUNTIME))
@@ -233,11 +246,6 @@
 		if (md->virt_addr == 0)
 			return false;
 
-		paddr = md->phys_addr;
-		npages = md->num_pages;
-		memrange_efi_to_native(&paddr, &npages);
-		size = npages << PAGE_SHIFT;
-
 		pr_info("  EFI remap 0x%016llx => %p\n",
 			md->phys_addr, (void *)md->virt_addr);
 
@@ -254,7 +262,9 @@
 		else
 			prot = PAGE_KERNEL;
 
-		create_pgd_mapping(&efi_mm, paddr, md->virt_addr, size, prot);
+		create_pgd_mapping(&efi_mm, md->phys_addr, md->virt_addr,
+				   md->num_pages << EFI_PAGE_SHIFT, 
+				   __pgprot(pgprot_val(prot) | PTE_NG));
 	}
 	return true;
 }
@@ -270,12 +280,12 @@
 
 	if (!efi_enabled(EFI_BOOT)) {
 		pr_info("EFI services will not be available.\n");
-		return -1;
+		return 0;
 	}
 
 	if (efi_runtime_disabled()) {
 		pr_info("EFI runtime services will be disabled.\n");
-		return -1;
+		return 0;
 	}
 
 	pr_info("Remapping and enabling EFI services.\n");
@@ -285,7 +295,7 @@
 						   mapsize);
 	if (!memmap.map) {
 		pr_err("Failed to remap EFI memory map\n");
-		return -1;
+		return -ENOMEM;
 	}
 	memmap.map_end = memmap.map + mapsize;
 	efi.memmap = &memmap;
@@ -294,13 +304,13 @@
 						   sizeof(efi_system_table_t));
 	if (!efi.systab) {
 		pr_err("Failed to remap EFI System Table\n");
-		return -1;
+		return -ENOMEM;
 	}
 	set_bit(EFI_SYSTEM_TABLES, &efi.flags);
 
 	if (!efi_virtmap_init()) {
 		pr_err("No UEFI virtual mapping was installed -- runtime services will not be available\n");
-		return -1;
+		return -ENOMEM;
 	}
 
 	/* Set up runtime services function pointers */
@@ -329,14 +339,7 @@
 
 static void efi_set_pgd(struct mm_struct *mm)
 {
-	if (mm == &init_mm)
-		cpu_set_reserved_ttbr0();
-	else
-		cpu_switch_mm(mm->pgd, mm);
-
-	local_flush_tlb_all();
-	if (icache_is_aivivt())
-		__local_flush_icache_all();
+	switch_mm(NULL, mm, NULL);
 }
 
 void efi_virtmap_load(void)
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index fce95e1..1095aa4 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -1,3 +1,4 @@
+#include <linux/ftrace.h>
 #include <linux/percpu.h>
 #include <linux/slab.h>
 #include <asm/cacheflush.h>
@@ -71,6 +72,13 @@
 	local_dbg_save(flags);
 
 	/*
+	 * Function graph tracer state gets incosistent when the kernel
+	 * calls functions that never return (aka suspend finishers) hence
+	 * disable graph tracing during their execution.
+	 */
+	pause_graph_tracing();
+
+	/*
 	 * mm context saved on the stack, it will be restored when
 	 * the cpu comes out of reset through the identity mapped
 	 * page tables, so that the thread address space is properly
@@ -111,6 +119,8 @@
 			hw_breakpoint_restore(NULL);
 	}
 
+	unpause_graph_tracing();
+
 	/*
 	 * Restore pstate flags. OS lock and mdscr have been already
 	 * restored, so from this point onwards, debugging is fully
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 68a0759..15f0477 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -37,7 +37,7 @@
 {
 	int ret;
 
-	trace_kvm_hvc_arm64(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0),
+	trace_kvm_hvc_arm64(*vcpu_pc(vcpu), vcpu_get_reg(vcpu, 0),
 			    kvm_vcpu_hvc_get_imm(vcpu));
 
 	ret = kvm_psci_call(vcpu);
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 1599701..86c2898 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -864,6 +864,10 @@
 ENDPROC(__kvm_flush_vm_context)
 
 __kvm_hyp_panic:
+	// Stash PAR_EL1 before corrupting it in __restore_sysregs
+	mrs	x0, par_el1
+	push	x0, xzr
+
 	// Guess the context by looking at VTTBR:
 	// If zero, then we're already a host.
 	// Otherwise restore a minimal host context before panicing.
@@ -898,7 +902,7 @@
 	mrs	x3, esr_el2
 	mrs	x4, far_el2
 	mrs	x5, hpfar_el2
-	mrs	x6, par_el1
+	pop	x6, xzr		// active context PAR_EL1
 	mrs	x7, tpidr_el2
 
 	mov	lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
@@ -914,7 +918,7 @@
 ENDPROC(__kvm_hyp_panic)
 
 __hyp_panic_str:
-	.ascii	"HYP panic:\nPS:%08x PC:%p ESR:%p\nFAR:%p HPFAR:%p PAR:%p\nVCPU:%p\n\0"
+	.ascii	"HYP panic:\nPS:%08x PC:%016x ESR:%08x\nFAR:%016x HPFAR:%016x PAR:%016x\nVCPU:%p\n\0"
 
 	.align	2
 
@@ -1015,9 +1019,15 @@
 	b.ne	1f		// Not an abort we care about
 
 	/* This is an abort. Check for permission fault */
+alternative_if_not ARM64_WORKAROUND_834220
 	and	x2, x1, #ESR_ELx_FSC_TYPE
 	cmp	x2, #FSC_PERM
 	b.ne	1f		// Not a permission fault
+alternative_else
+	nop			// Use the permission fault path to
+	nop			// check for a valid S1 translation,
+	nop			// regardless of the ESR value.
+alternative_endif
 
 	/*
 	 * Check for Stage-1 page table walk, which is guaranteed
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index 85c5715..648112e 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -48,7 +48,7 @@
 
 	/* Note: These now point to the banked copies */
 	*vcpu_spsr(vcpu) = new_spsr_value;
-	*vcpu_reg(vcpu, 14) = *vcpu_pc(vcpu) + return_offset;
+	*vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset;
 
 	/* Branch to exception vector */
 	if (sctlr & (1 << 13))
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 87a64e8..d2650e8 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -78,7 +78,7 @@
  * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized).
  */
 static bool access_dcsw(struct kvm_vcpu *vcpu,
-			const struct sys_reg_params *p,
+			struct sys_reg_params *p,
 			const struct sys_reg_desc *r)
 {
 	if (!p->is_write)
@@ -94,21 +94,19 @@
  * sys_regs and leave it in complete control of the caches.
  */
 static bool access_vm_reg(struct kvm_vcpu *vcpu,
-			  const struct sys_reg_params *p,
+			  struct sys_reg_params *p,
 			  const struct sys_reg_desc *r)
 {
-	unsigned long val;
 	bool was_enabled = vcpu_has_cache_enabled(vcpu);
 
 	BUG_ON(!p->is_write);
 
-	val = *vcpu_reg(vcpu, p->Rt);
 	if (!p->is_aarch32) {
-		vcpu_sys_reg(vcpu, r->reg) = val;
+		vcpu_sys_reg(vcpu, r->reg) = p->regval;
 	} else {
 		if (!p->is_32bit)
-			vcpu_cp15_64_high(vcpu, r->reg) = val >> 32;
-		vcpu_cp15_64_low(vcpu, r->reg) = val & 0xffffffffUL;
+			vcpu_cp15_64_high(vcpu, r->reg) = upper_32_bits(p->regval);
+		vcpu_cp15_64_low(vcpu, r->reg) = lower_32_bits(p->regval);
 	}
 
 	kvm_toggle_cache(vcpu, was_enabled);
@@ -122,22 +120,19 @@
  * for both AArch64 and AArch32 accesses.
  */
 static bool access_gic_sgi(struct kvm_vcpu *vcpu,
-			   const struct sys_reg_params *p,
+			   struct sys_reg_params *p,
 			   const struct sys_reg_desc *r)
 {
-	u64 val;
-
 	if (!p->is_write)
 		return read_from_write_only(vcpu, p);
 
-	val = *vcpu_reg(vcpu, p->Rt);
-	vgic_v3_dispatch_sgi(vcpu, val);
+	vgic_v3_dispatch_sgi(vcpu, p->regval);
 
 	return true;
 }
 
 static bool trap_raz_wi(struct kvm_vcpu *vcpu,
-			const struct sys_reg_params *p,
+			struct sys_reg_params *p,
 			const struct sys_reg_desc *r)
 {
 	if (p->is_write)
@@ -147,19 +142,19 @@
 }
 
 static bool trap_oslsr_el1(struct kvm_vcpu *vcpu,
-			   const struct sys_reg_params *p,
+			   struct sys_reg_params *p,
 			   const struct sys_reg_desc *r)
 {
 	if (p->is_write) {
 		return ignore_write(vcpu, p);
 	} else {
-		*vcpu_reg(vcpu, p->Rt) = (1 << 3);
+		p->regval = (1 << 3);
 		return true;
 	}
 }
 
 static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu,
-				   const struct sys_reg_params *p,
+				   struct sys_reg_params *p,
 				   const struct sys_reg_desc *r)
 {
 	if (p->is_write) {
@@ -167,7 +162,7 @@
 	} else {
 		u32 val;
 		asm volatile("mrs %0, dbgauthstatus_el1" : "=r" (val));
-		*vcpu_reg(vcpu, p->Rt) = val;
+		p->regval = val;
 		return true;
 	}
 }
@@ -200,17 +195,17 @@
  *   now use the debug registers.
  */
 static bool trap_debug_regs(struct kvm_vcpu *vcpu,
-			    const struct sys_reg_params *p,
+			    struct sys_reg_params *p,
 			    const struct sys_reg_desc *r)
 {
 	if (p->is_write) {
-		vcpu_sys_reg(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt);
+		vcpu_sys_reg(vcpu, r->reg) = p->regval;
 		vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
 	} else {
-		*vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, r->reg);
+		p->regval = vcpu_sys_reg(vcpu, r->reg);
 	}
 
-	trace_trap_reg(__func__, r->reg, p->is_write, *vcpu_reg(vcpu, p->Rt));
+	trace_trap_reg(__func__, r->reg, p->is_write, p->regval);
 
 	return true;
 }
@@ -225,10 +220,10 @@
  * hyp.S code switches between host and guest values in future.
  */
 static inline void reg_to_dbg(struct kvm_vcpu *vcpu,
-			      const struct sys_reg_params *p,
+			      struct sys_reg_params *p,
 			      u64 *dbg_reg)
 {
-	u64 val = *vcpu_reg(vcpu, p->Rt);
+	u64 val = p->regval;
 
 	if (p->is_32bit) {
 		val &= 0xffffffffUL;
@@ -240,19 +235,16 @@
 }
 
 static inline void dbg_to_reg(struct kvm_vcpu *vcpu,
-			      const struct sys_reg_params *p,
+			      struct sys_reg_params *p,
 			      u64 *dbg_reg)
 {
-	u64 val = *dbg_reg;
-
+	p->regval = *dbg_reg;
 	if (p->is_32bit)
-		val &= 0xffffffffUL;
-
-	*vcpu_reg(vcpu, p->Rt) = val;
+		p->regval &= 0xffffffffUL;
 }
 
 static inline bool trap_bvr(struct kvm_vcpu *vcpu,
-			    const struct sys_reg_params *p,
+			    struct sys_reg_params *p,
 			    const struct sys_reg_desc *rd)
 {
 	u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
@@ -294,7 +286,7 @@
 }
 
 static inline bool trap_bcr(struct kvm_vcpu *vcpu,
-			    const struct sys_reg_params *p,
+			    struct sys_reg_params *p,
 			    const struct sys_reg_desc *rd)
 {
 	u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg];
@@ -337,7 +329,7 @@
 }
 
 static inline bool trap_wvr(struct kvm_vcpu *vcpu,
-			    const struct sys_reg_params *p,
+			    struct sys_reg_params *p,
 			    const struct sys_reg_desc *rd)
 {
 	u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg];
@@ -380,7 +372,7 @@
 }
 
 static inline bool trap_wcr(struct kvm_vcpu *vcpu,
-			    const struct sys_reg_params *p,
+			    struct sys_reg_params *p,
 			    const struct sys_reg_desc *rd)
 {
 	u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg];
@@ -687,7 +679,7 @@
 };
 
 static bool trap_dbgidr(struct kvm_vcpu *vcpu,
-			const struct sys_reg_params *p,
+			struct sys_reg_params *p,
 			const struct sys_reg_desc *r)
 {
 	if (p->is_write) {
@@ -697,23 +689,23 @@
 		u64 pfr = read_system_reg(SYS_ID_AA64PFR0_EL1);
 		u32 el3 = !!cpuid_feature_extract_field(pfr, ID_AA64PFR0_EL3_SHIFT);
 
-		*vcpu_reg(vcpu, p->Rt) = ((((dfr >> ID_AA64DFR0_WRPS_SHIFT) & 0xf) << 28) |
-					  (((dfr >> ID_AA64DFR0_BRPS_SHIFT) & 0xf) << 24) |
-					  (((dfr >> ID_AA64DFR0_CTX_CMPS_SHIFT) & 0xf) << 20) |
-					  (6 << 16) | (el3 << 14) | (el3 << 12));
+		p->regval = ((((dfr >> ID_AA64DFR0_WRPS_SHIFT) & 0xf) << 28) |
+			     (((dfr >> ID_AA64DFR0_BRPS_SHIFT) & 0xf) << 24) |
+			     (((dfr >> ID_AA64DFR0_CTX_CMPS_SHIFT) & 0xf) << 20)
+			     | (6 << 16) | (el3 << 14) | (el3 << 12));
 		return true;
 	}
 }
 
 static bool trap_debug32(struct kvm_vcpu *vcpu,
-			 const struct sys_reg_params *p,
+			 struct sys_reg_params *p,
 			 const struct sys_reg_desc *r)
 {
 	if (p->is_write) {
-		vcpu_cp14(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt);
+		vcpu_cp14(vcpu, r->reg) = p->regval;
 		vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
 	} else {
-		*vcpu_reg(vcpu, p->Rt) = vcpu_cp14(vcpu, r->reg);
+		p->regval = vcpu_cp14(vcpu, r->reg);
 	}
 
 	return true;
@@ -731,7 +723,7 @@
  */
 
 static inline bool trap_xvr(struct kvm_vcpu *vcpu,
-			    const struct sys_reg_params *p,
+			    struct sys_reg_params *p,
 			    const struct sys_reg_desc *rd)
 {
 	u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
@@ -740,12 +732,12 @@
 		u64 val = *dbg_reg;
 
 		val &= 0xffffffffUL;
-		val |= *vcpu_reg(vcpu, p->Rt) << 32;
+		val |= p->regval << 32;
 		*dbg_reg = val;
 
 		vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
 	} else {
-		*vcpu_reg(vcpu, p->Rt) = *dbg_reg >> 32;
+		p->regval = *dbg_reg >> 32;
 	}
 
 	trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
@@ -991,7 +983,7 @@
  * Return 0 if the access has been handled, and -1 if not.
  */
 static int emulate_cp(struct kvm_vcpu *vcpu,
-		      const struct sys_reg_params *params,
+		      struct sys_reg_params *params,
 		      const struct sys_reg_desc *table,
 		      size_t num)
 {
@@ -1062,12 +1054,12 @@
 {
 	struct sys_reg_params params;
 	u32 hsr = kvm_vcpu_get_hsr(vcpu);
+	int Rt = (hsr >> 5) & 0xf;
 	int Rt2 = (hsr >> 10) & 0xf;
 
 	params.is_aarch32 = true;
 	params.is_32bit = false;
 	params.CRm = (hsr >> 1) & 0xf;
-	params.Rt = (hsr >> 5) & 0xf;
 	params.is_write = ((hsr & 1) == 0);
 
 	params.Op0 = 0;
@@ -1076,15 +1068,12 @@
 	params.CRn = 0;
 
 	/*
-	 * Massive hack here. Store Rt2 in the top 32bits so we only
-	 * have one register to deal with. As we use the same trap
+	 * Make a 64-bit value out of Rt and Rt2. As we use the same trap
 	 * backends between AArch32 and AArch64, we get away with it.
 	 */
 	if (params.is_write) {
-		u64 val = *vcpu_reg(vcpu, params.Rt);
-		val &= 0xffffffff;
-		val |= *vcpu_reg(vcpu, Rt2) << 32;
-		*vcpu_reg(vcpu, params.Rt) = val;
+		params.regval = vcpu_get_reg(vcpu, Rt) & 0xffffffff;
+		params.regval |= vcpu_get_reg(vcpu, Rt2) << 32;
 	}
 
 	if (!emulate_cp(vcpu, &params, target_specific, nr_specific))
@@ -1095,11 +1084,10 @@
 	unhandled_cp_access(vcpu, &params);
 
 out:
-	/* Do the opposite hack for the read side */
+	/* Split up the value between registers for the read side */
 	if (!params.is_write) {
-		u64 val = *vcpu_reg(vcpu, params.Rt);
-		val >>= 32;
-		*vcpu_reg(vcpu, Rt2) = val;
+		vcpu_set_reg(vcpu, Rt, lower_32_bits(params.regval));
+		vcpu_set_reg(vcpu, Rt2, upper_32_bits(params.regval));
 	}
 
 	return 1;
@@ -1118,21 +1106,24 @@
 {
 	struct sys_reg_params params;
 	u32 hsr = kvm_vcpu_get_hsr(vcpu);
+	int Rt  = (hsr >> 5) & 0xf;
 
 	params.is_aarch32 = true;
 	params.is_32bit = true;
 	params.CRm = (hsr >> 1) & 0xf;
-	params.Rt  = (hsr >> 5) & 0xf;
+	params.regval = vcpu_get_reg(vcpu, Rt);
 	params.is_write = ((hsr & 1) == 0);
 	params.CRn = (hsr >> 10) & 0xf;
 	params.Op0 = 0;
 	params.Op1 = (hsr >> 14) & 0x7;
 	params.Op2 = (hsr >> 17) & 0x7;
 
-	if (!emulate_cp(vcpu, &params, target_specific, nr_specific))
+	if (!emulate_cp(vcpu, &params, target_specific, nr_specific) ||
+	    !emulate_cp(vcpu, &params, global, nr_global)) {
+		if (!params.is_write)
+			vcpu_set_reg(vcpu, Rt, params.regval);
 		return 1;
-	if (!emulate_cp(vcpu, &params, global, nr_global))
-		return 1;
+	}
 
 	unhandled_cp_access(vcpu, &params);
 	return 1;
@@ -1175,7 +1166,7 @@
 }
 
 static int emulate_sys_reg(struct kvm_vcpu *vcpu,
-			   const struct sys_reg_params *params)
+			   struct sys_reg_params *params)
 {
 	size_t num;
 	const struct sys_reg_desc *table, *r;
@@ -1230,6 +1221,8 @@
 {
 	struct sys_reg_params params;
 	unsigned long esr = kvm_vcpu_get_hsr(vcpu);
+	int Rt = (esr >> 5) & 0x1f;
+	int ret;
 
 	trace_kvm_handle_sys_reg(esr);
 
@@ -1240,10 +1233,14 @@
 	params.CRn = (esr >> 10) & 0xf;
 	params.CRm = (esr >> 1) & 0xf;
 	params.Op2 = (esr >> 17) & 0x7;
-	params.Rt = (esr >> 5) & 0x1f;
+	params.regval = vcpu_get_reg(vcpu, Rt);
 	params.is_write = !(esr & 1);
 
-	return emulate_sys_reg(vcpu, &params);
+	ret = emulate_sys_reg(vcpu, &params);
+
+	if (!params.is_write)
+		vcpu_set_reg(vcpu, Rt, params.regval);
+	return ret;
 }
 
 /******************************************************************************
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index eaa324e..dbbb01c 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -28,7 +28,7 @@
 	u8	CRn;
 	u8	CRm;
 	u8	Op2;
-	u8	Rt;
+	u64	regval;
 	bool	is_write;
 	bool	is_aarch32;
 	bool	is_32bit;	/* Only valid if is_aarch32 is true */
@@ -44,7 +44,7 @@
 
 	/* Trapped access from guest, if non-NULL. */
 	bool (*access)(struct kvm_vcpu *,
-		       const struct sys_reg_params *,
+		       struct sys_reg_params *,
 		       const struct sys_reg_desc *);
 
 	/* Initialization for vcpu. */
@@ -77,9 +77,9 @@
 }
 
 static inline bool read_zero(struct kvm_vcpu *vcpu,
-			     const struct sys_reg_params *p)
+			     struct sys_reg_params *p)
 {
-	*vcpu_reg(vcpu, p->Rt) = 0;
+	p->regval = 0;
 	return true;
 }
 
diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c
index 1e45768..ed90578 100644
--- a/arch/arm64/kvm/sys_regs_generic_v8.c
+++ b/arch/arm64/kvm/sys_regs_generic_v8.c
@@ -31,13 +31,13 @@
 #include "sys_regs.h"
 
 static bool access_actlr(struct kvm_vcpu *vcpu,
-			 const struct sys_reg_params *p,
+			 struct sys_reg_params *p,
 			 const struct sys_reg_desc *r)
 {
 	if (p->is_write)
 		return ignore_write(vcpu, p);
 
-	*vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, ACTLR_EL1);
+	p->regval = vcpu_sys_reg(vcpu, ACTLR_EL1);
 	return true;
 }
 
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index f636a26..e87f53f 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -76,13 +76,28 @@
 		__flush_icache_all();
 }
 
-static int is_reserved_asid(u64 asid)
+static bool check_update_reserved_asid(u64 asid, u64 newasid)
 {
 	int cpu;
-	for_each_possible_cpu(cpu)
-		if (per_cpu(reserved_asids, cpu) == asid)
-			return 1;
-	return 0;
+	bool hit = false;
+
+	/*
+	 * Iterate over the set of reserved ASIDs looking for a match.
+	 * If we find one, then we can update our mm to use newasid
+	 * (i.e. the same ASID in the current generation) but we can't
+	 * exit the loop early, since we need to ensure that all copies
+	 * of the old ASID are updated to reflect the mm. Failure to do
+	 * so could result in us missing the reserved ASID in a future
+	 * generation.
+	 */
+	for_each_possible_cpu(cpu) {
+		if (per_cpu(reserved_asids, cpu) == asid) {
+			hit = true;
+			per_cpu(reserved_asids, cpu) = newasid;
+		}
+	}
+
+	return hit;
 }
 
 static u64 new_context(struct mm_struct *mm, unsigned int cpu)
@@ -92,12 +107,14 @@
 	u64 generation = atomic64_read(&asid_generation);
 
 	if (asid != 0) {
+		u64 newasid = generation | (asid & ~ASID_MASK);
+
 		/*
 		 * If our current ASID was active during a rollover, we
 		 * can continue to use it and this was just a false alarm.
 		 */
-		if (is_reserved_asid(asid))
-			return generation | (asid & ~ASID_MASK);
+		if (check_update_reserved_asid(asid, newasid))
+			return newasid;
 
 		/*
 		 * We had a valid ASID in a previous life, so try to re-use
@@ -105,7 +122,7 @@
 		 */
 		asid &= ~ASID_MASK;
 		if (!__test_and_set_bit(asid, asid_map))
-			goto bump_gen;
+			return newasid;
 	}
 
 	/*
@@ -129,10 +146,7 @@
 set_asid:
 	__set_bit(asid, asid_map);
 	cur_idx = asid;
-
-bump_gen:
-	asid |= generation;
-	return asid;
+	return asid | generation;
 }
 
 void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 131a199..7963aa4 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -18,6 +18,7 @@
  */
 
 #include <linux/gfp.h>
+#include <linux/acpi.h>
 #include <linux/export.h>
 #include <linux/slab.h>
 #include <linux/genalloc.h>
@@ -28,9 +29,6 @@
 
 #include <asm/cacheflush.h>
 
-struct dma_map_ops *dma_ops;
-EXPORT_SYMBOL(dma_ops);
-
 static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot,
 				 bool coherent)
 {
@@ -515,13 +513,7 @@
 
 static int __init arm64_dma_init(void)
 {
-	int ret;
-
-	dma_ops = &swiotlb_dma_ops;
-
-	ret = atomic_pool_init();
-
-	return ret;
+	return atomic_pool_init();
 }
 arch_initcall(arm64_dma_init);
 
@@ -552,10 +544,14 @@
 {
 	bool coherent = is_device_dma_coherent(dev);
 	int ioprot = dma_direction_to_prot(DMA_BIDIRECTIONAL, coherent);
+	size_t iosize = size;
 	void *addr;
 
 	if (WARN(!dev, "cannot create IOMMU mapping for unknown device\n"))
 		return NULL;
+
+	size = PAGE_ALIGN(size);
+
 	/*
 	 * Some drivers rely on this, and we probably don't want the
 	 * possibility of stale kernel data being read by devices anyway.
@@ -566,7 +562,7 @@
 		struct page **pages;
 		pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent);
 
-		pages = iommu_dma_alloc(dev, size, gfp, ioprot,	handle,
+		pages = iommu_dma_alloc(dev, iosize, gfp, ioprot, handle,
 					flush_page);
 		if (!pages)
 			return NULL;
@@ -574,7 +570,7 @@
 		addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot,
 					      __builtin_return_address(0));
 		if (!addr)
-			iommu_dma_free(dev, pages, size, handle);
+			iommu_dma_free(dev, pages, iosize, handle);
 	} else {
 		struct page *page;
 		/*
@@ -591,7 +587,7 @@
 		if (!addr)
 			return NULL;
 
-		*handle = iommu_dma_map_page(dev, page, 0, size, ioprot);
+		*handle = iommu_dma_map_page(dev, page, 0, iosize, ioprot);
 		if (iommu_dma_mapping_error(dev, *handle)) {
 			if (coherent)
 				__free_pages(page, get_order(size));
@@ -606,6 +602,9 @@
 static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
 			       dma_addr_t handle, struct dma_attrs *attrs)
 {
+	size_t iosize = size;
+
+	size = PAGE_ALIGN(size);
 	/*
 	 * @cpu_addr will be one of 3 things depending on how it was allocated:
 	 * - A remapped array of pages from iommu_dma_alloc(), for all
@@ -617,17 +616,17 @@
 	 * Hence how dodgy the below logic looks...
 	 */
 	if (__in_atomic_pool(cpu_addr, size)) {
-		iommu_dma_unmap_page(dev, handle, size, 0, NULL);
+		iommu_dma_unmap_page(dev, handle, iosize, 0, NULL);
 		__free_from_pool(cpu_addr, size);
 	} else if (is_vmalloc_addr(cpu_addr)){
 		struct vm_struct *area = find_vm_area(cpu_addr);
 
 		if (WARN_ON(!area || !area->pages))
 			return;
-		iommu_dma_free(dev, area->pages, size, &handle);
+		iommu_dma_free(dev, area->pages, iosize, &handle);
 		dma_common_free_remap(cpu_addr, size, VM_USERMAP);
 	} else {
-		iommu_dma_unmap_page(dev, handle, size, 0, NULL);
+		iommu_dma_unmap_page(dev, handle, iosize, 0, NULL);
 		__free_pages(virt_to_page(cpu_addr), get_order(size));
 	}
 }
@@ -984,8 +983,8 @@
 void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 			struct iommu_ops *iommu, bool coherent)
 {
-	if (!acpi_disabled && !dev->archdata.dma_ops)
-		dev->archdata.dma_ops = dma_ops;
+	if (!dev->archdata.dma_ops)
+		dev->archdata.dma_ops = &swiotlb_dma_ops;
 
 	dev->archdata.dma_coherent = coherent;
 	__iommu_setup_dma_ops(dev, dma_base, size, iommu);
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 19211c4..92ddac1 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -393,16 +393,16 @@
 	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 1 translation fault"	},
 	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 2 translation fault"	},
 	{ do_page_fault,	SIGSEGV, SEGV_MAPERR,	"level 3 translation fault"	},
-	{ do_bad,		SIGBUS,  0,		"reserved access flag fault"	},
+	{ do_bad,		SIGBUS,  0,		"unknown 8"			},
 	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 1 access flag fault"	},
 	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 2 access flag fault"	},
 	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 3 access flag fault"	},
-	{ do_bad,		SIGBUS,  0,		"reserved permission fault"	},
+	{ do_bad,		SIGBUS,  0,		"unknown 12"			},
 	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 1 permission fault"	},
 	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 2 permission fault"	},
 	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 3 permission fault"	},
 	{ do_bad,		SIGBUS,  0,		"synchronous external abort"	},
-	{ do_bad,		SIGBUS,  0,		"asynchronous external abort"	},
+	{ do_bad,		SIGBUS,  0,		"unknown 17"			},
 	{ do_bad,		SIGBUS,  0,		"unknown 18"			},
 	{ do_bad,		SIGBUS,  0,		"unknown 19"			},
 	{ do_bad,		SIGBUS,  0,		"synchronous abort (translation table walk)" },
@@ -410,16 +410,16 @@
 	{ do_bad,		SIGBUS,  0,		"synchronous abort (translation table walk)" },
 	{ do_bad,		SIGBUS,  0,		"synchronous abort (translation table walk)" },
 	{ do_bad,		SIGBUS,  0,		"synchronous parity error"	},
-	{ do_bad,		SIGBUS,  0,		"asynchronous parity error"	},
+	{ do_bad,		SIGBUS,  0,		"unknown 25"			},
 	{ do_bad,		SIGBUS,  0,		"unknown 26"			},
 	{ do_bad,		SIGBUS,  0,		"unknown 27"			},
-	{ do_bad,		SIGBUS,  0,		"synchronous parity error (translation table walk" },
-	{ do_bad,		SIGBUS,  0,		"synchronous parity error (translation table walk" },
-	{ do_bad,		SIGBUS,  0,		"synchronous parity error (translation table walk" },
-	{ do_bad,		SIGBUS,  0,		"synchronous parity error (translation table walk" },
+	{ do_bad,		SIGBUS,  0,		"synchronous parity error (translation table walk)" },
+	{ do_bad,		SIGBUS,  0,		"synchronous parity error (translation table walk)" },
+	{ do_bad,		SIGBUS,  0,		"synchronous parity error (translation table walk)" },
+	{ do_bad,		SIGBUS,  0,		"synchronous parity error (translation table walk)" },
 	{ do_bad,		SIGBUS,  0,		"unknown 32"			},
 	{ do_bad,		SIGBUS,  BUS_ADRALN,	"alignment fault"		},
-	{ do_bad,		SIGBUS,  0,		"debug event"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 34"			},
 	{ do_bad,		SIGBUS,  0,		"unknown 35"			},
 	{ do_bad,		SIGBUS,  0,		"unknown 36"			},
 	{ do_bad,		SIGBUS,  0,		"unknown 37"			},
@@ -433,21 +433,21 @@
 	{ do_bad,		SIGBUS,  0,		"unknown 45"			},
 	{ do_bad,		SIGBUS,  0,		"unknown 46"			},
 	{ do_bad,		SIGBUS,  0,		"unknown 47"			},
-	{ do_bad,		SIGBUS,  0,		"unknown 48"			},
+	{ do_bad,		SIGBUS,  0,		"TLB conflict abort"		},
 	{ do_bad,		SIGBUS,  0,		"unknown 49"			},
 	{ do_bad,		SIGBUS,  0,		"unknown 50"			},
 	{ do_bad,		SIGBUS,  0,		"unknown 51"			},
 	{ do_bad,		SIGBUS,  0,		"implementation fault (lockdown abort)" },
-	{ do_bad,		SIGBUS,  0,		"unknown 53"			},
+	{ do_bad,		SIGBUS,  0,		"implementation fault (unsupported exclusive)" },
 	{ do_bad,		SIGBUS,  0,		"unknown 54"			},
 	{ do_bad,		SIGBUS,  0,		"unknown 55"			},
 	{ do_bad,		SIGBUS,  0,		"unknown 56"			},
 	{ do_bad,		SIGBUS,  0,		"unknown 57"			},
-	{ do_bad,		SIGBUS,  0,		"implementation fault (coprocessor abort)" },
+	{ do_bad,		SIGBUS,  0,		"unknown 58" 			},
 	{ do_bad,		SIGBUS,  0,		"unknown 59"			},
 	{ do_bad,		SIGBUS,  0,		"unknown 60"			},
-	{ do_bad,		SIGBUS,  0,		"unknown 61"			},
-	{ do_bad,		SIGBUS,  0,		"unknown 62"			},
+	{ do_bad,		SIGBUS,  0,		"section domain fault"		},
+	{ do_bad,		SIGBUS,  0,		"page domain fault"		},
 	{ do_bad,		SIGBUS,  0,		"unknown 63"			},
 };
 
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index e3f563c..873e363 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -64,8 +64,12 @@
 
 static void __init *early_alloc(unsigned long sz)
 {
-	void *ptr = __va(memblock_alloc(sz, sz));
-	BUG_ON(!ptr);
+	phys_addr_t phys;
+	void *ptr;
+
+	phys = memblock_alloc(sz, sz);
+	BUG_ON(!phys);
+	ptr = __va(phys);
 	memset(ptr, 0, sz);
 	return ptr;
 }
@@ -81,55 +85,19 @@
 	do {
 		/*
 		 * Need to have the least restrictive permissions available
-		 * permissions will be fixed up later. Default the new page
-		 * range as contiguous ptes.
+		 * permissions will be fixed up later
 		 */
-		set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC_CONT));
+		set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
 		pfn++;
 	} while (pte++, i++, i < PTRS_PER_PTE);
 }
 
-/*
- * Given a PTE with the CONT bit set, determine where the CONT range
- * starts, and clear the entire range of PTE CONT bits.
- */
-static void clear_cont_pte_range(pte_t *pte, unsigned long addr)
-{
-	int i;
-
-	pte -= CONT_RANGE_OFFSET(addr);
-	for (i = 0; i < CONT_PTES; i++) {
-		set_pte(pte, pte_mknoncont(*pte));
-		pte++;
-	}
-	flush_tlb_all();
-}
-
-/*
- * Given a range of PTEs set the pfn and provided page protection flags
- */
-static void __populate_init_pte(pte_t *pte, unsigned long addr,
-				unsigned long end, phys_addr_t phys,
-				pgprot_t prot)
-{
-	unsigned long pfn = __phys_to_pfn(phys);
-
-	do {
-		/* clear all the bits except the pfn, then apply the prot */
-		set_pte(pte, pfn_pte(pfn, prot));
-		pte++;
-		pfn++;
-		addr += PAGE_SIZE;
-	} while (addr != end);
-}
-
 static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
-				  unsigned long end, phys_addr_t phys,
+				  unsigned long end, unsigned long pfn,
 				  pgprot_t prot,
 				  void *(*alloc)(unsigned long size))
 {
 	pte_t *pte;
-	unsigned long next;
 
 	if (pmd_none(*pmd) || pmd_sect(*pmd)) {
 		pte = alloc(PTRS_PER_PTE * sizeof(pte_t));
@@ -142,27 +110,9 @@
 
 	pte = pte_offset_kernel(pmd, addr);
 	do {
-		next = min(end, (addr + CONT_SIZE) & CONT_MASK);
-		if (((addr | next | phys) & ~CONT_MASK) == 0) {
-			/* a block of CONT_PTES  */
-			__populate_init_pte(pte, addr, next, phys,
-					    __pgprot(pgprot_val(prot) | PTE_CONT));
-		} else {
-			/*
-			 * If the range being split is already inside of a
-			 * contiguous range but this PTE isn't going to be
-			 * contiguous, then we want to unmark the adjacent
-			 * ranges, then update the portion of the range we
-			 * are interrested in.
-			 */
-			 clear_cont_pte_range(pte, addr);
-			 __populate_init_pte(pte, addr, next, phys, prot);
-		}
-
-		pte += (next - addr) >> PAGE_SHIFT;
-		phys += next - addr;
-		addr = next;
-	} while (addr != end);
+		set_pte(pte, pfn_pte(pfn, prot));
+		pfn++;
+	} while (pte++, addr += PAGE_SIZE, addr != end);
 }
 
 static void split_pud(pud_t *old_pud, pmd_t *pmd)
@@ -223,7 +173,8 @@
 				}
 			}
 		} else {
-			alloc_init_pte(pmd, addr, next, phys, prot, alloc);
+			alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys),
+				       prot, alloc);
 		}
 		phys += next - addr;
 	} while (pmd++, addr = next, addr != end);
@@ -362,8 +313,8 @@
 	 * for now. This will get more fine grained later once all memory
 	 * is mapped
 	 */
-	unsigned long kernel_x_start = round_down(__pa(_stext), SECTION_SIZE);
-	unsigned long kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);
+	unsigned long kernel_x_start = round_down(__pa(_stext), SWAPPER_BLOCK_SIZE);
+	unsigned long kernel_x_end = round_up(__pa(__init_end), SWAPPER_BLOCK_SIZE);
 
 	if (end < kernel_x_start) {
 		create_mapping(start, __phys_to_virt(start),
@@ -451,18 +402,18 @@
 {
 #ifdef CONFIG_DEBUG_RODATA
 	/* now that we are actually fully mapped, make the start/end more fine grained */
-	if (!IS_ALIGNED((unsigned long)_stext, SECTION_SIZE)) {
+	if (!IS_ALIGNED((unsigned long)_stext, SWAPPER_BLOCK_SIZE)) {
 		unsigned long aligned_start = round_down(__pa(_stext),
-							SECTION_SIZE);
+							 SWAPPER_BLOCK_SIZE);
 
 		create_mapping(aligned_start, __phys_to_virt(aligned_start),
 				__pa(_stext) - aligned_start,
 				PAGE_KERNEL);
 	}
 
-	if (!IS_ALIGNED((unsigned long)__init_end, SECTION_SIZE)) {
+	if (!IS_ALIGNED((unsigned long)__init_end, SWAPPER_BLOCK_SIZE)) {
 		unsigned long aligned_end = round_up(__pa(__init_end),
-							SECTION_SIZE);
+							  SWAPPER_BLOCK_SIZE);
 		create_mapping(__pa(__init_end), (unsigned long)__init_end,
 				aligned_end - __pa(__init_end),
 				PAGE_KERNEL);
@@ -475,7 +426,7 @@
 {
 	create_mapping_late(__pa(_stext), (unsigned long)_stext,
 				(unsigned long)_etext - (unsigned long)_stext,
-				PAGE_KERNEL_EXEC | PTE_RDONLY);
+				PAGE_KERNEL_ROX);
 
 }
 #endif
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index cf3c7d4..b162ad7 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -50,7 +50,7 @@
 	[BPF_REG_8] = A64_R(21),
 	[BPF_REG_9] = A64_R(22),
 	/* read-only frame pointer to access stack */
-	[BPF_REG_FP] = A64_FP,
+	[BPF_REG_FP] = A64_R(25),
 	/* temporary register for internal BPF JIT */
 	[TMP_REG_1] = A64_R(23),
 	[TMP_REG_2] = A64_R(24),
@@ -139,6 +139,12 @@
 /* Stack must be multiples of 16B */
 #define STACK_ALIGN(sz) (((sz) + 15) & ~15)
 
+#define _STACK_SIZE \
+	(MAX_BPF_STACK \
+	 + 4 /* extra for skb_copy_bits buffer */)
+
+#define STACK_SIZE STACK_ALIGN(_STACK_SIZE)
+
 static void build_prologue(struct jit_ctx *ctx)
 {
 	const u8 r6 = bpf2a64[BPF_REG_6];
@@ -150,10 +156,35 @@
 	const u8 rx = bpf2a64[BPF_REG_X];
 	const u8 tmp1 = bpf2a64[TMP_REG_1];
 	const u8 tmp2 = bpf2a64[TMP_REG_2];
-	int stack_size = MAX_BPF_STACK;
 
-	stack_size += 4; /* extra for skb_copy_bits buffer */
-	stack_size = STACK_ALIGN(stack_size);
+	/*
+	 * BPF prog stack layout
+	 *
+	 *                         high
+	 * original A64_SP =>   0:+-----+ BPF prologue
+	 *                        |FP/LR|
+	 * current A64_FP =>  -16:+-----+
+	 *                        | ... | callee saved registers
+	 *                        +-----+
+	 *                        |     | x25/x26
+	 * BPF fp register => -80:+-----+ <= (BPF_FP)
+	 *                        |     |
+	 *                        | ... | BPF prog stack
+	 *                        |     |
+	 *                        +-----+ <= (BPF_FP - MAX_BPF_STACK)
+	 *                        |RSVD | JIT scratchpad
+	 * current A64_SP =>      +-----+ <= (BPF_FP - STACK_SIZE)
+	 *                        |     |
+	 *                        | ... | Function call stack
+	 *                        |     |
+	 *                        +-----+
+	 *                          low
+	 *
+	 */
+
+	/* Save FP and LR registers to stay align with ARM64 AAPCS */
+	emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
+	emit(A64_MOV(1, A64_FP, A64_SP), ctx);
 
 	/* Save callee-saved register */
 	emit(A64_PUSH(r6, r7, A64_SP), ctx);
@@ -161,12 +192,15 @@
 	if (ctx->tmp_used)
 		emit(A64_PUSH(tmp1, tmp2, A64_SP), ctx);
 
-	/* Set up BPF stack */
-	emit(A64_SUB_I(1, A64_SP, A64_SP, stack_size), ctx);
+	/* Save fp (x25) and x26. SP requires 16 bytes alignment */
+	emit(A64_PUSH(fp, A64_R(26), A64_SP), ctx);
 
-	/* Set up frame pointer */
+	/* Set up BPF prog stack base register (x25) */
 	emit(A64_MOV(1, fp, A64_SP), ctx);
 
+	/* Set up function call stack */
+	emit(A64_SUB_I(1, A64_SP, A64_SP, STACK_SIZE), ctx);
+
 	/* Clear registers A and X */
 	emit_a64_mov_i64(ra, 0, ctx);
 	emit_a64_mov_i64(rx, 0, ctx);
@@ -182,13 +216,12 @@
 	const u8 fp = bpf2a64[BPF_REG_FP];
 	const u8 tmp1 = bpf2a64[TMP_REG_1];
 	const u8 tmp2 = bpf2a64[TMP_REG_2];
-	int stack_size = MAX_BPF_STACK;
-
-	stack_size += 4; /* extra for skb_copy_bits buffer */
-	stack_size = STACK_ALIGN(stack_size);
 
 	/* We're done with BPF stack */
-	emit(A64_ADD_I(1, A64_SP, A64_SP, stack_size), ctx);
+	emit(A64_ADD_I(1, A64_SP, A64_SP, STACK_SIZE), ctx);
+
+	/* Restore fs (x25) and x26 */
+	emit(A64_POP(fp, A64_R(26), A64_SP), ctx);
 
 	/* Restore callee-saved register */
 	if (ctx->tmp_used)
@@ -196,8 +229,8 @@
 	emit(A64_POP(r8, r9, A64_SP), ctx);
 	emit(A64_POP(r6, r7, A64_SP), ctx);
 
-	/* Restore frame pointer */
-	emit(A64_MOV(1, fp, A64_SP), ctx);
+	/* Restore FP/LR registers */
+	emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
 
 	/* Set return value */
 	emit(A64_MOV(1, A64_R(0), r0), ctx);
@@ -557,7 +590,25 @@
 	case BPF_ST | BPF_MEM | BPF_H:
 	case BPF_ST | BPF_MEM | BPF_B:
 	case BPF_ST | BPF_MEM | BPF_DW:
-		goto notyet;
+		/* Load imm to a register then store it */
+		ctx->tmp_used = 1;
+		emit_a64_mov_i(1, tmp2, off, ctx);
+		emit_a64_mov_i(1, tmp, imm, ctx);
+		switch (BPF_SIZE(code)) {
+		case BPF_W:
+			emit(A64_STR32(tmp, dst, tmp2), ctx);
+			break;
+		case BPF_H:
+			emit(A64_STRH(tmp, dst, tmp2), ctx);
+			break;
+		case BPF_B:
+			emit(A64_STRB(tmp, dst, tmp2), ctx);
+			break;
+		case BPF_DW:
+			emit(A64_STR64(tmp, dst, tmp2), ctx);
+			break;
+		}
+		break;
 
 	/* STX: *(size *)(dst + off) = src */
 	case BPF_STX | BPF_MEM | BPF_W:
@@ -624,7 +675,7 @@
 			return -EINVAL;
 		}
 		emit_a64_mov_i64(r3, size, ctx);
-		emit(A64_ADD_I(1, r4, fp, MAX_BPF_STACK), ctx);
+		emit(A64_SUB_I(1, r4, fp, STACK_SIZE), ctx);
 		emit_a64_mov_i64(r5, (unsigned long)bpf_load_pointer, ctx);
 		emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
 		emit(A64_MOV(1, A64_FP, A64_SP), ctx);
@@ -758,7 +809,7 @@
 	if (bpf_jit_enable > 1)
 		bpf_jit_dump(prog->len, image_size, 2, ctx.image);
 
-	bpf_flush_icache(ctx.image, ctx.image + ctx.idx);
+	bpf_flush_icache(header, ctx.image + ctx.idx);
 
 	set_memory_ro((unsigned long)header, header->pages);
 	prog->bpf_func = (void *)ctx.image;
diff --git a/arch/m68k/coldfire/m54xx.c b/arch/m68k/coldfire/m54xx.c
index f7836c6..c32f767 100644
--- a/arch/m68k/coldfire/m54xx.c
+++ b/arch/m68k/coldfire/m54xx.c
@@ -98,7 +98,7 @@
 	memstart = PAGE_ALIGN(_ramstart);
 	min_low_pfn = PFN_DOWN(_rambase);
 	start_pfn = PFN_DOWN(memstart);
-	max_low_pfn = PFN_DOWN(_ramend);
+	max_pfn = max_low_pfn = PFN_DOWN(_ramend);
 	high_memory = (void *)_ramend;
 
 	m68k_virt_to_node_shift = fls(_ramend - _rambase - 1) - 6;
diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h
index 0793a7f..f9d96bf 100644
--- a/arch/m68k/include/asm/unistd.h
+++ b/arch/m68k/include/asm/unistd.h
@@ -4,7 +4,7 @@
 #include <uapi/asm/unistd.h>
 
 
-#define NR_syscalls		375
+#define NR_syscalls		376
 
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_OLD_STAT
diff --git a/arch/m68k/include/uapi/asm/unistd.h b/arch/m68k/include/uapi/asm/unistd.h
index 5e6fae6..36cf129 100644
--- a/arch/m68k/include/uapi/asm/unistd.h
+++ b/arch/m68k/include/uapi/asm/unistd.h
@@ -380,5 +380,6 @@
 #define __NR_sendmmsg		372
 #define __NR_userfaultfd	373
 #define __NR_membarrier		374
+#define __NR_mlock2		375
 
 #endif /* _UAPI_ASM_M68K_UNISTD_H_ */
diff --git a/arch/m68k/kernel/setup_no.c b/arch/m68k/kernel/setup_no.c
index 88c27d9..76b9113 100644
--- a/arch/m68k/kernel/setup_no.c
+++ b/arch/m68k/kernel/setup_no.c
@@ -238,11 +238,14 @@
 	 * Give all the memory to the bootmap allocator, tell it to put the
 	 * boot mem_map at the start of memory.
 	 */
+	min_low_pfn = PFN_DOWN(memory_start);
+	max_pfn = max_low_pfn = PFN_DOWN(memory_end);
+
 	bootmap_size = init_bootmem_node(
 			NODE_DATA(0),
-			memory_start >> PAGE_SHIFT, /* map goes here */
-			PAGE_OFFSET >> PAGE_SHIFT,	/* 0 on coldfire */
-			memory_end >> PAGE_SHIFT);
+			min_low_pfn,		/* map goes here */
+			PFN_DOWN(PAGE_OFFSET),
+			max_pfn);
 	/*
 	 * Free the usable memory, we have to make sure we do not free
 	 * the bootmem bitmap so we then reserve it after freeing it :-)
diff --git a/arch/m68k/kernel/syscalltable.S b/arch/m68k/kernel/syscalltable.S
index 5dd0e80..282cd90 100644
--- a/arch/m68k/kernel/syscalltable.S
+++ b/arch/m68k/kernel/syscalltable.S
@@ -395,3 +395,4 @@
 	.long sys_sendmmsg
 	.long sys_userfaultfd
 	.long sys_membarrier
+	.long sys_mlock2		/* 375 */
diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c
index b958916..8f37fdd 100644
--- a/arch/m68k/mm/motorola.c
+++ b/arch/m68k/mm/motorola.c
@@ -250,7 +250,7 @@
 	high_memory = phys_to_virt(max_addr);
 
 	min_low_pfn = availmem >> PAGE_SHIFT;
-	max_low_pfn = max_addr >> PAGE_SHIFT;
+	max_pfn = max_low_pfn = max_addr >> PAGE_SHIFT;
 
 	for (i = 0; i < m68k_num_memory; i++) {
 		addr = m68k_memory[i].addr;
diff --git a/arch/m68k/sun3/config.c b/arch/m68k/sun3/config.c
index a8b942b..2a5f43a 100644
--- a/arch/m68k/sun3/config.c
+++ b/arch/m68k/sun3/config.c
@@ -118,13 +118,13 @@
 	memory_end = memory_end & PAGE_MASK;
 
 	start_page = __pa(memory_start) >> PAGE_SHIFT;
-	num_pages = __pa(memory_end) >> PAGE_SHIFT;
+	max_pfn = num_pages = __pa(memory_end) >> PAGE_SHIFT;
 
 	high_memory = (void *)memory_end;
 	availmem = memory_start;
 
 	m68k_setup_node(0);
-	availmem += init_bootmem_node(NODE_DATA(0), start_page, 0, num_pages);
+	availmem += init_bootmem(start_page, num_pages);
 	availmem = (availmem + (PAGE_SIZE-1)) & PAGE_MASK;
 
 	free_bootmem(__pa(availmem), memory_end - (availmem));
diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c
index 1ba2120..8755d61 100644
--- a/arch/mips/ath79/setup.c
+++ b/arch/mips/ath79/setup.c
@@ -216,9 +216,9 @@
 					   AR71XX_RESET_SIZE);
 	ath79_pll_base = ioremap_nocache(AR71XX_PLL_BASE,
 					 AR71XX_PLL_SIZE);
+	ath79_detect_sys_type();
 	ath79_ddr_ctrl_init();
 
-	ath79_detect_sys_type();
 	if (mips_machtype != ATH79_MACH_GENERIC_OF)
 		detect_memory_region(0, ATH79_MEM_SIZE_MIN, ATH79_MEM_SIZE_MAX);
 
@@ -281,3 +281,8 @@
 	     "Generic",
 	     "Generic AR71XX/AR724X/AR913X based board",
 	     ath79_generic_init);
+
+MIPS_MACHINE(ATH79_MACH_GENERIC_OF,
+	     "DTB",
+	     "Generic AR71XX/AR724X/AR913X based board (DT)",
+	     NULL);
diff --git a/arch/mips/boot/dts/qca/ar9132.dtsi b/arch/mips/boot/dts/qca/ar9132.dtsi
index fb7734e..13d0439 100644
--- a/arch/mips/boot/dts/qca/ar9132.dtsi
+++ b/arch/mips/boot/dts/qca/ar9132.dtsi
@@ -107,7 +107,7 @@
 			miscintc: interrupt-controller@18060010 {
 				compatible = "qca,ar9132-misc-intc",
 					   "qca,ar7100-misc-intc";
-				reg = <0x18060010 0x4>;
+				reg = <0x18060010 0x8>;
 
 				interrupt-parent = <&cpuintc>;
 				interrupts = <6>;
diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h
index ad1fccd..2046c02 100644
--- a/arch/mips/include/asm/page.h
+++ b/arch/mips/include/asm/page.h
@@ -200,8 +200,9 @@
 {
 	/* avoid <linux/mm.h> include hell */
 	extern unsigned long max_mapnr;
+	unsigned long pfn_offset = ARCH_PFN_OFFSET;
 
-	return pfn >= ARCH_PFN_OFFSET && pfn < max_mapnr;
+	return pfn >= pfn_offset && pfn < max_mapnr;
 }
 
 #elif defined(CONFIG_SPARSEMEM)
diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c
index d5fa3ea..41b1b09 100644
--- a/arch/mips/kvm/emulate.c
+++ b/arch/mips/kvm/emulate.c
@@ -1581,7 +1581,7 @@
 
 	base = (inst >> 21) & 0x1f;
 	op_inst = (inst >> 16) & 0x1f;
-	offset = inst & 0xffff;
+	offset = (int16_t)inst;
 	cache = (inst >> 16) & 0x3;
 	op = (inst >> 18) & 0x7;
 
diff --git a/arch/mips/kvm/locore.S b/arch/mips/kvm/locore.S
index 7bab3a4..7e22108 100644
--- a/arch/mips/kvm/locore.S
+++ b/arch/mips/kvm/locore.S
@@ -157,9 +157,11 @@
 
 FEXPORT(__kvm_mips_load_asid)
 	/* Set the ASID for the Guest Kernel */
-	INT_SLL	t0, t0, 1	/* with kseg0 @ 0x40000000, kernel */
-			        /* addresses shift to 0x80000000 */
-	bltz	t0, 1f		/* If kernel */
+	PTR_L	t0, VCPU_COP0(k1)
+	LONG_L	t0, COP0_STATUS(t0)
+	andi	t0, KSU_USER | ST0_ERL | ST0_EXL
+	xori	t0, KSU_USER
+	bnez	t0, 1f		/* If kernel */
 	 INT_ADDIU t1, k1, VCPU_GUEST_KERNEL_ASID  /* (BD)  */
 	INT_ADDIU t1, k1, VCPU_GUEST_USER_ASID    /* else user */
 1:
@@ -474,9 +476,11 @@
 	mtc0	t0, CP0_EPC
 
 	/* Set the ASID for the Guest Kernel */
-	INT_SLL	t0, t0, 1	/* with kseg0 @ 0x40000000, kernel */
-				/* addresses shift to 0x80000000 */
-	bltz	t0, 1f		/* If kernel */
+	PTR_L	t0, VCPU_COP0(k1)
+	LONG_L	t0, COP0_STATUS(t0)
+	andi	t0, KSU_USER | ST0_ERL | ST0_EXL
+	xori	t0, KSU_USER
+	bnez	t0, 1f		/* If kernel */
 	 INT_ADDIU t1, k1, VCPU_GUEST_KERNEL_ASID  /* (BD)  */
 	INT_ADDIU t1, k1, VCPU_GUEST_USER_ASID    /* else user */
 1:
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 49ff3bf..b9b803f 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -279,7 +279,7 @@
 
 	if (!gebase) {
 		err = -ENOMEM;
-		goto out_free_cpu;
+		goto out_uninit_cpu;
 	}
 	kvm_debug("Allocated %d bytes for KVM Exception Handlers @ %p\n",
 		  ALIGN(size, PAGE_SIZE), gebase);
@@ -343,6 +343,9 @@
 out_free_gebase:
 	kfree(gebase);
 
+out_uninit_cpu:
+	kvm_vcpu_uninit(vcpu);
+
 out_free_cpu:
 	kfree(vcpu);
 
diff --git a/arch/mips/pci/pci-rt2880.c b/arch/mips/pci/pci-rt2880.c
index 8a97802..dbbeccc 100644
--- a/arch/mips/pci/pci-rt2880.c
+++ b/arch/mips/pci/pci-rt2880.c
@@ -11,6 +11,7 @@
  *  by the Free Software Foundation.
  */
 
+#include <linux/delay.h>
 #include <linux/types.h>
 #include <linux/pci.h>
 #include <linux/io.h>
@@ -232,8 +233,7 @@
 	ioport_resource.end = RT2880_PCI_IO_BASE + RT2880_PCI_IO_SIZE - 1;
 
 	rt2880_pci_reg_write(0, RT2880_PCI_REG_PCICFG_ADDR);
-	for (i = 0; i < 0xfffff; i++)
-		;
+	udelay(1);
 
 	rt2880_pci_reg_write(0x79, RT2880_PCI_REG_ARBCTL);
 	rt2880_pci_reg_write(0x07FF0001, RT2880_PCI_REG_BAR0SETUP_ADDR);
diff --git a/arch/mips/pmcs-msp71xx/msp_setup.c b/arch/mips/pmcs-msp71xx/msp_setup.c
index 4f925e0..78b2ef4 100644
--- a/arch/mips/pmcs-msp71xx/msp_setup.c
+++ b/arch/mips/pmcs-msp71xx/msp_setup.c
@@ -10,6 +10,8 @@
  * option) any later version.
  */
 
+#include <linux/delay.h>
+
 #include <asm/bootinfo.h>
 #include <asm/cacheflush.h>
 #include <asm/idle.h>
@@ -77,7 +79,7 @@
 	 */
 
 	/* Wait a bit for the DDRC to settle */
-	for (i = 0; i < 100000000; i++);
+	mdelay(125);
 
 #if defined(CONFIG_PMC_MSP7120_GW)
 	/*
diff --git a/arch/mips/sni/reset.c b/arch/mips/sni/reset.c
index 244f942..db8f88b 100644
--- a/arch/mips/sni/reset.c
+++ b/arch/mips/sni/reset.c
@@ -3,6 +3,8 @@
  *
  *  Reset a SNI machine.
  */
+#include <linux/delay.h>
+
 #include <asm/io.h>
 #include <asm/reboot.h>
 #include <asm/sni.h>
@@ -32,9 +34,9 @@
 	for (;;) {
 		for (i = 0; i < 100; i++) {
 			kb_wait();
-			for (j = 0; j < 100000 ; j++)
-				/* nothing */;
+			udelay(50);
 			outb_p(0xfe, 0x64);	 /* pulse reset low */
+			udelay(50);
 		}
 	}
 }
diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig
index 4434b54..78ae555 100644
--- a/arch/mn10300/Kconfig
+++ b/arch/mn10300/Kconfig
@@ -1,6 +1,7 @@
 config MN10300
 	def_bool y
 	select HAVE_OPROFILE
+	select HAVE_UID16
 	select GENERIC_IRQ_SHOW
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select HAVE_ARCH_TRACEHOOK
@@ -37,9 +38,6 @@
 config NUMA
 	def_bool n
 
-config UID16
-	def_bool y
-
 config RWSEM_GENERIC_SPINLOCK
 	def_bool y
 
diff --git a/arch/nios2/mm/cacheflush.c b/arch/nios2/mm/cacheflush.c
index 223cdcc..87bf88e 100644
--- a/arch/nios2/mm/cacheflush.c
+++ b/arch/nios2/mm/cacheflush.c
@@ -23,22 +23,6 @@
 	end += (cpuinfo.dcache_line_size - 1);
 	end &= ~(cpuinfo.dcache_line_size - 1);
 
-	for (addr = start; addr < end; addr += cpuinfo.dcache_line_size) {
-		__asm__ __volatile__ ("   flushda 0(%0)\n"
-					: /* Outputs */
-					: /* Inputs  */ "r"(addr)
-					/* : No clobber */);
-	}
-}
-
-static void __flush_dcache_all(unsigned long start, unsigned long end)
-{
-	unsigned long addr;
-
-	start &= ~(cpuinfo.dcache_line_size - 1);
-	end += (cpuinfo.dcache_line_size - 1);
-	end &= ~(cpuinfo.dcache_line_size - 1);
-
 	if (end > start + cpuinfo.dcache_size)
 		end = start + cpuinfo.dcache_size;
 
@@ -112,7 +96,7 @@
 
 void flush_cache_all(void)
 {
-	__flush_dcache_all(0, cpuinfo.dcache_size);
+	__flush_dcache(0, cpuinfo.dcache_size);
 	__flush_icache(0, cpuinfo.icache_size);
 }
 
@@ -182,7 +166,7 @@
 	 */
 	unsigned long start = (unsigned long)page_address(page);
 
-	__flush_dcache_all(start, start + PAGE_SIZE);
+	__flush_dcache(start, start + PAGE_SIZE);
 }
 
 void flush_dcache_page(struct page *page)
@@ -268,7 +252,7 @@
 {
 	flush_cache_page(vma, user_vaddr, page_to_pfn(page));
 	memcpy(dst, src, len);
-	__flush_dcache_all((unsigned long)src, (unsigned long)src + len);
+	__flush_dcache((unsigned long)src, (unsigned long)src + len);
 	if (vma->vm_flags & VM_EXEC)
 		__flush_icache((unsigned long)src, (unsigned long)src + len);
 }
@@ -279,7 +263,7 @@
 {
 	flush_cache_page(vma, user_vaddr, page_to_pfn(page));
 	memcpy(dst, src, len);
-	__flush_dcache_all((unsigned long)dst, (unsigned long)dst + len);
+	__flush_dcache((unsigned long)dst, (unsigned long)dst + len);
 	if (vma->vm_flags & VM_EXEC)
 		__flush_icache((unsigned long)dst, (unsigned long)dst + len);
 }
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index c365469..729f891 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -108,6 +108,9 @@
 	default 3 if 64BIT && PARISC_PAGE_SIZE_4KB
 	default 2
 
+config SYS_SUPPORTS_HUGETLBFS
+	def_bool y if PA20
+
 source "init/Kconfig"
 
 source "kernel/Kconfig.freezer"
diff --git a/arch/parisc/include/asm/hugetlb.h b/arch/parisc/include/asm/hugetlb.h
new file mode 100644
index 0000000..7d56a9c
--- /dev/null
+++ b/arch/parisc/include/asm/hugetlb.h
@@ -0,0 +1,85 @@
+#ifndef _ASM_PARISC64_HUGETLB_H
+#define _ASM_PARISC64_HUGETLB_H
+
+#include <asm/page.h>
+#include <asm-generic/hugetlb.h>
+
+
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+		     pte_t *ptep, pte_t pte);
+
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep);
+
+static inline int is_hugepage_only_range(struct mm_struct *mm,
+					 unsigned long addr,
+					 unsigned long len) {
+	return 0;
+}
+
+/*
+ * If the arch doesn't supply something else, assume that hugepage
+ * size aligned regions are ok without further preparation.
+ */
+static inline int prepare_hugepage_range(struct file *file,
+			unsigned long addr, unsigned long len)
+{
+	if (len & ~HPAGE_MASK)
+		return -EINVAL;
+	if (addr & ~HPAGE_MASK)
+		return -EINVAL;
+	return 0;
+}
+
+static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
+					  unsigned long addr, unsigned long end,
+					  unsigned long floor,
+					  unsigned long ceiling)
+{
+	free_pgd_range(tlb, addr, end, floor, ceiling);
+}
+
+static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
+					 unsigned long addr, pte_t *ptep)
+{
+}
+
+static inline int huge_pte_none(pte_t pte)
+{
+	return pte_none(pte);
+}
+
+static inline pte_t huge_pte_wrprotect(pte_t pte)
+{
+	return pte_wrprotect(pte);
+}
+
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+					   unsigned long addr, pte_t *ptep)
+{
+	pte_t old_pte = *ptep;
+	set_huge_pte_at(mm, addr, ptep, pte_wrprotect(old_pte));
+}
+
+static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+					     unsigned long addr, pte_t *ptep,
+					     pte_t pte, int dirty)
+{
+	int changed = !pte_same(*ptep, pte);
+	if (changed) {
+		set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
+		flush_tlb_page(vma, addr);
+	}
+	return changed;
+}
+
+static inline pte_t huge_ptep_get(pte_t *ptep)
+{
+	return *ptep;
+}
+
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+}
+
+#endif /* _ASM_PARISC64_HUGETLB_H */
diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h
index 60d5d17..80e742a 100644
--- a/arch/parisc/include/asm/page.h
+++ b/arch/parisc/include/asm/page.h
@@ -145,11 +145,22 @@
 #endif /* CONFIG_DISCONTIGMEM */
 
 #ifdef CONFIG_HUGETLB_PAGE
-#define HPAGE_SHIFT		22	/* 4MB (is this fixed?) */
+#define HPAGE_SHIFT		PMD_SHIFT /* fixed for transparent huge pages */
 #define HPAGE_SIZE      	((1UL) << HPAGE_SHIFT)
 #define HPAGE_MASK		(~(HPAGE_SIZE - 1))
 #define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
+
+#if defined(CONFIG_64BIT) && defined(CONFIG_PARISC_PAGE_SIZE_4KB)
+# define REAL_HPAGE_SHIFT	20 /* 20 = 1MB */
+# define _HUGE_PAGE_SIZE_ENCODING_DEFAULT _PAGE_SIZE_ENCODING_1M
+#elif !defined(CONFIG_64BIT) && defined(CONFIG_PARISC_PAGE_SIZE_4KB)
+# define REAL_HPAGE_SHIFT	22 /* 22 = 4MB */
+# define _HUGE_PAGE_SIZE_ENCODING_DEFAULT _PAGE_SIZE_ENCODING_4M
+#else
+# define REAL_HPAGE_SHIFT	24 /* 24 = 16MB */
+# define _HUGE_PAGE_SIZE_ENCODING_DEFAULT _PAGE_SIZE_ENCODING_16M
 #endif
+#endif /* CONFIG_HUGETLB_PAGE */
 
 #define virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
 
diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
index 3edbb9f..f2fd327 100644
--- a/arch/parisc/include/asm/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
@@ -35,7 +35,7 @@
 				        PxD_FLAG_VALID | 
 					PxD_FLAG_ATTACHED) 
 			+ (__u32)(__pa((unsigned long)pgd) >> PxD_VALUE_SHIFT));
-		/* The first pmd entry also is marked with _PAGE_GATEWAY as
+		/* The first pmd entry also is marked with PxD_FLAG_ATTACHED as
 		 * a signal that this pmd may not be freed */
 		__pgd_val_set(*pgd, PxD_FLAG_ATTACHED);
 #endif
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index f93c4a4..d8534f9 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -83,7 +83,11 @@
 	printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, (unsigned long)pgd_val(e))
 
 /* This is the size of the initially mapped kernel memory */
-#define KERNEL_INITIAL_ORDER	24	/* 0 to 1<<24 = 16MB */
+#ifdef CONFIG_64BIT
+#define KERNEL_INITIAL_ORDER	25	/* 1<<25 = 32MB */
+#else
+#define KERNEL_INITIAL_ORDER	24	/* 1<<24 = 16MB */
+#endif
 #define KERNEL_INITIAL_SIZE	(1 << KERNEL_INITIAL_ORDER)
 
 #if CONFIG_PGTABLE_LEVELS == 3
@@ -167,7 +171,7 @@
 #define _PAGE_NO_CACHE_BIT 24   /* (0x080) Uncached Page (U bit) */
 #define _PAGE_ACCESSED_BIT 23   /* (0x100) Software: Page Accessed */
 #define _PAGE_PRESENT_BIT  22   /* (0x200) Software: translation valid */
-/* bit 21 was formerly the FLUSH bit but is now unused */
+#define _PAGE_HPAGE_BIT    21   /* (0x400) Software: Huge Page */
 #define _PAGE_USER_BIT     20   /* (0x800) Software: User accessible page */
 
 /* N.B. The bits are defined in terms of a 32 bit word above, so the */
@@ -194,6 +198,7 @@
 #define _PAGE_NO_CACHE (1 << xlate_pabit(_PAGE_NO_CACHE_BIT))
 #define _PAGE_ACCESSED (1 << xlate_pabit(_PAGE_ACCESSED_BIT))
 #define _PAGE_PRESENT  (1 << xlate_pabit(_PAGE_PRESENT_BIT))
+#define _PAGE_HUGE     (1 << xlate_pabit(_PAGE_HPAGE_BIT))
 #define _PAGE_USER     (1 << xlate_pabit(_PAGE_USER_BIT))
 
 #define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE |  _PAGE_DIRTY | _PAGE_ACCESSED)
@@ -217,7 +222,7 @@
 #define PxD_FLAG_VALID    (1 << xlate_pabit(_PxD_VALID_BIT))
 #define PxD_FLAG_MASK     (0xf)
 #define PxD_FLAG_SHIFT    (4)
-#define PxD_VALUE_SHIFT   (8) /* (PAGE_SHIFT-PxD_FLAG_SHIFT) */
+#define PxD_VALUE_SHIFT   (PFN_PTE_SHIFT-PxD_FLAG_SHIFT)
 
 #ifndef __ASSEMBLY__
 
@@ -363,6 +368,18 @@
 static inline pte_t pte_mkspecial(pte_t pte)	{ return pte; }
 
 /*
+ * Huge pte definitions.
+ */
+#ifdef CONFIG_HUGETLB_PAGE
+#define pte_huge(pte)           (pte_val(pte) & _PAGE_HUGE)
+#define pte_mkhuge(pte)         (__pte(pte_val(pte) | _PAGE_HUGE))
+#else
+#define pte_huge(pte)           (0)
+#define pte_mkhuge(pte)         (pte)
+#endif
+
+
+/*
  * Conversion functions: convert a page and protection to a page entry,
  * and a page entry and page directory to the page they refer to.
  */
@@ -410,8 +427,9 @@
 /* Find an entry in the second-level page table.. */
 
 #if CONFIG_PGTABLE_LEVELS == 3
+#define pmd_index(addr)         (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
 #define pmd_offset(dir,address) \
-((pmd_t *) pgd_page_vaddr(*(dir)) + (((address)>>PMD_SHIFT) & (PTRS_PER_PMD-1)))
+((pmd_t *) pgd_page_vaddr(*(dir)) + pmd_index(address))
 #else
 #define pmd_offset(dir,addr) ((pmd_t *) dir)
 #endif
diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h
index 54adb60..7e759ec 100644
--- a/arch/parisc/include/asm/processor.h
+++ b/arch/parisc/include/asm/processor.h
@@ -192,33 +192,6 @@
  */
 typedef unsigned int elf_caddr_t;
 
-#define start_thread_som(regs, new_pc, new_sp) do {	\
-	unsigned long *sp = (unsigned long *)new_sp;	\
-	__u32 spaceid = (__u32)current->mm->context;	\
-	unsigned long pc = (unsigned long)new_pc;	\
-	/* offset pc for priv. level */			\
-	pc |= 3;					\
-							\
-	regs->iasq[0] = spaceid;			\
-	regs->iasq[1] = spaceid;			\
-	regs->iaoq[0] = pc;				\
-	regs->iaoq[1] = pc + 4;                         \
-	regs->sr[2] = LINUX_GATEWAY_SPACE;              \
-	regs->sr[3] = 0xffff;				\
-	regs->sr[4] = spaceid;				\
-	regs->sr[5] = spaceid;				\
-	regs->sr[6] = spaceid;				\
-	regs->sr[7] = spaceid;				\
-	regs->gr[ 0] = USER_PSW;                        \
-	regs->gr[30] = ((new_sp)+63)&~63;		\
-	regs->gr[31] = pc;				\
-							\
-	get_user(regs->gr[26],&sp[0]);			\
-	get_user(regs->gr[25],&sp[-1]); 		\
-	get_user(regs->gr[24],&sp[-2]); 		\
-	get_user(regs->gr[23],&sp[-3]); 		\
-} while(0)
-
 /* The ELF abi wants things done a "wee bit" differently than
  * som does.  Supporting this behavior here avoids
  * having our own version of create_elf_tables.
diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h
index ecc3ae1..dd4d187 100644
--- a/arch/parisc/include/uapi/asm/mman.h
+++ b/arch/parisc/include/uapi/asm/mman.h
@@ -49,16 +49,6 @@
 #define MADV_DONTFORK	10		/* don't inherit across fork */
 #define MADV_DOFORK	11		/* do inherit across fork */
 
-/* The range 12-64 is reserved for page size specification. */
-#define MADV_4K_PAGES   12              /* Use 4K pages  */
-#define MADV_16K_PAGES  14              /* Use 16K pages */
-#define MADV_64K_PAGES  16              /* Use 64K pages */
-#define MADV_256K_PAGES 18              /* Use 256K pages */
-#define MADV_1M_PAGES   20              /* Use 1 Megabyte pages */
-#define MADV_4M_PAGES   22              /* Use 4 Megabyte pages */
-#define MADV_16M_PAGES  24              /* Use 16 Megabyte pages */
-#define MADV_64M_PAGES  26              /* Use 64 Megabyte pages */
-
 #define MADV_MERGEABLE   65		/* KSM may merge identical pages */
 #define MADV_UNMERGEABLE 66		/* KSM may not merge identical pages */
 
diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c
index 59001ce..d2f6257 100644
--- a/arch/parisc/kernel/asm-offsets.c
+++ b/arch/parisc/kernel/asm-offsets.c
@@ -290,6 +290,14 @@
 	DEFINE(ASM_PFN_PTE_SHIFT, PFN_PTE_SHIFT);
 	DEFINE(ASM_PT_INITIAL, PT_INITIAL);
 	BLANK();
+	/* HUGEPAGE_SIZE is only used in vmlinux.lds.S to align kernel text
+	 * and kernel data on physical huge pages */
+#ifdef CONFIG_HUGETLB_PAGE
+	DEFINE(HUGEPAGE_SIZE, 1UL << REAL_HPAGE_SHIFT);
+#else
+	DEFINE(HUGEPAGE_SIZE, PAGE_SIZE);
+#endif
+	BLANK();
 	DEFINE(EXCDATA_IP, offsetof(struct exception_data, fault_ip));
 	DEFINE(EXCDATA_SPACE, offsetof(struct exception_data, fault_space));
 	DEFINE(EXCDATA_ADDR, offsetof(struct exception_data, fault_addr));
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index c5ef408..623496c 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -502,21 +502,38 @@
 	STREG		\pte,0(\ptp)
 	.endm
 
+	/* We have (depending on the page size):
+	 * - 38 to 52-bit Physical Page Number
+	 * - 12 to 26-bit page offset
+	 */
 	/* bitshift difference between a PFN (based on kernel's PAGE_SIZE)
 	 * to a CPU TLB 4k PFN (4k => 12 bits to shift) */
-	#define PAGE_ADD_SHIFT  (PAGE_SHIFT-12)
+	#define PAGE_ADD_SHIFT		(PAGE_SHIFT-12)
+	#define PAGE_ADD_HUGE_SHIFT	(REAL_HPAGE_SHIFT-12)
 
 	/* Drop prot bits and convert to page addr for iitlbt and idtlbt */
-	.macro		convert_for_tlb_insert20 pte
+	.macro		convert_for_tlb_insert20 pte,tmp
+#ifdef CONFIG_HUGETLB_PAGE
+	copy		\pte,\tmp
+	extrd,u		\tmp,(63-ASM_PFN_PTE_SHIFT)+(63-58)+PAGE_ADD_SHIFT,\
+				64-PAGE_SHIFT-PAGE_ADD_SHIFT,\pte
+
+	depdi		_PAGE_SIZE_ENCODING_DEFAULT,63,\
+				(63-58)+PAGE_ADD_SHIFT,\pte
+	extrd,u,*=	\tmp,_PAGE_HPAGE_BIT+32,1,%r0
+	depdi		_HUGE_PAGE_SIZE_ENCODING_DEFAULT,63,\
+				(63-58)+PAGE_ADD_HUGE_SHIFT,\pte
+#else /* Huge pages disabled */
 	extrd,u		\pte,(63-ASM_PFN_PTE_SHIFT)+(63-58)+PAGE_ADD_SHIFT,\
 				64-PAGE_SHIFT-PAGE_ADD_SHIFT,\pte
 	depdi		_PAGE_SIZE_ENCODING_DEFAULT,63,\
 				(63-58)+PAGE_ADD_SHIFT,\pte
+#endif
 	.endm
 
 	/* Convert the pte and prot to tlb insertion values.  How
 	 * this happens is quite subtle, read below */
-	.macro		make_insert_tlb	spc,pte,prot
+	.macro		make_insert_tlb	spc,pte,prot,tmp
 	space_to_prot   \spc \prot        /* create prot id from space */
 	/* The following is the real subtlety.  This is depositing
 	 * T <-> _PAGE_REFTRAP
@@ -553,7 +570,7 @@
 	depdi		1,12,1,\prot
 
 	/* Drop prot bits and convert to page addr for iitlbt and idtlbt */
-	convert_for_tlb_insert20 \pte
+	convert_for_tlb_insert20 \pte \tmp
 	.endm
 
 	/* Identical macro to make_insert_tlb above, except it
@@ -646,17 +663,12 @@
 
 
 	/*
-	 * Align fault_vector_20 on 4K boundary so that both
-	 * fault_vector_11 and fault_vector_20 are on the
-	 * same page. This is only necessary as long as we
-	 * write protect the kernel text, which we may stop
-	 * doing once we use large page translations to cover
-	 * the static part of the kernel address space.
+	 * Fault_vectors are architecturally required to be aligned on a 2K
+	 * boundary
 	 */
 
 	.text
-
-	.align 4096
+	.align 2048
 
 ENTRY(fault_vector_20)
 	/* First vector is invalid (0) */
@@ -1147,7 +1159,7 @@
 	tlb_lock	spc,ptp,pte,t0,t1,dtlb_check_alias_20w
 	update_accessed	ptp,pte,t0,t1
 
-	make_insert_tlb	spc,pte,prot
+	make_insert_tlb	spc,pte,prot,t1
 	
 	idtlbt          pte,prot
 
@@ -1173,7 +1185,7 @@
 	tlb_lock	spc,ptp,pte,t0,t1,nadtlb_check_alias_20w
 	update_accessed	ptp,pte,t0,t1
 
-	make_insert_tlb	spc,pte,prot
+	make_insert_tlb	spc,pte,prot,t1
 
 	idtlbt          pte,prot
 
@@ -1267,7 +1279,7 @@
 	tlb_lock	spc,ptp,pte,t0,t1,dtlb_check_alias_20
 	update_accessed	ptp,pte,t0,t1
 
-	make_insert_tlb	spc,pte,prot
+	make_insert_tlb	spc,pte,prot,t1
 
 	f_extend	pte,t1
 
@@ -1295,7 +1307,7 @@
 	tlb_lock	spc,ptp,pte,t0,t1,nadtlb_check_alias_20
 	update_accessed	ptp,pte,t0,t1
 
-	make_insert_tlb	spc,pte,prot
+	make_insert_tlb	spc,pte,prot,t1
 
 	f_extend	pte,t1
 	
@@ -1404,7 +1416,7 @@
 	tlb_lock	spc,ptp,pte,t0,t1,itlb_fault
 	update_accessed	ptp,pte,t0,t1
 
-	make_insert_tlb	spc,pte,prot
+	make_insert_tlb	spc,pte,prot,t1
 	
 	iitlbt          pte,prot
 
@@ -1428,7 +1440,7 @@
 	tlb_lock	spc,ptp,pte,t0,t1,naitlb_check_alias_20w
 	update_accessed	ptp,pte,t0,t1
 
-	make_insert_tlb	spc,pte,prot
+	make_insert_tlb	spc,pte,prot,t1
 
 	iitlbt          pte,prot
 
@@ -1514,7 +1526,7 @@
 	tlb_lock	spc,ptp,pte,t0,t1,itlb_fault
 	update_accessed	ptp,pte,t0,t1
 
-	make_insert_tlb	spc,pte,prot
+	make_insert_tlb	spc,pte,prot,t1
 
 	f_extend	pte,t1
 
@@ -1534,7 +1546,7 @@
 	tlb_lock	spc,ptp,pte,t0,t1,naitlb_check_alias_20
 	update_accessed	ptp,pte,t0,t1
 
-	make_insert_tlb	spc,pte,prot
+	make_insert_tlb	spc,pte,prot,t1
 
 	f_extend	pte,t1
 
@@ -1566,7 +1578,7 @@
 	tlb_lock	spc,ptp,pte,t0,t1,dbit_fault
 	update_dirty	ptp,pte,t1
 
-	make_insert_tlb	spc,pte,prot
+	make_insert_tlb	spc,pte,prot,t1
 		
 	idtlbt          pte,prot
 
@@ -1610,7 +1622,7 @@
 	tlb_lock	spc,ptp,pte,t0,t1,dbit_fault
 	update_dirty	ptp,pte,t1
 
-	make_insert_tlb	spc,pte,prot
+	make_insert_tlb	spc,pte,prot,t1
 
 	f_extend	pte,t1
 	
diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S
index e7d6452..75aa0db 100644
--- a/arch/parisc/kernel/head.S
+++ b/arch/parisc/kernel/head.S
@@ -69,7 +69,7 @@
 	stw,ma          %arg2,4(%r1)
 	stw,ma          %arg3,4(%r1)
 
-	/* Initialize startup VM. Just map first 8/16 MB of memory */
+	/* Initialize startup VM. Just map first 16/32 MB of memory */
 	load32		PA(swapper_pg_dir),%r4
 	mtctl		%r4,%cr24	/* Initialize kernel root pointer */
 	mtctl		%r4,%cr25	/* Initialize user root pointer */
@@ -107,7 +107,7 @@
 	/* Now initialize the PTEs themselves.  We use RWX for
 	 * everything ... it will get remapped correctly later */
 	ldo		0+_PAGE_KERNEL_RWX(%r0),%r3 /* Hardwired 0 phys addr start */
-	ldi		(1<<(KERNEL_INITIAL_ORDER-PAGE_SHIFT)),%r11 /* PFN count */
+	load32		(1<<(KERNEL_INITIAL_ORDER-PAGE_SHIFT)),%r11 /* PFN count */
 	load32		PA(pg0),%r1
 
 $pgt_fill_loop:
diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
index 72a3c65..f7ea626 100644
--- a/arch/parisc/kernel/setup.c
+++ b/arch/parisc/kernel/setup.c
@@ -130,7 +130,16 @@
 	printk(KERN_INFO "The 32-bit Kernel has started...\n");
 #endif
 
-	printk(KERN_INFO "Default page size is %dKB.\n", (int)(PAGE_SIZE / 1024));
+	printk(KERN_INFO "Kernel default page size is %d KB. Huge pages ",
+		(int)(PAGE_SIZE / 1024));
+#ifdef CONFIG_HUGETLB_PAGE
+	printk(KERN_CONT "enabled with %d MB physical and %d MB virtual size",
+		 1 << (REAL_HPAGE_SHIFT - 20), 1 << (HPAGE_SHIFT - 20));
+#else
+	printk(KERN_CONT "disabled");
+#endif
+	printk(KERN_CONT ".\n");
+
 
 	pdc_console_init();
 
@@ -377,6 +386,7 @@
 void start_parisc(void)
 {
 	extern void start_kernel(void);
+	extern void early_trap_init(void);
 
 	int ret, cpunum;
 	struct pdc_coproc_cfg coproc_cfg;
@@ -397,6 +407,8 @@
 		panic("must have an fpu to boot linux");
 	}
 
+	early_trap_init(); /* initialize checksum of fault_vector */
+
 	start_kernel();
 	// not reached
 }
diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
index 0b8d26d..3fbd725 100644
--- a/arch/parisc/kernel/syscall.S
+++ b/arch/parisc/kernel/syscall.S
@@ -369,7 +369,7 @@
 	ldo	-16(%r30),%r29			/* Reference param save area */
 #endif
 	ldo	TASK_REGS(%r1),%r26
-	bl	do_syscall_trace_exit,%r2
+	BL	do_syscall_trace_exit,%r2
 	STREG   %r28,TASK_PT_GR28(%r1)          /* save return value now */
 	ldo     -THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r1      /* get task ptr */
 	LDREG	TI_TASK(%r1), %r1
@@ -390,7 +390,7 @@
 #ifdef CONFIG_64BIT
 	ldo	-16(%r30),%r29			/* Reference param save area */
 #endif
-	bl	do_syscall_trace_exit,%r2
+	BL	do_syscall_trace_exit,%r2
 	ldo	TASK_REGS(%r1),%r26
 
 	ldil	L%syscall_exit_rfi,%r1
diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index b99b39f..553b098 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -807,7 +807,7 @@
 }
 
 
-int __init check_ivt(void *iva)
+void __init initialize_ivt(const void *iva)
 {
 	extern u32 os_hpmc_size;
 	extern const u32 os_hpmc[];
@@ -818,8 +818,8 @@
 	u32 *hpmcp;
 	u32 length;
 
-	if (strcmp((char *)iva, "cows can fly"))
-		return -1;
+	if (strcmp((const char *)iva, "cows can fly"))
+		panic("IVT invalid");
 
 	ivap = (u32 *)iva;
 
@@ -839,28 +839,23 @@
 	    check += ivap[i];
 
 	ivap[5] = -check;
-
-	return 0;
 }
 	
+
+/* early_trap_init() is called before we set up kernel mappings and
+ * write-protect the kernel */
+void  __init early_trap_init(void)
+{
+	extern const void fault_vector_20;
+
 #ifndef CONFIG_64BIT
-extern const void fault_vector_11;
+	extern const void fault_vector_11;
+	initialize_ivt(&fault_vector_11);
 #endif
-extern const void fault_vector_20;
+
+	initialize_ivt(&fault_vector_20);
+}
 
 void __init trap_init(void)
 {
-	void *iva;
-
-	if (boot_cpu_data.cpu_type >= pcxu)
-		iva = (void *) &fault_vector_20;
-	else
-#ifdef CONFIG_64BIT
-		panic("Can't boot 64-bit OS on PA1.1 processor!");
-#else
-		iva = (void *) &fault_vector_11;
-#endif
-
-	if (check_ivt(iva))
-		panic("IVT invalid");
 }
diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S
index 0dacc5c..308f290 100644
--- a/arch/parisc/kernel/vmlinux.lds.S
+++ b/arch/parisc/kernel/vmlinux.lds.S
@@ -60,7 +60,7 @@
 		EXIT_DATA
 	}
 	PERCPU_SECTION(8)
-	. = ALIGN(PAGE_SIZE);
+	. = ALIGN(HUGEPAGE_SIZE);
 	__init_end = .;
 	/* freed after init ends here */
 
@@ -116,7 +116,7 @@
 	 * that we can properly leave these
 	 * as writable
 	 */
-	. = ALIGN(PAGE_SIZE);
+	. = ALIGN(HUGEPAGE_SIZE);
 	data_start = .;
 
 	EXCEPTION_TABLE(8)
@@ -135,8 +135,11 @@
 	_edata = .;
 
 	/* BSS */
-	BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 8)
+	BSS_SECTION(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE)
 
+	/* bootmap is allocated in setup_bootmem() directly behind bss. */
+
+	. = ALIGN(HUGEPAGE_SIZE);
 	_end = . ;
 
 	STABS_DEBUG
diff --git a/arch/parisc/mm/Makefile b/arch/parisc/mm/Makefile
index 758ceef..134393d 100644
--- a/arch/parisc/mm/Makefile
+++ b/arch/parisc/mm/Makefile
@@ -3,3 +3,4 @@
 #
 
 obj-y	 := init.o fault.o ioremap.o
+obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/parisc/mm/hugetlbpage.c b/arch/parisc/mm/hugetlbpage.c
new file mode 100644
index 0000000..f6fdc77
--- /dev/null
+++ b/arch/parisc/mm/hugetlbpage.c
@@ -0,0 +1,161 @@
+/*
+ * PARISC64 Huge TLB page support.
+ *
+ * This parisc implementation is heavily based on the SPARC and x86 code.
+ *
+ * Copyright (C) 2015 Helge Deller <deller@gmx.de>
+ */
+
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/pagemap.h>
+#include <linux/sysctl.h>
+
+#include <asm/mman.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
+#include <asm/mmu_context.h>
+
+
+unsigned long
+hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+		unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+	struct hstate *h = hstate_file(file);
+
+	if (len & ~huge_page_mask(h))
+		return -EINVAL;
+	if (len > TASK_SIZE)
+		return -ENOMEM;
+
+	if (flags & MAP_FIXED)
+		if (prepare_hugepage_range(file, addr, len))
+			return -EINVAL;
+
+	if (addr)
+		addr = ALIGN(addr, huge_page_size(h));
+
+	/* we need to make sure the colouring is OK */
+	return arch_get_unmapped_area(file, addr, len, pgoff, flags);
+}
+
+
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+			unsigned long addr, unsigned long sz)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte = NULL;
+
+	/* We must align the address, because our caller will run
+	 * set_huge_pte_at() on whatever we return, which writes out
+	 * all of the sub-ptes for the hugepage range.  So we have
+	 * to give it the first such sub-pte.
+	 */
+	addr &= HPAGE_MASK;
+
+	pgd = pgd_offset(mm, addr);
+	pud = pud_alloc(mm, pgd, addr);
+	if (pud) {
+		pmd = pmd_alloc(mm, pud, addr);
+		if (pmd)
+			pte = pte_alloc_map(mm, NULL, pmd, addr);
+	}
+	return pte;
+}
+
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte = NULL;
+
+	addr &= HPAGE_MASK;
+
+	pgd = pgd_offset(mm, addr);
+	if (!pgd_none(*pgd)) {
+		pud = pud_offset(pgd, addr);
+		if (!pud_none(*pud)) {
+			pmd = pmd_offset(pud, addr);
+			if (!pmd_none(*pmd))
+				pte = pte_offset_map(pmd, addr);
+		}
+	}
+	return pte;
+}
+
+/* Purge data and instruction TLB entries.  Must be called holding
+ * the pa_tlb_lock.  The TLB purge instructions are slow on SMP
+ * machines since the purge must be broadcast to all CPUs.
+ */
+static inline void purge_tlb_entries_huge(struct mm_struct *mm, unsigned long addr)
+{
+	int i;
+
+	/* We may use multiple physical huge pages (e.g. 2x1 MB) to emulate
+	 * Linux standard huge pages (e.g. 2 MB) */
+	BUILD_BUG_ON(REAL_HPAGE_SHIFT > HPAGE_SHIFT);
+
+	addr &= HPAGE_MASK;
+	addr |= _HUGE_PAGE_SIZE_ENCODING_DEFAULT;
+
+	for (i = 0; i < (1 << (HPAGE_SHIFT-REAL_HPAGE_SHIFT)); i++) {
+		mtsp(mm->context, 1);
+		pdtlb(addr);
+		if (unlikely(split_tlb))
+			pitlb(addr);
+		addr += (1UL << REAL_HPAGE_SHIFT);
+	}
+}
+
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+		     pte_t *ptep, pte_t entry)
+{
+	unsigned long addr_start;
+	int i;
+
+	addr &= HPAGE_MASK;
+	addr_start = addr;
+
+	for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
+		/* Directly write pte entry.  We could call set_pte_at(mm, addr, ptep, entry)
+		 * instead, but then we get double locking on pa_tlb_lock. */
+		*ptep = entry;
+		ptep++;
+
+		/* Drop the PAGE_SIZE/non-huge tlb entry */
+		purge_tlb_entries(mm, addr);
+
+		addr += PAGE_SIZE;
+		pte_val(entry) += PAGE_SIZE;
+	}
+
+	purge_tlb_entries_huge(mm, addr_start);
+}
+
+
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep)
+{
+	pte_t entry;
+
+	entry = *ptep;
+	set_huge_pte_at(mm, addr, ptep, __pte(0));
+
+	return entry;
+}
+
+int pmd_huge(pmd_t pmd)
+{
+	return 0;
+}
+
+int pud_huge(pud_t pud)
+{
+	return 0;
+}
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index c5fec48..1b366c4 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -409,15 +409,11 @@
 	unsigned long vaddr;
 	unsigned long ro_start;
 	unsigned long ro_end;
-	unsigned long fv_addr;
-	unsigned long gw_addr;
-	extern const unsigned long fault_vector_20;
-	extern void * const linux_gateway_page;
+	unsigned long kernel_end;
 
 	ro_start = __pa((unsigned long)_text);
 	ro_end   = __pa((unsigned long)&data_start);
-	fv_addr  = __pa((unsigned long)&fault_vector_20) & PAGE_MASK;
-	gw_addr  = __pa((unsigned long)&linux_gateway_page) & PAGE_MASK;
+	kernel_end  = __pa((unsigned long)&_end);
 
 	end_paddr = start_paddr + size;
 
@@ -475,24 +471,25 @@
 			for (tmp2 = start_pte; tmp2 < PTRS_PER_PTE; tmp2++, pg_table++) {
 				pte_t pte;
 
-				/*
-				 * Map the fault vector writable so we can
-				 * write the HPMC checksum.
-				 */
 				if (force)
 					pte =  __mk_pte(address, pgprot);
-				else if (parisc_text_address(vaddr) &&
-					 address != fv_addr)
+				else if (parisc_text_address(vaddr)) {
 					pte = __mk_pte(address, PAGE_KERNEL_EXEC);
+					if (address >= ro_start && address < kernel_end)
+						pte = pte_mkhuge(pte);
+				}
 				else
 #if defined(CONFIG_PARISC_PAGE_SIZE_4KB)
-				if (address >= ro_start && address < ro_end
-							&& address != fv_addr
-							&& address != gw_addr)
-					pte = __mk_pte(address, PAGE_KERNEL_RO);
-				else
+				if (address >= ro_start && address < ro_end) {
+					pte = __mk_pte(address, PAGE_KERNEL_EXEC);
+					pte = pte_mkhuge(pte);
+				} else
 #endif
+				{
 					pte = __mk_pte(address, pgprot);
+					if (address >= ro_start && address < kernel_end)
+						pte = pte_mkhuge(pte);
+				}
 
 				if (address >= end_paddr) {
 					if (force)
@@ -536,15 +533,12 @@
 
 	/* force the kernel to see the new TLB entries */
 	__flush_tlb_range(0, init_begin, init_end);
-	/* Attempt to catch anyone trying to execute code here
-	 * by filling the page with BRK insns.
-	 */
-	memset((void *)init_begin, 0x00, init_end - init_begin);
+
 	/* finally dump all the instructions which were cached, since the
 	 * pages are no-longer executable */
 	flush_icache_range(init_begin, init_end);
 	
-	free_initmem_default(-1);
+	free_initmem_default(POISON_FREE_INITMEM);
 
 	/* set up a new led state on systems shipped LED State panel */
 	pdc_chassis_send_status(PDC_CHASSIS_DIRECT_BCOMPLETE);
@@ -728,8 +722,8 @@
 		unsigned long size;
 
 		start_paddr = pmem_ranges[range].start_pfn << PAGE_SHIFT;
-		end_paddr = start_paddr + (pmem_ranges[range].pages << PAGE_SHIFT);
 		size = pmem_ranges[range].pages << PAGE_SHIFT;
+		end_paddr = start_paddr + size;
 
 		map_pages((unsigned long)__va(start_paddr), start_paddr,
 			  size, PAGE_KERNEL, 0);
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index a908ada..2220f7a 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -108,6 +108,7 @@
 #define MSR_TS_T	__MASK(MSR_TS_T_LG)	/*  Transaction Transactional */
 #define MSR_TS_MASK	(MSR_TS_T | MSR_TS_S)   /* Transaction State bits */
 #define MSR_TM_ACTIVE(x) (((x) & MSR_TS_MASK) != 0) /* Transaction active? */
+#define MSR_TM_RESV(x) (((x) & MSR_TS_MASK) == MSR_TS_MASK) /* Reserved */
 #define MSR_TM_TRANSACTIONAL(x)	(((x) & MSR_TS_MASK) == MSR_TS_T)
 #define MSR_TM_SUSPENDED(x)	(((x) & MSR_TS_MASK) == MSR_TS_S)
 
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index c9e26cb..f2b0b1b 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -382,3 +382,4 @@
 SYSCALL(shmdt)
 SYSCALL(shmget)
 COMPAT_SYS(shmctl)
+SYSCALL(mlock2)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 6d8f802..4b6b8ac 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -12,7 +12,7 @@
 #include <uapi/asm/unistd.h>
 
 
-#define __NR_syscalls		378
+#define __NR_syscalls		379
 
 #define __NR__exit __NR_exit
 #define NR_syscalls	__NR_syscalls
diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h
index 81579e9..1effea5 100644
--- a/arch/powerpc/include/uapi/asm/unistd.h
+++ b/arch/powerpc/include/uapi/asm/unistd.h
@@ -400,5 +400,6 @@
 #define __NR_shmdt		375
 #define __NR_shmget		376
 #define __NR_shmctl		377
+#define __NR_mlock2		378
 
 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 75b6676..646bf4d 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -551,6 +551,24 @@
 		msr_diff &= MSR_FP | MSR_VEC | MSR_VSX | MSR_FE0 | MSR_FE1;
 	}
 
+	/*
+	 * Use the current MSR TM suspended bit to track if we have
+	 * checkpointed state outstanding.
+	 * On signal delivery, we'd normally reclaim the checkpointed
+	 * state to obtain stack pointer (see:get_tm_stackpointer()).
+	 * This will then directly return to userspace without going
+	 * through __switch_to(). However, if the stack frame is bad,
+	 * we need to exit this thread which calls __switch_to() which
+	 * will again attempt to reclaim the already saved tm state.
+	 * Hence we need to check that we've not already reclaimed
+	 * this state.
+	 * We do this using the current MSR, rather tracking it in
+	 * some specific thread_struct bit, as it has the additional
+	 * benifit of checking for a potential TM bad thing exception.
+	 */
+	if (!MSR_TM_SUSPENDED(mfmsr()))
+		return;
+
 	tm_reclaim(thr, thr->regs->msr, cause);
 
 	/* Having done the reclaim, we now have the checkpointed
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 0dbee46..ef7c24e 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -875,6 +875,15 @@
 		return 1;
 #endif /* CONFIG_SPE */
 
+	/* Get the top half of the MSR from the user context */
+	if (__get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR]))
+		return 1;
+	msr_hi <<= 32;
+	/* If TM bits are set to the reserved value, it's an invalid context */
+	if (MSR_TM_RESV(msr_hi))
+		return 1;
+	/* Pull in the MSR TM bits from the user context */
+	regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr_hi & MSR_TS_MASK);
 	/* Now, recheckpoint.  This loads up all of the checkpointed (older)
 	 * registers, including FP and V[S]Rs.  After recheckpointing, the
 	 * transactional versions should be loaded.
@@ -884,11 +893,6 @@
 	current->thread.tm_texasr |= TEXASR_FS;
 	/* This loads the checkpointed FP/VEC state, if used */
 	tm_recheckpoint(&current->thread, msr);
-	/* Get the top half of the MSR */
-	if (__get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR]))
-		return 1;
-	/* Pull in MSR TM from user context */
-	regs->msr = (regs->msr & ~MSR_TS_MASK) | ((msr_hi<<32) & MSR_TS_MASK);
 
 	/* This loads the speculative FP/VEC state, if used */
 	if (msr & MSR_FP) {
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 20756df..c676ece 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -438,6 +438,10 @@
 
 	/* get MSR separately, transfer the LE bit if doing signal return */
 	err |= __get_user(msr, &sc->gp_regs[PT_MSR]);
+	/* Don't allow reserved mode. */
+	if (MSR_TM_RESV(msr))
+		return -EINVAL;
+
 	/* pull in MSR TM from user context */
 	regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr & MSR_TS_MASK);
 
diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
index 0c5d8ee..d1e7b0a 100644
--- a/arch/s390/include/asm/cio.h
+++ b/arch/s390/include/asm/cio.h
@@ -312,6 +312,7 @@
 extern void reipl_ccw_dev(struct ccw_dev_id *id);
 
 struct cio_iplinfo {
+	u8 ssid;
 	u16 devno;
 	int is_qdio;
 };
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index 3ad48f2..bab6739 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -206,9 +206,16 @@
 } while (0)
 #endif /* CONFIG_COMPAT */
 
-extern unsigned long mmap_rnd_mask;
-
-#define STACK_RND_MASK	(test_thread_flag(TIF_31BIT) ? 0x7ff : mmap_rnd_mask)
+/*
+ * Cache aliasing on the latest machines calls for a mapping granularity
+ * of 512KB. For 64-bit processes use a 512KB alignment and a randomization
+ * of up to 1GB. For 31-bit processes the virtual address space is limited,
+ * use no alignment and limit the randomization to 8MB.
+ */
+#define BRK_RND_MASK	(is_32bit_task() ? 0x7ffUL : 0x3ffffUL)
+#define MMAP_RND_MASK	(is_32bit_task() ? 0x7ffUL : 0x3ff80UL)
+#define MMAP_ALIGN_MASK	(is_32bit_task() ? 0 : 0x7fUL)
+#define STACK_RND_MASK	MMAP_RND_MASK
 
 #define ARCH_DLINFO							    \
 do {									    \
diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h
index 39ae6a3..86634e7 100644
--- a/arch/s390/include/asm/ipl.h
+++ b/arch/s390/include/asm/ipl.h
@@ -64,7 +64,8 @@
 
 struct ipl_block_ccw {
 	u8  reserved1[84];
-	u8  reserved2[2];
+	u16 reserved2 : 13;
+	u8  ssid : 3;
 	u16 devno;
 	u8  vm_flags;
 	u8  reserved3[3];
diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h
index 7a7abf1..1aac41e 100644
--- a/arch/s390/include/asm/pci_dma.h
+++ b/arch/s390/include/asm/pci_dma.h
@@ -195,5 +195,7 @@
 void dma_free_seg_table(unsigned long);
 unsigned long *dma_alloc_cpu_table(void);
 void dma_cleanup_tables(unsigned long *);
-void dma_update_cpu_trans(unsigned long *, void *, dma_addr_t, int);
+unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr);
+void dma_update_cpu_trans(unsigned long *entry, void *page_addr, int flags);
+
 #endif
diff --git a/arch/s390/include/asm/trace/diag.h b/arch/s390/include/asm/trace/diag.h
index 776f307..cc6cfe7 100644
--- a/arch/s390/include/asm/trace/diag.h
+++ b/arch/s390/include/asm/trace/diag.h
@@ -19,7 +19,7 @@
 #define TRACE_INCLUDE_PATH asm/trace
 #define TRACE_INCLUDE_FILE diag
 
-TRACE_EVENT(diagnose,
+TRACE_EVENT(s390_diagnose,
 	TP_PROTO(unsigned short nr),
 	TP_ARGS(nr),
 	TP_STRUCT__entry(
@@ -32,9 +32,9 @@
 );
 
 #ifdef CONFIG_TRACEPOINTS
-void trace_diagnose_norecursion(int diag_nr);
+void trace_s390_diagnose_norecursion(int diag_nr);
 #else
-static inline void trace_diagnose_norecursion(int diag_nr) { }
+static inline void trace_s390_diagnose_norecursion(int diag_nr) { }
 #endif
 
 #endif /* _TRACE_S390_DIAG_H */
diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h
index a848adb..34ec202 100644
--- a/arch/s390/include/uapi/asm/unistd.h
+++ b/arch/s390/include/uapi/asm/unistd.h
@@ -192,14 +192,14 @@
 #define __NR_set_tid_address	252
 #define __NR_fadvise64		253
 #define __NR_timer_create	254
-#define __NR_timer_settime	(__NR_timer_create+1)
-#define __NR_timer_gettime	(__NR_timer_create+2)
-#define __NR_timer_getoverrun	(__NR_timer_create+3)
-#define __NR_timer_delete	(__NR_timer_create+4)
-#define __NR_clock_settime	(__NR_timer_create+5)
-#define __NR_clock_gettime	(__NR_timer_create+6)
-#define __NR_clock_getres	(__NR_timer_create+7)
-#define __NR_clock_nanosleep	(__NR_timer_create+8)
+#define __NR_timer_settime	255
+#define __NR_timer_gettime	256
+#define __NR_timer_getoverrun	257
+#define __NR_timer_delete	258
+#define __NR_clock_settime	259
+#define __NR_clock_gettime	260
+#define __NR_clock_getres	261
+#define __NR_clock_nanosleep	262
 /* Number 263 is reserved for vserver */
 #define __NR_statfs64		265
 #define __NR_fstatfs64		266
@@ -309,7 +309,8 @@
 #define __NR_recvfrom		371
 #define __NR_recvmsg		372
 #define __NR_shutdown		373
-#define NR_syscalls 374
+#define __NR_mlock2		374
+#define NR_syscalls 375
 
 /* 
  * There are some system calls that are not present on 64 bit, some
diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c
index 09f1940..fac4eed 100644
--- a/arch/s390/kernel/compat_wrapper.c
+++ b/arch/s390/kernel/compat_wrapper.c
@@ -176,3 +176,4 @@
 COMPAT_SYSCALL_WRAP3(getsockname, int, fd, struct sockaddr __user *, usockaddr, int __user *, usockaddr_len);
 COMPAT_SYSCALL_WRAP3(getpeername, int, fd, struct sockaddr __user *, usockaddr, int __user *, usockaddr_len);
 COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len);
+COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags);
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
index f98766e..48b37b8 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag.c
@@ -121,14 +121,14 @@
 void diag_stat_inc(enum diag_stat_enum nr)
 {
 	this_cpu_inc(diag_stat.counter[nr]);
-	trace_diagnose(diag_map[nr].code);
+	trace_s390_diagnose(diag_map[nr].code);
 }
 EXPORT_SYMBOL(diag_stat_inc);
 
 void diag_stat_inc_norecursion(enum diag_stat_enum nr)
 {
 	this_cpu_inc(diag_stat.counter[nr]);
-	trace_diagnose_norecursion(diag_map[nr].code);
+	trace_s390_diagnose_norecursion(diag_map[nr].code);
 }
 EXPORT_SYMBOL(diag_stat_inc_norecursion);
 
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index 1255c6c..301ee9c 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -26,6 +26,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
 #include <asm/page.h>
+#include <asm/ptrace.h>
 
 #define ARCH_OFFSET	4
 
@@ -59,19 +60,6 @@
 	.long	0x020006e0,0x20000050
 
 	.org	0x200
-#
-# subroutine to set architecture mode
-#
-.Lsetmode:
-	mvi	__LC_AR_MODE_ID,1	# set esame flag
-	slr	%r0,%r0 		# set cpuid to zero
-	lhi	%r1,2			# mode 2 = esame (dump)
-	sigp	%r1,%r0,0x12		# switch to esame mode
-	bras	%r13,0f
-	.fill	16,4,0x0
-0:	lmh	%r0,%r15,0(%r13)	# clear high-order half of gprs
-	sam31				# switch to 31 bit addressing mode
-	br	%r14
 
 #
 # subroutine to wait for end I/O
@@ -159,7 +147,14 @@
 	.long	0x02200050,0x00000000
 
 iplstart:
-	bas	%r14,.Lsetmode		# Immediately switch to 64 bit mode
+	mvi	__LC_AR_MODE_ID,1	# set esame flag
+	slr	%r0,%r0			# set cpuid to zero
+	lhi	%r1,2			# mode 2 = esame (dump)
+	sigp	%r1,%r0,0x12		# switch to esame mode
+	bras	%r13,0f
+	.fill	16,4,0x0
+0:	lmh	%r0,%r15,0(%r13)	# clear high-order half of gprs
+	sam31				# switch to 31 bit addressing mode
 	lh	%r1,0xb8		# test if subchannel number
 	bct	%r1,.Lnoload		#  is valid
 	l	%r1,0xb8		# load ipl subchannel number
@@ -269,71 +264,6 @@
 .Lcpuid:.fill	8,1,0
 
 #
-# SALIPL loader support. Based on a patch by Rob van der Heij.
-# This entry point is called directly from the SALIPL loader and
-# doesn't need a builtin ipl record.
-#
-	.org	0x800
-ENTRY(start)
-	stm	%r0,%r15,0x07b0		# store registers
-	bas	%r14,.Lsetmode		# Immediately switch to 64 bit mode
-	basr	%r12,%r0
-.base:
-	l	%r11,.parm
-	l	%r8,.cmd		# pointer to command buffer
-
-	ltr	%r9,%r9			# do we have SALIPL parameters?
-	bp	.sk8x8
-
-	mvc	0(64,%r8),0x00b0	# copy saved registers
-	xc	64(240-64,%r8),0(%r8)	# remainder of buffer
-	tr	0(64,%r8),.lowcase
-	b	.gotr
-.sk8x8:
-	mvc	0(240,%r8),0(%r9)	# copy iplparms into buffer
-.gotr:
-	slr	%r0,%r0
-	st	%r0,INITRD_SIZE+ARCH_OFFSET-PARMAREA(%r11)
-	st	%r0,INITRD_START+ARCH_OFFSET-PARMAREA(%r11)
-	j	startup 		# continue with startup
-.cmd:	.long	COMMAND_LINE		# address of command line buffer
-.parm:	.long	PARMAREA
-.lowcase:
-	.byte 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07
-	.byte 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f
-	.byte 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17
-	.byte 0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f
-	.byte 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27
-	.byte 0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f
-	.byte 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37
-	.byte 0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f
-	.byte 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47
-	.byte 0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f
-	.byte 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57
-	.byte 0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f
-	.byte 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67
-	.byte 0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f
-	.byte 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77
-	.byte 0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f
-
-	.byte 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87
-	.byte 0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f
-	.byte 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97
-	.byte 0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f
-	.byte 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7
-	.byte 0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf
-	.byte 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7
-	.byte 0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf
-	.byte 0xc0,0x81,0x82,0x83,0x84,0x85,0x86,0x87	# .abcdefg
-	.byte 0x88,0x89,0xca,0xcb,0xcc,0xcd,0xce,0xcf	# hi
-	.byte 0xd0,0x91,0x92,0x93,0x94,0x95,0x96,0x97	# .jklmnop
-	.byte 0x98,0x99,0xda,0xdb,0xdc,0xdd,0xde,0xdf	# qr
-	.byte 0xe0,0xe1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7	# ..stuvwx
-	.byte 0xa8,0xa9,0xea,0xeb,0xec,0xed,0xee,0xef	# yz
-	.byte 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7
-	.byte 0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
-
-#
 # startup-code at 0x10000, running in absolute addressing mode
 # this is called either by the ipl loader or directly by PSW restart
 # or linload or SALIPL
@@ -364,7 +294,7 @@
 	bras	%r13,0f
 	.fill	16,4,0x0
 0:	lmh	%r0,%r15,0(%r13)	# clear high-order half of gprs
-	sam31				# switch to 31 bit addressing mode
+	sam64				# switch to 64 bit addressing mode
 	basr	%r13,0			# get base
 .LPG0:
 	xc	0x200(256),0x200	# partially clear lowcore
@@ -395,7 +325,7 @@
 	jnz	1b
 	j	4f
 2:	l	%r15,.Lstack-.LPG0(%r13)
-	ahi	%r15,-96
+	ahi	%r15,-STACK_FRAME_OVERHEAD
 	la	%r2,.Lals_string-.LPG0(%r13)
 	l	%r3,.Lsclp_print-.LPG0(%r13)
 	basr	%r14,%r3
@@ -429,8 +359,7 @@
 	.long 1, 0xc0000000
 #endif
 4:
-	/* Continue with 64bit startup code in head64.S */
-	sam64				# switch to 64 bit mode
+	/* Continue with startup code in head64.S */
 	jg	startup_continue
 
 	.align	8
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index f6d8acd..b1f0a90 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -121,6 +121,7 @@
  * Must be in data section since the bss section
  * is not cleared when these are accessed.
  */
+static u8 ipl_ssid __attribute__((__section__(".data"))) = 0;
 static u16 ipl_devno __attribute__((__section__(".data"))) = 0;
 u32 ipl_flags __attribute__((__section__(".data"))) = 0;
 
@@ -197,6 +198,33 @@
 	return snprintf(page, PAGE_SIZE, _format, ##args);		\
 }
 
+#define IPL_ATTR_CCW_STORE_FN(_prefix, _name, _ipl_blk)			\
+static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj,	\
+		struct kobj_attribute *attr,				\
+		const char *buf, size_t len)				\
+{									\
+	unsigned long long ssid, devno;					\
+									\
+	if (sscanf(buf, "0.%llx.%llx\n", &ssid, &devno) != 2)		\
+		return -EINVAL;						\
+									\
+	if (ssid > __MAX_SSID || devno > __MAX_SUBCHANNEL)		\
+		return -EINVAL;						\
+									\
+	_ipl_blk.ssid = ssid;						\
+	_ipl_blk.devno = devno;						\
+	return len;							\
+}
+
+#define DEFINE_IPL_CCW_ATTR_RW(_prefix, _name, _ipl_blk)		\
+IPL_ATTR_SHOW_FN(_prefix, _name, "0.%x.%04x\n",				\
+		 _ipl_blk.ssid, _ipl_blk.devno);			\
+IPL_ATTR_CCW_STORE_FN(_prefix, _name, _ipl_blk);			\
+static struct kobj_attribute sys_##_prefix##_##_name##_attr =		\
+	__ATTR(_name, (S_IRUGO | S_IWUSR),				\
+	       sys_##_prefix##_##_name##_show,				\
+	       sys_##_prefix##_##_name##_store)				\
+
 #define DEFINE_IPL_ATTR_RO(_prefix, _name, _format, _value)		\
 IPL_ATTR_SHOW_FN(_prefix, _name, _format, _value)			\
 static struct kobj_attribute sys_##_prefix##_##_name##_attr =		\
@@ -395,7 +423,7 @@
 
 	switch (ipl_info.type) {
 	case IPL_TYPE_CCW:
-		return sprintf(page, "0.0.%04x\n", ipl_devno);
+		return sprintf(page, "0.%x.%04x\n", ipl_ssid, ipl_devno);
 	case IPL_TYPE_FCP:
 	case IPL_TYPE_FCP_DUMP:
 		return sprintf(page, "0.0.%04x\n", ipl->ipl_info.fcp.devno);
@@ -687,21 +715,14 @@
 				       struct bin_attribute *attr,
 				       char *buf, loff_t off, size_t count)
 {
+	size_t scpdata_len = count;
 	size_t padding;
-	size_t scpdata_len;
 
-	if (off < 0)
+
+	if (off)
 		return -EINVAL;
 
-	if (off >= DIAG308_SCPDATA_SIZE)
-		return -ENOSPC;
-
-	if (count > DIAG308_SCPDATA_SIZE - off)
-		count = DIAG308_SCPDATA_SIZE - off;
-
-	memcpy(reipl_block_fcp->ipl_info.fcp.scp_data, buf + off, count);
-	scpdata_len = off + count;
-
+	memcpy(reipl_block_fcp->ipl_info.fcp.scp_data, buf, count);
 	if (scpdata_len % 8) {
 		padding = 8 - (scpdata_len % 8);
 		memset(reipl_block_fcp->ipl_info.fcp.scp_data + scpdata_len,
@@ -717,7 +738,7 @@
 }
 static struct bin_attribute sys_reipl_fcp_scp_data_attr =
 	__BIN_ATTR(scp_data, (S_IRUGO | S_IWUSR), reipl_fcp_scpdata_read,
-		   reipl_fcp_scpdata_write, PAGE_SIZE);
+		   reipl_fcp_scpdata_write, DIAG308_SCPDATA_SIZE);
 
 static struct bin_attribute *reipl_fcp_bin_attrs[] = {
 	&sys_reipl_fcp_scp_data_attr,
@@ -814,9 +835,7 @@
 };
 
 /* CCW reipl device attributes */
-
-DEFINE_IPL_ATTR_RW(reipl_ccw, device, "0.0.%04llx\n", "0.0.%llx\n",
-	reipl_block_ccw->ipl_info.ccw.devno);
+DEFINE_IPL_CCW_ATTR_RW(reipl_ccw, device, reipl_block_ccw->ipl_info.ccw);
 
 /* NSS wrapper */
 static ssize_t reipl_nss_loadparm_show(struct kobject *kobj,
@@ -1056,8 +1075,8 @@
 
 	switch (reipl_method) {
 	case REIPL_METHOD_CCW_CIO:
+		devid.ssid  = reipl_block_ccw->ipl_info.ccw.ssid;
 		devid.devno = reipl_block_ccw->ipl_info.ccw.devno;
-		devid.ssid  = 0;
 		reipl_ccw_dev(&devid);
 		break;
 	case REIPL_METHOD_CCW_VM:
@@ -1192,6 +1211,7 @@
 
 	reipl_block_ccw_init(reipl_block_ccw);
 	if (ipl_info.type == IPL_TYPE_CCW) {
+		reipl_block_ccw->ipl_info.ccw.ssid = ipl_ssid;
 		reipl_block_ccw->ipl_info.ccw.devno = ipl_devno;
 		reipl_block_ccw_fill_parms(reipl_block_ccw);
 	}
@@ -1336,9 +1356,7 @@
 };
 
 /* CCW dump device attributes */
-
-DEFINE_IPL_ATTR_RW(dump_ccw, device, "0.0.%04llx\n", "0.0.%llx\n",
-		   dump_block_ccw->ipl_info.ccw.devno);
+DEFINE_IPL_CCW_ATTR_RW(dump_ccw, device, dump_block_ccw->ipl_info.ccw);
 
 static struct attribute *dump_ccw_attrs[] = {
 	&sys_dump_ccw_device_attr.attr,
@@ -1418,8 +1436,8 @@
 
 	switch (dump_method) {
 	case DUMP_METHOD_CCW_CIO:
+		devid.ssid  = dump_block_ccw->ipl_info.ccw.ssid;
 		devid.devno = dump_block_ccw->ipl_info.ccw.devno;
-		devid.ssid  = 0;
 		reipl_ccw_dev(&devid);
 		break;
 	case DUMP_METHOD_CCW_VM:
@@ -1939,14 +1957,14 @@
 	ipl_info.type = get_ipl_type();
 	switch (ipl_info.type) {
 	case IPL_TYPE_CCW:
+		ipl_info.data.ccw.dev_id.ssid = ipl_ssid;
 		ipl_info.data.ccw.dev_id.devno = ipl_devno;
-		ipl_info.data.ccw.dev_id.ssid = 0;
 		break;
 	case IPL_TYPE_FCP:
 	case IPL_TYPE_FCP_DUMP:
+		ipl_info.data.fcp.dev_id.ssid = 0;
 		ipl_info.data.fcp.dev_id.devno =
 			IPL_PARMBLOCK_START->ipl_info.fcp.devno;
-		ipl_info.data.fcp.dev_id.ssid = 0;
 		ipl_info.data.fcp.wwpn = IPL_PARMBLOCK_START->ipl_info.fcp.wwpn;
 		ipl_info.data.fcp.lun = IPL_PARMBLOCK_START->ipl_info.fcp.lun;
 		break;
@@ -1978,6 +1996,7 @@
 	if (cio_get_iplinfo(&iplinfo))
 		return;
 
+	ipl_ssid = iplinfo.ssid;
 	ipl_devno = iplinfo.devno;
 	ipl_flags |= IPL_DEVNO_VALID;
 	if (!iplinfo.is_qdio)
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 688a3aa..114ee8b 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -243,11 +243,7 @@
 
 static inline unsigned long brk_rnd(void)
 {
-	/* 8MB for 32bit, 1GB for 64bit */
-	if (is_32bit_task())
-		return (get_random_int() & 0x7ffUL) << PAGE_SHIFT;
-	else
-		return (get_random_int() & 0x3ffffUL) << PAGE_SHIFT;
+	return (get_random_int() & BRK_RND_MASK) << PAGE_SHIFT;
 }
 
 unsigned long arch_randomize_brk(struct mm_struct *mm)
diff --git a/arch/s390/kernel/sclp.c b/arch/s390/kernel/sclp.c
index fa0bdff..9fe7781 100644
--- a/arch/s390/kernel/sclp.c
+++ b/arch/s390/kernel/sclp.c
@@ -21,7 +21,7 @@
 	__ctl_load(cr0_new, 0, 0);
 
 	psw_ext_save = S390_lowcore.external_new_psw;
-	psw_mask = __extract_psw() & (PSW_MASK_EA | PSW_MASK_BA);
+	psw_mask = __extract_psw();
 	S390_lowcore.external_new_psw.mask = psw_mask;
 	psw_wait.mask = psw_mask | PSW_MASK_EXT | PSW_MASK_WAIT;
 	S390_lowcore.ext_int_code = 0;
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index ce0cbd6..c837bca 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -764,9 +764,6 @@
 	get_cpu_id(&cpu_id);
 	add_device_randomness(&cpu_id, sizeof(cpu_id));
 	switch (cpu_id.machine) {
-	case 0x9672:
-		strcpy(elf_platform, "g5");
-		break;
 	case 0x2064:
 	case 0x2066:
 	default:	/* Use "z900" as default for 64 bit kernels. */
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 8c56929..5378c3e 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -382,3 +382,4 @@
 SYSCALL(sys_recvfrom,compat_sys_recvfrom)
 SYSCALL(sys_recvmsg,compat_sys_recvmsg)
 SYSCALL(sys_shutdown,sys_shutdown)
+SYSCALL(sys_mlock2,compat_sys_mlock2)
diff --git a/arch/s390/kernel/trace.c b/arch/s390/kernel/trace.c
index 73239bb..21a5df9 100644
--- a/arch/s390/kernel/trace.c
+++ b/arch/s390/kernel/trace.c
@@ -9,11 +9,11 @@
 #define CREATE_TRACE_POINTS
 #include <asm/trace/diag.h>
 
-EXPORT_TRACEPOINT_SYMBOL(diagnose);
+EXPORT_TRACEPOINT_SYMBOL(s390_diagnose);
 
 static DEFINE_PER_CPU(unsigned int, diagnose_trace_depth);
 
-void trace_diagnose_norecursion(int diag_nr)
+void trace_s390_diagnose_norecursion(int diag_nr)
 {
 	unsigned long flags;
 	unsigned int *depth;
@@ -22,7 +22,7 @@
 	depth = this_cpu_ptr(&diagnose_trace_depth);
 	if (*depth == 0) {
 		(*depth)++;
-		trace_diagnose(diag_nr);
+		trace_s390_diagnose(diag_nr);
 		(*depth)--;
 	}
 	local_irq_restore(flags);
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 373e323..6a75352 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1030,8 +1030,7 @@
 				   src_id, 0);
 
 	/* sending vcpu invalid */
-	if (src_id >= KVM_MAX_VCPUS ||
-	    kvm_get_vcpu(vcpu->kvm, src_id) == NULL)
+	if (kvm_get_vcpu_by_id(vcpu->kvm, src_id) == NULL)
 		return -EINVAL;
 
 	if (sclp.has_sigpif)
@@ -1110,6 +1109,10 @@
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
 				   irq->u.emerg.code, 0);
 
+	/* sending vcpu invalid */
+	if (kvm_get_vcpu_by_id(vcpu->kvm, irq->u.emerg.code) == NULL)
+		return -EINVAL;
+
 	set_bit(irq->u.emerg.code, li->sigp_emerg_pending);
 	set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
 	atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 8fe2f1c..8465892 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -342,12 +342,16 @@
 		r = 0;
 		break;
 	case KVM_CAP_S390_VECTOR_REGISTERS:
-		if (MACHINE_HAS_VX) {
+		mutex_lock(&kvm->lock);
+		if (atomic_read(&kvm->online_vcpus)) {
+			r = -EBUSY;
+		} else if (MACHINE_HAS_VX) {
 			set_kvm_facility(kvm->arch.model.fac->mask, 129);
 			set_kvm_facility(kvm->arch.model.fac->list, 129);
 			r = 0;
 		} else
 			r = -EINVAL;
+		mutex_unlock(&kvm->lock);
 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 			 r ? "(not available)" : "(success)");
 		break;
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 77191b8..d76b51c 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -660,7 +660,7 @@
 
 	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
 
-	if (!MACHINE_HAS_PFMF)
+	if (!test_kvm_facility(vcpu->kvm, 8))
 		return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index da690b6..77c22d6 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -291,12 +291,8 @@
 			   u16 cpu_addr, u32 parameter, u64 *status_reg)
 {
 	int rc;
-	struct kvm_vcpu *dst_vcpu;
+	struct kvm_vcpu *dst_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr);
 
-	if (cpu_addr >= KVM_MAX_VCPUS)
-		return SIGP_CC_NOT_OPERATIONAL;
-
-	dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
 	if (!dst_vcpu)
 		return SIGP_CC_NOT_OPERATIONAL;
 
@@ -478,7 +474,7 @@
 	trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr);
 
 	if (order_code == SIGP_EXTERNAL_CALL) {
-		dest_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+		dest_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr);
 		BUG_ON(dest_vcpu == NULL);
 
 		kvm_s390_vcpu_wakeup(dest_vcpu);
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index c3c07d3..c722400 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -48,37 +48,13 @@
 
 static void __init setup_zero_pages(void)
 {
-	struct cpuid cpu_id;
 	unsigned int order;
 	struct page *page;
 	int i;
 
-	get_cpu_id(&cpu_id);
-	switch (cpu_id.machine) {
-	case 0x9672:	/* g5 */
-	case 0x2064:	/* z900 */
-	case 0x2066:	/* z900 */
-	case 0x2084:	/* z990 */
-	case 0x2086:	/* z990 */
-	case 0x2094:	/* z9-109 */
-	case 0x2096:	/* z9-109 */
-		order = 0;
-		break;
-	case 0x2097:	/* z10 */
-	case 0x2098:	/* z10 */
-	case 0x2817:	/* z196 */
-	case 0x2818:	/* z196 */
-		order = 2;
-		break;
-	case 0x2827:	/* zEC12 */
-	case 0x2828:	/* zEC12 */
-		order = 5;
-		break;
-	case 0x2964:	/* z13 */
-	default:
-		order = 7;
-		break;
-	}
+	/* Latest machines require a mapping granularity of 512KB */
+	order = 7;
+
 	/* Limit number of empty zero pages for small memory sizes */
 	while (order > 2 && (totalram_pages >> 10) < (1UL << order))
 		order--;
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 6e552af..ea01477 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -31,9 +31,6 @@
 #include <linux/security.h>
 #include <asm/pgalloc.h>
 
-unsigned long mmap_rnd_mask;
-static unsigned long mmap_align_mask;
-
 static unsigned long stack_maxrandom_size(void)
 {
 	if (!(current->flags & PF_RANDOMIZE))
@@ -62,10 +59,7 @@
 
 unsigned long arch_mmap_rnd(void)
 {
-	if (is_32bit_task())
-		return (get_random_int() & 0x7ff) << PAGE_SHIFT;
-	else
-		return (get_random_int() & mmap_rnd_mask) << PAGE_SHIFT;
+	return (get_random_int() & MMAP_RND_MASK) << PAGE_SHIFT;
 }
 
 static unsigned long mmap_base_legacy(unsigned long rnd)
@@ -92,7 +86,6 @@
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
 	struct vm_unmapped_area_info info;
-	int do_color_align;
 
 	if (len > TASK_SIZE - mmap_min_addr)
 		return -ENOMEM;
@@ -108,15 +101,14 @@
 			return addr;
 	}
 
-	do_color_align = 0;
-	if (filp || (flags & MAP_SHARED))
-		do_color_align = !is_32bit_task();
-
 	info.flags = 0;
 	info.length = len;
 	info.low_limit = mm->mmap_base;
 	info.high_limit = TASK_SIZE;
-	info.align_mask = do_color_align ? (mmap_align_mask << PAGE_SHIFT) : 0;
+	if (filp || (flags & MAP_SHARED))
+		info.align_mask = MMAP_ALIGN_MASK << PAGE_SHIFT;
+	else
+		info.align_mask = 0;
 	info.align_offset = pgoff << PAGE_SHIFT;
 	return vm_unmapped_area(&info);
 }
@@ -130,7 +122,6 @@
 	struct mm_struct *mm = current->mm;
 	unsigned long addr = addr0;
 	struct vm_unmapped_area_info info;
-	int do_color_align;
 
 	/* requested length too big for entire address space */
 	if (len > TASK_SIZE - mmap_min_addr)
@@ -148,15 +139,14 @@
 			return addr;
 	}
 
-	do_color_align = 0;
-	if (filp || (flags & MAP_SHARED))
-		do_color_align = !is_32bit_task();
-
 	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
 	info.length = len;
 	info.low_limit = max(PAGE_SIZE, mmap_min_addr);
 	info.high_limit = mm->mmap_base;
-	info.align_mask = do_color_align ? (mmap_align_mask << PAGE_SHIFT) : 0;
+	if (filp || (flags & MAP_SHARED))
+		info.align_mask = MMAP_ALIGN_MASK << PAGE_SHIFT;
+	else
+		info.align_mask = 0;
 	info.align_offset = pgoff << PAGE_SHIFT;
 	addr = vm_unmapped_area(&info);
 
@@ -254,35 +244,3 @@
 		mm->get_unmapped_area = s390_get_unmapped_area_topdown;
 	}
 }
-
-static int __init setup_mmap_rnd(void)
-{
-	struct cpuid cpu_id;
-
-	get_cpu_id(&cpu_id);
-	switch (cpu_id.machine) {
-	case 0x9672:
-	case 0x2064:
-	case 0x2066:
-	case 0x2084:
-	case 0x2086:
-	case 0x2094:
-	case 0x2096:
-	case 0x2097:
-	case 0x2098:
-	case 0x2817:
-	case 0x2818:
-	case 0x2827:
-	case 0x2828:
-		mmap_rnd_mask = 0x7ffUL;
-		mmap_align_mask = 0UL;
-		break;
-	case 0x2964:	/* z13 */
-	default:
-		mmap_rnd_mask = 0x3ff80UL;
-		mmap_align_mask = 0x7fUL;
-		break;
-	}
-	return 0;
-}
-early_initcall(setup_mmap_rnd);
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 37d10f7..d348f2c 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -33,7 +33,7 @@
 		return NULL;
 
 	for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++)
-		*entry = ZPCI_TABLE_INVALID | ZPCI_TABLE_PROTECTED;
+		*entry = ZPCI_TABLE_INVALID;
 	return table;
 }
 
@@ -51,7 +51,7 @@
 		return NULL;
 
 	for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++)
-		*entry = ZPCI_PTE_INVALID | ZPCI_TABLE_PROTECTED;
+		*entry = ZPCI_PTE_INVALID;
 	return table;
 }
 
@@ -95,7 +95,7 @@
 	return pto;
 }
 
-static unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr)
+unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr)
 {
 	unsigned long *sto, *pto;
 	unsigned int rtx, sx, px;
@@ -114,20 +114,10 @@
 	return &pto[px];
 }
 
-void dma_update_cpu_trans(unsigned long *dma_table, void *page_addr,
-			  dma_addr_t dma_addr, int flags)
+void dma_update_cpu_trans(unsigned long *entry, void *page_addr, int flags)
 {
-	unsigned long *entry;
-
-	entry = dma_walk_cpu_trans(dma_table, dma_addr);
-	if (!entry) {
-		WARN_ON_ONCE(1);
-		return;
-	}
-
 	if (flags & ZPCI_PTE_INVALID) {
 		invalidate_pt_entry(entry);
-		return;
 	} else {
 		set_pt_pfaa(entry, page_addr);
 		validate_pt_entry(entry);
@@ -146,18 +136,25 @@
 	u8 *page_addr = (u8 *) (pa & PAGE_MASK);
 	dma_addr_t start_dma_addr = dma_addr;
 	unsigned long irq_flags;
+	unsigned long *entry;
 	int i, rc = 0;
 
 	if (!nr_pages)
 		return -EINVAL;
 
 	spin_lock_irqsave(&zdev->dma_table_lock, irq_flags);
-	if (!zdev->dma_table)
+	if (!zdev->dma_table) {
+		rc = -EINVAL;
 		goto no_refresh;
+	}
 
 	for (i = 0; i < nr_pages; i++) {
-		dma_update_cpu_trans(zdev->dma_table, page_addr, dma_addr,
-				     flags);
+		entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
+		if (!entry) {
+			rc = -ENOMEM;
+			goto undo_cpu_trans;
+		}
+		dma_update_cpu_trans(entry, page_addr, flags);
 		page_addr += PAGE_SIZE;
 		dma_addr += PAGE_SIZE;
 	}
@@ -176,6 +173,18 @@
 
 	rc = zpci_refresh_trans((u64) zdev->fh << 32, start_dma_addr,
 				nr_pages * PAGE_SIZE);
+undo_cpu_trans:
+	if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) {
+		flags = ZPCI_PTE_INVALID;
+		while (i-- > 0) {
+			page_addr -= PAGE_SIZE;
+			dma_addr -= PAGE_SIZE;
+			entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
+			if (!entry)
+				break;
+			dma_update_cpu_trans(entry, page_addr, flags);
+		}
+	}
 
 no_refresh:
 	spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags);
@@ -260,6 +269,16 @@
 	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
 }
 
+static inline void zpci_err_dma(unsigned long rc, unsigned long addr)
+{
+	struct {
+		unsigned long rc;
+		unsigned long addr;
+	} __packed data = {rc, addr};
+
+	zpci_err_hex(&data, sizeof(data));
+}
+
 static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
 				     unsigned long offset, size_t size,
 				     enum dma_data_direction direction,
@@ -270,33 +289,40 @@
 	unsigned long pa = page_to_phys(page) + offset;
 	int flags = ZPCI_PTE_VALID;
 	dma_addr_t dma_addr;
+	int ret;
 
 	/* This rounds up number of pages based on size and offset */
 	nr_pages = iommu_num_pages(pa, size, PAGE_SIZE);
 	iommu_page_index = dma_alloc_iommu(zdev, nr_pages);
-	if (iommu_page_index == -1)
+	if (iommu_page_index == -1) {
+		ret = -ENOSPC;
 		goto out_err;
+	}
 
 	/* Use rounded up size */
 	size = nr_pages * PAGE_SIZE;
 
 	dma_addr = zdev->start_dma + iommu_page_index * PAGE_SIZE;
-	if (dma_addr + size > zdev->end_dma)
+	if (dma_addr + size > zdev->end_dma) {
+		ret = -ERANGE;
 		goto out_free;
+	}
 
 	if (direction == DMA_NONE || direction == DMA_TO_DEVICE)
 		flags |= ZPCI_TABLE_PROTECTED;
 
-	if (!dma_update_trans(zdev, pa, dma_addr, size, flags)) {
-		atomic64_add(nr_pages, &zdev->mapped_pages);
-		return dma_addr + (offset & ~PAGE_MASK);
-	}
+	ret = dma_update_trans(zdev, pa, dma_addr, size, flags);
+	if (ret)
+		goto out_free;
+
+	atomic64_add(nr_pages, &zdev->mapped_pages);
+	return dma_addr + (offset & ~PAGE_MASK);
 
 out_free:
 	dma_free_iommu(zdev, iommu_page_index, nr_pages);
 out_err:
 	zpci_err("map error:\n");
-	zpci_err_hex(&pa, sizeof(pa));
+	zpci_err_dma(ret, pa);
 	return DMA_ERROR_CODE;
 }
 
@@ -306,14 +332,16 @@
 {
 	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
 	unsigned long iommu_page_index;
-	int npages;
+	int npages, ret;
 
 	npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
 	dma_addr = dma_addr & PAGE_MASK;
-	if (dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE,
-			     ZPCI_TABLE_PROTECTED | ZPCI_PTE_INVALID)) {
+	ret = dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE,
+			       ZPCI_PTE_INVALID);
+	if (ret) {
 		zpci_err("unmap error:\n");
-		zpci_err_hex(&dma_addr, sizeof(dma_addr));
+		zpci_err_dma(ret, dma_addr);
+		return;
 	}
 
 	atomic64_add(npages, &zdev->unmapped_pages);
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index 0033e96..9011a88 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -23,7 +23,6 @@
 #include <stdarg.h>
 #include <linux/types.h>
 #include <linux/edd.h>
-#include <asm/boot.h>
 #include <asm/setup.h>
 #include "bitops.h"
 #include "ctype.h"
diff --git a/arch/x86/boot/video-mode.c b/arch/x86/boot/video-mode.c
index aa8a96b..95c7a81 100644
--- a/arch/x86/boot/video-mode.c
+++ b/arch/x86/boot/video-mode.c
@@ -19,6 +19,8 @@
 #include "video.h"
 #include "vesa.h"
 
+#include <uapi/asm/boot.h>
+
 /*
  * Common variables
  */
diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c
index 05111bb..77780e3 100644
--- a/arch/x86/boot/video.c
+++ b/arch/x86/boot/video.c
@@ -13,6 +13,8 @@
  * Select video mode
  */
 
+#include <uapi/asm/boot.h>
+
 #include "boot.h"
 #include "video.h"
 #include "vesa.h"
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 53616ca..a55697d 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -509,6 +509,17 @@
 	 * tracking that we're in kernel mode.
 	 */
 	SWAPGS
+
+	/*
+	 * We need to tell lockdep that IRQs are off.  We can't do this until
+	 * we fix gsbase, and we should do it before enter_from_user_mode
+	 * (which can take locks).  Since TRACE_IRQS_OFF idempotent,
+	 * the simplest way to handle it is to just call it twice if
+	 * we enter from user mode.  There's no reason to optimize this since
+	 * TRACE_IRQS_OFF is a no-op if lockdep is off.
+	 */
+	TRACE_IRQS_OFF
+
 #ifdef CONFIG_CONTEXT_TRACKING
 	call enter_from_user_mode
 #endif
@@ -1049,12 +1060,18 @@
 	SWAPGS
 
 .Lerror_entry_from_usermode_after_swapgs:
+	/*
+	 * We need to tell lockdep that IRQs are off.  We can't do this until
+	 * we fix gsbase, and we should do it before enter_from_user_mode
+	 * (which can take locks).
+	 */
+	TRACE_IRQS_OFF
 #ifdef CONFIG_CONTEXT_TRACKING
 	call enter_from_user_mode
 #endif
+	ret
 
 .Lerror_entry_done:
-
 	TRACE_IRQS_OFF
 	ret
 
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 9f39056..690b402 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -35,7 +35,7 @@
 #define MSR_IA32_PERFCTR0		0x000000c1
 #define MSR_IA32_PERFCTR1		0x000000c2
 #define MSR_FSB_FREQ			0x000000cd
-#define MSR_NHM_PLATFORM_INFO		0x000000ce
+#define MSR_PLATFORM_INFO		0x000000ce
 
 #define MSR_NHM_SNB_PKG_CST_CFG_CTL	0x000000e2
 #define NHM_C3_AUTO_DEMOTE		(1UL << 25)
@@ -44,7 +44,6 @@
 #define SNB_C1_AUTO_UNDEMOTE		(1UL << 27)
 #define SNB_C3_AUTO_UNDEMOTE		(1UL << 28)
 
-#define MSR_PLATFORM_INFO		0x000000ce
 #define MSR_MTRRcap			0x000000fe
 #define MSR_IA32_BBL_CR_CTL		0x00000119
 #define MSR_IA32_BBL_CR_CTL3		0x0000011e
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index c5b7fb2..cc071c6 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -9,20 +9,22 @@
 #define PAGE_SIZE	(_AC(1,UL) << PAGE_SHIFT)
 #define PAGE_MASK	(~(PAGE_SIZE-1))
 
-#define __PHYSICAL_MASK		((phys_addr_t)((1ULL << __PHYSICAL_MASK_SHIFT) - 1))
-#define __VIRTUAL_MASK		((1UL << __VIRTUAL_MASK_SHIFT) - 1)
-
-/* Cast PAGE_MASK to a signed type so that it is sign-extended if
-   virtual addresses are 32-bits but physical addresses are larger
-   (ie, 32-bit PAE). */
-#define PHYSICAL_PAGE_MASK	(((signed long)PAGE_MASK) & __PHYSICAL_MASK)
-
 #define PMD_PAGE_SIZE		(_AC(1, UL) << PMD_SHIFT)
 #define PMD_PAGE_MASK		(~(PMD_PAGE_SIZE-1))
 
 #define PUD_PAGE_SIZE		(_AC(1, UL) << PUD_SHIFT)
 #define PUD_PAGE_MASK		(~(PUD_PAGE_SIZE-1))
 
+#define __PHYSICAL_MASK		((phys_addr_t)((1ULL << __PHYSICAL_MASK_SHIFT) - 1))
+#define __VIRTUAL_MASK		((1UL << __VIRTUAL_MASK_SHIFT) - 1)
+
+/* Cast *PAGE_MASK to a signed type so that it is sign-extended if
+   virtual addresses are 32-bits but physical addresses are larger
+   (ie, 32-bit PAE). */
+#define PHYSICAL_PAGE_MASK	(((signed long)PAGE_MASK) & __PHYSICAL_MASK)
+#define PHYSICAL_PMD_PAGE_MASK	(((signed long)PMD_PAGE_MASK) & __PHYSICAL_MASK)
+#define PHYSICAL_PUD_PAGE_MASK	(((signed long)PUD_PAGE_MASK) & __PHYSICAL_MASK)
+
 #define HPAGE_SHIFT		PMD_SHIFT
 #define HPAGE_SIZE		(_AC(1,UL) << HPAGE_SHIFT)
 #define HPAGE_MASK		(~(HPAGE_SIZE - 1))
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index dd5b0aa..a471cad 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -279,17 +279,14 @@
 static inline pudval_t pud_pfn_mask(pud_t pud)
 {
 	if (native_pud_val(pud) & _PAGE_PSE)
-		return PUD_PAGE_MASK & PHYSICAL_PAGE_MASK;
+		return PHYSICAL_PUD_PAGE_MASK;
 	else
 		return PTE_PFN_MASK;
 }
 
 static inline pudval_t pud_flags_mask(pud_t pud)
 {
-	if (native_pud_val(pud) & _PAGE_PSE)
-		return ~(PUD_PAGE_MASK & (pudval_t)PHYSICAL_PAGE_MASK);
-	else
-		return ~PTE_PFN_MASK;
+	return ~pud_pfn_mask(pud);
 }
 
 static inline pudval_t pud_flags(pud_t pud)
@@ -300,17 +297,14 @@
 static inline pmdval_t pmd_pfn_mask(pmd_t pmd)
 {
 	if (native_pmd_val(pmd) & _PAGE_PSE)
-		return PMD_PAGE_MASK & PHYSICAL_PAGE_MASK;
+		return PHYSICAL_PMD_PAGE_MASK;
 	else
 		return PTE_PFN_MASK;
 }
 
 static inline pmdval_t pmd_flags_mask(pmd_t pmd)
 {
-	if (native_pmd_val(pmd) & _PAGE_PSE)
-		return ~(PMD_PAGE_MASK & (pmdval_t)PHYSICAL_PAGE_MASK);
-	else
-		return ~PTE_PFN_MASK;
+	return ~pmd_pfn_mask(pmd);
 }
 
 static inline pmdval_t pmd_flags(pmd_t pmd)
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 48d34d2..cd0fc0c 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -1,7 +1,6 @@
 #ifndef _ASM_X86_PLATFORM_H
 #define _ASM_X86_PLATFORM_H
 
-#include <asm/pgtable_types.h>
 #include <asm/bootparam.h>
 
 struct mpc_bus;
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4ddd780..c2b7522 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -273,10 +273,9 @@
 
 static __always_inline void setup_smap(struct cpuinfo_x86 *c)
 {
-	unsigned long eflags;
+	unsigned long eflags = native_save_fl();
 
 	/* This should have been cleared long ago */
-	raw_local_save_flags(eflags);
 	BUG_ON(eflags & X86_EFLAGS_AC);
 
 	if (cpu_has(c, X86_FEATURE_SMAP)) {
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 7fc27f1..b3e94ef 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -698,3 +698,4 @@
 	return error;
 
 }
+late_initcall(microcode_init);
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index db9a675..bca14c8 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -547,6 +547,7 @@
 	INTEL_CHV_IDS(&chv_stolen_funcs),
 	INTEL_SKL_IDS(&gen9_stolen_funcs),
 	INTEL_BXT_IDS(&gen9_stolen_funcs),
+	INTEL_KBL_IDS(&gen9_stolen_funcs),
 };
 
 static void __init intel_graphics_stolen(int num, int slot, int func)
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index ef29b74..31c6a60 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -385,20 +385,19 @@
  */
 void fpu__init_prepare_fx_sw_frame(void)
 {
-	int fsave_header_size = sizeof(struct fregs_state);
 	int size = xstate_size + FP_XSTATE_MAGIC2_SIZE;
 
-	if (config_enabled(CONFIG_X86_32))
-		size += fsave_header_size;
-
 	fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
 	fx_sw_reserved.extended_size = size;
 	fx_sw_reserved.xfeatures = xfeatures_mask;
 	fx_sw_reserved.xstate_size = xstate_size;
 
-	if (config_enabled(CONFIG_IA32_EMULATION)) {
+	if (config_enabled(CONFIG_IA32_EMULATION) ||
+	    config_enabled(CONFIG_X86_32)) {
+		int fsave_header_size = sizeof(struct fregs_state);
+
 		fx_sw_reserved_ia32 = fx_sw_reserved;
-		fx_sw_reserved_ia32.extended_size += fsave_header_size;
+		fx_sw_reserved_ia32.extended_size = size + fsave_header_size;
 	}
 }
 
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 6454f27..70fc312 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -694,7 +694,6 @@
 	if (!boot_cpu_has(X86_FEATURE_XSAVE))
 		return NULL;
 
-	xsave = &current->thread.fpu.state.xsave;
 	/*
 	 * We should not ever be requesting features that we
 	 * have not enabled.  Remember that pcntxt_mask is
diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S
index 94ea120..87e1762 100644
--- a/arch/x86/kernel/mcount_64.S
+++ b/arch/x86/kernel/mcount_64.S
@@ -278,6 +278,12 @@
 	/* save_mcount_regs fills in first two parameters */
 	save_mcount_regs
 
+	/*
+	 * When DYNAMIC_FTRACE is not defined, ARCH_SUPPORTS_FTRACE_OPS is not
+	 * set (see include/asm/ftrace.h and include/linux/ftrace.h).  Only the
+	 * ip and parent ip are used and the list function is called when
+	 * function tracing is enabled.
+	 */
 	call   *ftrace_trace_function
 
 	restore_mcount_regs
diff --git a/arch/x86/kernel/pmem.c b/arch/x86/kernel/pmem.c
index 4f00b63..14415af 100644
--- a/arch/x86/kernel/pmem.c
+++ b/arch/x86/kernel/pmem.c
@@ -4,10 +4,22 @@
  */
 #include <linux/platform_device.h>
 #include <linux/module.h>
+#include <linux/ioport.h>
+
+static int found(u64 start, u64 end, void *data)
+{
+	return 1;
+}
 
 static __init int register_e820_pmem(void)
 {
+	char *pmem = "Persistent Memory (legacy)";
 	struct platform_device *pdev;
+	int rc;
+
+	rc = walk_iomem_res(pmem, IORESOURCE_MEM, 0, -1, NULL, found);
+	if (rc <= 0)
+		return 0;
 
 	/*
 	 * See drivers/nvdimm/e820.c for the implementation, this is
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 29db25f..d2bbe34 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1250,8 +1250,6 @@
 	if (efi_enabled(EFI_BOOT))
 		efi_apply_memmap_quirks();
 #endif
-
-	microcode_init();
 }
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index b7ffb7c..cb6282c 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -690,12 +690,15 @@
 	signal_setup_done(failed, ksig, stepping);
 }
 
-#ifdef CONFIG_X86_32
-#define NR_restart_syscall	__NR_restart_syscall
-#else /* !CONFIG_X86_32 */
-#define NR_restart_syscall	\
-	test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : __NR_restart_syscall
-#endif /* CONFIG_X86_32 */
+static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
+{
+#if defined(CONFIG_X86_32) || !defined(CONFIG_X86_64)
+	return __NR_restart_syscall;
+#else /* !CONFIG_X86_32 && CONFIG_X86_64 */
+	return test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall :
+		__NR_restart_syscall | (regs->orig_ax & __X32_SYSCALL_BIT);
+#endif /* CONFIG_X86_32 || !CONFIG_X86_64 */
+}
 
 /*
  * Note that 'init' is a special process: it doesn't get signals it doesn't
@@ -724,7 +727,7 @@
 			break;
 
 		case -ERESTART_RESTARTBLOCK:
-			regs->ax = NR_restart_syscall;
+			regs->ax = get_nr_restart_syscall(regs);
 			regs->ip -= 2;
 			break;
 		}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 892ee2e5..fbabe4f 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -509,7 +509,7 @@
  */
 #define UDELAY_10MS_DEFAULT 10000
 
-static unsigned int init_udelay = INT_MAX;
+static unsigned int init_udelay = UINT_MAX;
 
 static int __init cpu_init_udelay(char *str)
 {
@@ -522,14 +522,15 @@
 static void __init smp_quirk_init_udelay(void)
 {
 	/* if cmdline changed it from default, leave it alone */
-	if (init_udelay != INT_MAX)
+	if (init_udelay != UINT_MAX)
 		return;
 
 	/* if modern processor, use no delay */
 	if (((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 6)) ||
-	    ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && (boot_cpu_data.x86 >= 0xF)))
+	    ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && (boot_cpu_data.x86 >= 0xF))) {
 		init_udelay = 0;
-
+		return;
+	}
 	/* else, use legacy delay */
 	init_udelay = UDELAY_10MS_DEFAULT;
 }
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 87acc52..af823a3 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -7394,11 +7394,6 @@
 
 	switch (type) {
 	case VMX_VPID_EXTENT_ALL_CONTEXT:
-		if (get_vmcs12(vcpu)->virtual_processor_id == 0) {
-			nested_vmx_failValid(vcpu,
-				VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
-			return 1;
-		}
 		__vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02);
 		nested_vmx_succeed(vcpu);
 		break;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 00462bd..eed3228 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2763,6 +2763,26 @@
 	return 0;
 }
 
+static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu)
+{
+	return (!lapic_in_kernel(vcpu) ||
+		kvm_apic_accept_pic_intr(vcpu));
+}
+
+/*
+ * if userspace requested an interrupt window, check that the
+ * interrupt window is open.
+ *
+ * No need to exit to userspace if we already have an interrupt queued.
+ */
+static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu)
+{
+	return kvm_arch_interrupt_allowed(vcpu) &&
+		!kvm_cpu_has_interrupt(vcpu) &&
+		!kvm_event_needs_reinjection(vcpu) &&
+		kvm_cpu_accept_dm_intr(vcpu);
+}
+
 static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
 				    struct kvm_interrupt *irq)
 {
@@ -2786,6 +2806,7 @@
 		return -EEXIST;
 
 	vcpu->arch.pending_external_vector = irq->irq;
+	kvm_make_request(KVM_REQ_EVENT, vcpu);
 	return 0;
 }
 
@@ -5910,23 +5931,10 @@
 	return emulator_write_emulated(ctxt, rip, instruction, 3, NULL);
 }
 
-/*
- * Check if userspace requested an interrupt window, and that the
- * interrupt window is open.
- *
- * No need to exit to userspace if we already have an interrupt queued.
- */
 static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
 {
-	if (!vcpu->run->request_interrupt_window || pic_in_kernel(vcpu->kvm))
-		return false;
-
-	if (kvm_cpu_has_interrupt(vcpu))
-		return false;
-
-	return (irqchip_split(vcpu->kvm)
-		? kvm_apic_accept_pic_intr(vcpu)
-		: kvm_arch_interrupt_allowed(vcpu));
+	return vcpu->run->request_interrupt_window &&
+		likely(!pic_in_kernel(vcpu->kvm));
 }
 
 static void post_kvm_run_save(struct kvm_vcpu *vcpu)
@@ -5937,17 +5945,9 @@
 	kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0;
 	kvm_run->cr8 = kvm_get_cr8(vcpu);
 	kvm_run->apic_base = kvm_get_apic_base(vcpu);
-	if (!irqchip_in_kernel(vcpu->kvm))
-		kvm_run->ready_for_interrupt_injection =
-			kvm_arch_interrupt_allowed(vcpu) &&
-			!kvm_cpu_has_interrupt(vcpu) &&
-			!kvm_event_needs_reinjection(vcpu);
-	else if (!pic_in_kernel(vcpu->kvm))
-		kvm_run->ready_for_interrupt_injection =
-			kvm_apic_accept_pic_intr(vcpu) &&
-			!kvm_cpu_has_interrupt(vcpu);
-	else
-		kvm_run->ready_for_interrupt_injection = 1;
+	kvm_run->ready_for_interrupt_injection =
+		pic_in_kernel(vcpu->kvm) ||
+		kvm_vcpu_ready_for_interrupt_injection(vcpu);
 }
 
 static void update_cr8_intercept(struct kvm_vcpu *vcpu)
@@ -6360,8 +6360,10 @@
 static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 {
 	int r;
-	bool req_int_win = !lapic_in_kernel(vcpu) &&
-		vcpu->run->request_interrupt_window;
+	bool req_int_win =
+		dm_request_for_irq_injection(vcpu) &&
+		kvm_cpu_accept_dm_intr(vcpu);
+
 	bool req_immediate_exit = false;
 
 	if (vcpu->requests) {
@@ -6663,7 +6665,8 @@
 		if (kvm_cpu_has_pending_timer(vcpu))
 			kvm_inject_pending_timer_irqs(vcpu);
 
-		if (dm_request_for_irq_injection(vcpu)) {
+		if (dm_request_for_irq_injection(vcpu) &&
+			kvm_vcpu_ready_for_interrupt_injection(vcpu)) {
 			r = 0;
 			vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
 			++vcpu->stat.request_irq_exits;
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index b0ae85f..b2fd67d 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -101,19 +101,19 @@
 	switch (type) {
 	case REG_TYPE_RM:
 		regno = X86_MODRM_RM(insn->modrm.value);
-		if (X86_REX_B(insn->rex_prefix.value) == 1)
+		if (X86_REX_B(insn->rex_prefix.value))
 			regno += 8;
 		break;
 
 	case REG_TYPE_INDEX:
 		regno = X86_SIB_INDEX(insn->sib.value);
-		if (X86_REX_X(insn->rex_prefix.value) == 1)
+		if (X86_REX_X(insn->rex_prefix.value))
 			regno += 8;
 		break;
 
 	case REG_TYPE_BASE:
 		regno = X86_SIB_BASE(insn->sib.value);
-		if (X86_REX_B(insn->rex_prefix.value) == 1)
+		if (X86_REX_B(insn->rex_prefix.value))
 			regno += 8;
 		break;
 
@@ -586,6 +586,29 @@
 }
 
 /*
+ * We only want to do a 4-byte get_user() on 32-bit.  Otherwise,
+ * we might run off the end of the bounds table if we are on
+ * a 64-bit kernel and try to get 8 bytes.
+ */
+int get_user_bd_entry(struct mm_struct *mm, unsigned long *bd_entry_ret,
+		long __user *bd_entry_ptr)
+{
+	u32 bd_entry_32;
+	int ret;
+
+	if (is_64bit_mm(mm))
+		return get_user(*bd_entry_ret, bd_entry_ptr);
+
+	/*
+	 * Note that get_user() uses the type of the *pointer* to
+	 * establish the size of the get, not the destination.
+	 */
+	ret = get_user(bd_entry_32, (u32 __user *)bd_entry_ptr);
+	*bd_entry_ret = bd_entry_32;
+	return ret;
+}
+
+/*
  * Get the base of bounds tables pointed by specific bounds
  * directory entry.
  */
@@ -605,7 +628,7 @@
 		int need_write = 0;
 
 		pagefault_disable();
-		ret = get_user(bd_entry, bd_entry_ptr);
+		ret = get_user_bd_entry(mm, &bd_entry, bd_entry_ptr);
 		pagefault_enable();
 		if (!ret)
 			break;
@@ -700,11 +723,23 @@
  */
 static inline unsigned long bd_entry_virt_space(struct mm_struct *mm)
 {
-	unsigned long long virt_space = (1ULL << boot_cpu_data.x86_virt_bits);
-	if (is_64bit_mm(mm))
-		return virt_space / MPX_BD_NR_ENTRIES_64;
-	else
-		return virt_space / MPX_BD_NR_ENTRIES_32;
+	unsigned long long virt_space;
+	unsigned long long GB = (1ULL << 30);
+
+	/*
+	 * This covers 32-bit emulation as well as 32-bit kernels
+	 * running on 64-bit harware.
+	 */
+	if (!is_64bit_mm(mm))
+		return (4ULL * GB) / MPX_BD_NR_ENTRIES_32;
+
+	/*
+	 * 'x86_virt_bits' returns what the hardware is capable
+	 * of, and returns the full >32-bit adddress space when
+	 * running 32-bit kernels on 64-bit hardware.
+	 */
+	virt_space = (1ULL << boot_cpu_data.x86_virt_bits);
+	return virt_space / MPX_BD_NR_ENTRIES_64;
 }
 
 /*
diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c
index 7bcf06a..6eb3c8a 100644
--- a/arch/x86/pci/bus_numa.c
+++ b/arch/x86/pci/bus_numa.c
@@ -50,18 +50,9 @@
 	if (!found)
 		pci_add_resource(resources, &info->busn);
 
-	list_for_each_entry(root_res, &info->resources, list) {
-		struct resource *res;
-		struct resource *root;
+	list_for_each_entry(root_res, &info->resources, list)
+		pci_add_resource(resources, &root_res->res);
 
-		res = &root_res->res;
-		pci_add_resource(resources, res);
-		if (res->flags & IORESOURCE_IO)
-			root = &ioport_resource;
-		else
-			root = &iomem_resource;
-		insert_resource(root, res);
-	}
 	return;
 
 default_resources:
diff --git a/block/blk-core.c b/block/blk-core.c
index 5131993b..a0af404 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2114,7 +2114,8 @@
 EXPORT_SYMBOL(submit_bio);
 
 /**
- * blk_rq_check_limits - Helper function to check a request for the queue limit
+ * blk_cloned_rq_check_limits - Helper function to check a cloned request
+ *                              for new the queue limits
  * @q:  the queue
  * @rq: the request being checked
  *
@@ -2125,20 +2126,13 @@
  *    after it is inserted to @q, it should be checked against @q before
  *    the insertion using this generic function.
  *
- *    This function should also be useful for request stacking drivers
- *    in some cases below, so export this function.
  *    Request stacking drivers like request-based dm may change the queue
- *    limits while requests are in the queue (e.g. dm's table swapping).
- *    Such request stacking drivers should check those requests against
- *    the new queue limits again when they dispatch those requests,
- *    although such checkings are also done against the old queue limits
- *    when submitting requests.
+ *    limits when retrying requests on other queues. Those requests need
+ *    to be checked against the new queue limits again during dispatch.
  */
-int blk_rq_check_limits(struct request_queue *q, struct request *rq)
+static int blk_cloned_rq_check_limits(struct request_queue *q,
+				      struct request *rq)
 {
-	if (!rq_mergeable(rq))
-		return 0;
-
 	if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, rq->cmd_flags)) {
 		printk(KERN_ERR "%s: over max size limit.\n", __func__);
 		return -EIO;
@@ -2158,7 +2152,6 @@
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(blk_rq_check_limits);
 
 /**
  * blk_insert_cloned_request - Helper for stacking drivers to submit a request
@@ -2170,7 +2163,7 @@
 	unsigned long flags;
 	int where = ELEVATOR_INSERT_BACK;
 
-	if (blk_rq_check_limits(q, rq))
+	if (blk_cloned_rq_check_limits(q, rq))
 		return -EIO;
 
 	if (rq->rq_disk &&
diff --git a/block/blk-merge.c b/block/blk-merge.c
index de5716d8..e01405a 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -76,6 +76,9 @@
 	struct bio_vec bv, bvprv, *bvprvp = NULL;
 	struct bvec_iter iter;
 	unsigned seg_size = 0, nsegs = 0, sectors = 0;
+	unsigned front_seg_size = bio->bi_seg_front_size;
+	bool do_split = true;
+	struct bio *new = NULL;
 
 	bio_for_each_segment(bv, bio, iter) {
 		if (sectors + (bv.bv_len >> 9) > queue_max_sectors(q))
@@ -98,8 +101,11 @@
 
 			seg_size += bv.bv_len;
 			bvprv = bv;
-			bvprvp = &bv;
+			bvprvp = &bvprv;
 			sectors += bv.bv_len >> 9;
+
+			if (nsegs == 1 && seg_size > front_seg_size)
+				front_seg_size = seg_size;
 			continue;
 		}
 new_segment:
@@ -108,16 +114,29 @@
 
 		nsegs++;
 		bvprv = bv;
-		bvprvp = &bv;
+		bvprvp = &bvprv;
 		seg_size = bv.bv_len;
 		sectors += bv.bv_len >> 9;
+
+		if (nsegs == 1 && seg_size > front_seg_size)
+			front_seg_size = seg_size;
 	}
 
-	*segs = nsegs;
-	return NULL;
+	do_split = false;
 split:
 	*segs = nsegs;
-	return bio_split(bio, sectors, GFP_NOIO, bs);
+
+	if (do_split) {
+		new = bio_split(bio, sectors, GFP_NOIO, bs);
+		if (new)
+			bio = new;
+	}
+
+	bio->bi_seg_front_size = front_seg_size;
+	if (seg_size > bio->bi_seg_back_size)
+		bio->bi_seg_back_size = seg_size;
+
+	return do_split ? new : NULL;
 }
 
 void blk_queue_split(struct request_queue *q, struct bio **bio,
@@ -412,6 +431,12 @@
 	if (sg)
 		sg_mark_end(sg);
 
+	/*
+	 * Something must have been wrong if the figured number of
+	 * segment is bigger than number of req's physical segments
+	 */
+	WARN_ON(nsegs > rq->nr_phys_segments);
+
 	return nsegs;
 }
 EXPORT_SYMBOL(blk_rq_map_sg);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 3ae09de..6d6f8fe 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1291,15 +1291,16 @@
 		blk_mq_bio_to_request(rq, bio);
 
 		/*
-		 * we do limited pluging. If bio can be merged, do merge.
+		 * We do limited pluging. If the bio can be merged, do that.
 		 * Otherwise the existing request in the plug list will be
 		 * issued. So the plug list will have one request at most
 		 */
 		if (plug) {
 			/*
 			 * The plug list might get flushed before this. If that
-			 * happens, same_queue_rq is invalid and plug list is empty
-			 **/
+			 * happens, same_queue_rq is invalid and plug list is
+			 * empty
+			 */
 			if (same_queue_rq && !list_empty(&plug->mq_list)) {
 				old_rq = same_queue_rq;
 				list_del_init(&old_rq->queuelist);
@@ -1380,12 +1381,15 @@
 		blk_mq_bio_to_request(rq, bio);
 		if (!request_count)
 			trace_block_plug(q);
-		else if (request_count >= BLK_MAX_REQUEST_COUNT) {
+
+		blk_mq_put_ctx(data.ctx);
+
+		if (request_count >= BLK_MAX_REQUEST_COUNT) {
 			blk_flush_plug_list(plug, false);
 			trace_block_plug(q);
 		}
+
 		list_add_tail(&rq->queuelist, &plug->mq_list);
-		blk_mq_put_ctx(data.ctx);
 		return cookie;
 	}
 
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 7d8f129..dd49735 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -91,7 +91,8 @@
 	lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
 	lim->virt_boundary_mask = 0;
 	lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
-	lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
+	lim->max_sectors = lim->max_dev_sectors = lim->max_hw_sectors =
+		BLK_SAFE_MAX_SECTORS;
 	lim->chunk_sectors = 0;
 	lim->max_write_same_sectors = 0;
 	lim->max_discard_sectors = 0;
@@ -127,6 +128,7 @@
 	lim->max_hw_sectors = UINT_MAX;
 	lim->max_segment_size = UINT_MAX;
 	lim->max_sectors = UINT_MAX;
+	lim->max_dev_sectors = UINT_MAX;
 	lim->max_write_same_sectors = UINT_MAX;
 }
 EXPORT_SYMBOL(blk_set_stacking_limits);
@@ -214,8 +216,8 @@
 EXPORT_SYMBOL(blk_queue_bounce_limit);
 
 /**
- * blk_limits_max_hw_sectors - set hard and soft limit of max sectors for request
- * @limits: the queue limits
+ * blk_queue_max_hw_sectors - set max sectors for a request for this queue
+ * @q:  the request queue for the device
  * @max_hw_sectors:  max hardware sectors in the usual 512b unit
  *
  * Description:
@@ -224,13 +226,19 @@
  *    the device driver based upon the capabilities of the I/O
  *    controller.
  *
+ *    max_dev_sectors is a hard limit imposed by the storage device for
+ *    READ/WRITE requests. It is set by the disk driver.
+ *
  *    max_sectors is a soft limit imposed by the block layer for
  *    filesystem type requests.  This value can be overridden on a
  *    per-device basis in /sys/block/<device>/queue/max_sectors_kb.
  *    The soft limit can not exceed max_hw_sectors.
  **/
-void blk_limits_max_hw_sectors(struct queue_limits *limits, unsigned int max_hw_sectors)
+void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors)
 {
+	struct queue_limits *limits = &q->limits;
+	unsigned int max_sectors;
+
 	if ((max_hw_sectors << 9) < PAGE_CACHE_SIZE) {
 		max_hw_sectors = 1 << (PAGE_CACHE_SHIFT - 9);
 		printk(KERN_INFO "%s: set to minimum %d\n",
@@ -238,22 +246,9 @@
 	}
 
 	limits->max_hw_sectors = max_hw_sectors;
-	limits->max_sectors = min_t(unsigned int, max_hw_sectors,
-				    BLK_DEF_MAX_SECTORS);
-}
-EXPORT_SYMBOL(blk_limits_max_hw_sectors);
-
-/**
- * blk_queue_max_hw_sectors - set max sectors for a request for this queue
- * @q:  the request queue for the device
- * @max_hw_sectors:  max hardware sectors in the usual 512b unit
- *
- * Description:
- *    See description for blk_limits_max_hw_sectors().
- **/
-void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors)
-{
-	blk_limits_max_hw_sectors(&q->limits, max_hw_sectors);
+	max_sectors = min_not_zero(max_hw_sectors, limits->max_dev_sectors);
+	max_sectors = min_t(unsigned int, max_sectors, BLK_DEF_MAX_SECTORS);
+	limits->max_sectors = max_sectors;
 }
 EXPORT_SYMBOL(blk_queue_max_hw_sectors);
 
@@ -527,6 +522,7 @@
 
 	t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
 	t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
+	t->max_dev_sectors = min_not_zero(t->max_dev_sectors, b->max_dev_sectors);
 	t->max_write_same_sectors = min(t->max_write_same_sectors,
 					b->max_write_same_sectors);
 	t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 565b8da..e140cc4 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -205,6 +205,9 @@
 	if (ret < 0)
 		return ret;
 
+	max_hw_sectors_kb = min_not_zero(max_hw_sectors_kb, (unsigned long)
+					 q->limits.max_dev_sectors >> 1);
+
 	if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb)
 		return -EINVAL;
 
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 246dfb1..aa40aa9 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -158,11 +158,13 @@
 {
 	if (blk_mark_rq_complete(req))
 		return;
-	blk_delete_timer(req);
-	if (req->q->mq_ops)
+
+	if (req->q->mq_ops) {
 		blk_mq_rq_timed_out(req, false);
-	else
+	} else {
+		blk_delete_timer(req);
 		blk_rq_timed_out(req);
+	}
 }
 EXPORT_SYMBOL_GPL(blk_abort_request);
 
diff --git a/block/blk.h b/block/blk.h
index da722eb..c43926d 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -72,8 +72,6 @@
 void __blk_queue_free_tags(struct request_queue *q);
 bool __blk_end_bidi_request(struct request *rq, int error,
 			    unsigned int nr_bytes, unsigned int bidi_bytes);
-int blk_queue_enter(struct request_queue *q, gfp_t gfp);
-void blk_queue_exit(struct request_queue *q);
 void blk_freeze_queue(struct request_queue *q);
 
 static inline void blk_queue_enter_live(struct request_queue *q)
diff --git a/block/noop-iosched.c b/block/noop-iosched.c
index 3de89d4..a163c48 100644
--- a/block/noop-iosched.c
+++ b/block/noop-iosched.c
@@ -21,10 +21,10 @@
 static int noop_dispatch(struct request_queue *q, int force)
 {
 	struct noop_data *nd = q->elevator->elevator_data;
+	struct request *rq;
 
-	if (!list_empty(&nd->queue)) {
-		struct request *rq;
-		rq = list_entry(nd->queue.next, struct request, queuelist);
+	rq = list_first_entry_or_null(&nd->queue, struct request, queuelist);
+	if (rq) {
 		list_del_init(&rq->queuelist);
 		elv_dispatch_sort(q, rq);
 		return 1;
@@ -46,7 +46,7 @@
 
 	if (rq->queuelist.prev == &nd->queue)
 		return NULL;
-	return list_entry(rq->queuelist.prev, struct request, queuelist);
+	return list_prev_entry(rq, queuelist);
 }
 
 static struct request *
@@ -56,7 +56,7 @@
 
 	if (rq->queuelist.next == &nd->queue)
 		return NULL;
-	return list_entry(rq->queuelist.next, struct request, queuelist);
+	return list_next_entry(rq, queuelist);
 }
 
 static int noop_init_queue(struct request_queue *q, struct elevator_type *e)
diff --git a/block/partition-generic.c b/block/partition-generic.c
index 3b03015..746935a 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -397,7 +397,7 @@
 	struct hd_struct *part;
 	int res;
 
-	if (bdev->bd_part_count)
+	if (bdev->bd_part_count || bdev->bd_super)
 		return -EBUSY;
 	res = invalidate_partition(disk, 0);
 	if (res)
diff --git a/block/partitions/mac.c b/block/partitions/mac.c
index c2c48ec..621317a 100644
--- a/block/partitions/mac.c
+++ b/block/partitions/mac.c
@@ -32,7 +32,7 @@
 	Sector sect;
 	unsigned char *data;
 	int slot, blocks_in_map;
-	unsigned secsize;
+	unsigned secsize, datasize, partoffset;
 #ifdef CONFIG_PPC_PMAC
 	int found_root = 0;
 	int found_root_goodness = 0;
@@ -50,10 +50,14 @@
 	}
 	secsize = be16_to_cpu(md->block_size);
 	put_dev_sector(sect);
-	data = read_part_sector(state, secsize/512, &sect);
+	datasize = round_down(secsize, 512);
+	data = read_part_sector(state, datasize / 512, &sect);
 	if (!data)
 		return -1;
-	part = (struct mac_partition *) (data + secsize%512);
+	partoffset = secsize % 512;
+	if (partoffset + sizeof(*part) > datasize)
+		return -1;
+	part = (struct mac_partition *) (data + partoffset);
 	if (be16_to_cpu(part->signature) != MAC_PARTITION_MAGIC) {
 		put_dev_sector(sect);
 		return 0;		/* not a MacOS disk */
diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
index 0aa6fdf..6d4d456 100644
--- a/crypto/algif_aead.c
+++ b/crypto/algif_aead.c
@@ -125,7 +125,7 @@
 	if (flags & MSG_DONTWAIT)
 		return -EAGAIN;
 
-	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 
 	for (;;) {
 		if (signal_pending(current))
@@ -139,7 +139,7 @@
 	}
 	finish_wait(sk_sleep(sk), &wait);
 
-	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 
 	return err;
 }
diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c
index af31a0e..ca9efe1 100644
--- a/crypto/algif_skcipher.c
+++ b/crypto/algif_skcipher.c
@@ -212,7 +212,7 @@
 	if (flags & MSG_DONTWAIT)
 		return -EAGAIN;
 
-	set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+	sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 
 	for (;;) {
 		if (signal_pending(current))
@@ -258,7 +258,7 @@
 		return -EAGAIN;
 	}
 
-	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 
 	for (;;) {
 		if (signal_pending(current))
@@ -272,7 +272,7 @@
 	}
 	finish_wait(sk_sleep(sk), &wait);
 
-	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 
 	return err;
 }
diff --git a/drivers/Makefile b/drivers/Makefile
index 73d0391..795d0ca 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -63,6 +63,7 @@
 obj-$(CONFIG_FB_INTEL)          += video/fbdev/intelfb/
 
 obj-$(CONFIG_PARPORT)		+= parport/
+obj-$(CONFIG_NVM)		+= lightnvm/
 obj-y				+= base/ block/ misc/ mfd/ nfc/
 obj-$(CONFIG_LIBNVDIMM)		+= nvdimm/
 obj-$(CONFIG_DMA_SHARED_BUFFER) += dma-buf/
@@ -70,7 +71,6 @@
 obj-y				+= macintosh/
 obj-$(CONFIG_IDE)		+= ide/
 obj-$(CONFIG_SCSI)		+= scsi/
-obj-$(CONFIG_NVM)		+= lightnvm/
 obj-y				+= nvme/
 obj-$(CONFIG_ATA)		+= ata/
 obj-$(CONFIG_TARGET_CORE)	+= target/
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 25dbb76..5eef4cb 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -58,10 +58,10 @@
 	bool
 
 config ACPI_DEBUGGER
-	bool "In-kernel debugger (EXPERIMENTAL)"
+	bool "AML debugger interface (EXPERIMENTAL)"
 	select ACPI_DEBUG
 	help
-	  Enable in-kernel debugging facilities: statistics, internal
+	  Enable in-kernel debugging of AML facilities: statistics, internal
 	  object dump, single step control method execution.
 	  This is still under development, currently enabling this only
 	  results in the compilation of the ACPICA debugger files.
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 3c083d2..6730f96 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -304,7 +304,7 @@
 
 static int register_pcc_channel(int pcc_subspace_idx)
 {
-	struct acpi_pcct_subspace *cppc_ss;
+	struct acpi_pcct_hw_reduced *cppc_ss;
 	unsigned int len;
 
 	if (pcc_subspace_idx >= 0) {
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index f61a7c8..b420fb4 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -1103,7 +1103,7 @@
 	}
 
 err_exit:
-	if (result && q)
+	if (result)
 		acpi_ec_delete_query(q);
 	if (data)
 		*data = value;
diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c
index f7dab53..e7ed39b 100644
--- a/drivers/acpi/nfit.c
+++ b/drivers/acpi/nfit.c
@@ -233,11 +233,12 @@
 		struct nfit_table_prev *prev,
 		struct acpi_nfit_system_address *spa)
 {
+	size_t length = min_t(size_t, sizeof(*spa), spa->header.length);
 	struct device *dev = acpi_desc->dev;
 	struct nfit_spa *nfit_spa;
 
 	list_for_each_entry(nfit_spa, &prev->spas, list) {
-		if (memcmp(nfit_spa->spa, spa, sizeof(*spa)) == 0) {
+		if (memcmp(nfit_spa->spa, spa, length) == 0) {
 			list_move_tail(&nfit_spa->list, &acpi_desc->spas);
 			return true;
 		}
@@ -259,11 +260,12 @@
 		struct nfit_table_prev *prev,
 		struct acpi_nfit_memory_map *memdev)
 {
+	size_t length = min_t(size_t, sizeof(*memdev), memdev->header.length);
 	struct device *dev = acpi_desc->dev;
 	struct nfit_memdev *nfit_memdev;
 
 	list_for_each_entry(nfit_memdev, &prev->memdevs, list)
-		if (memcmp(nfit_memdev->memdev, memdev, sizeof(*memdev)) == 0) {
+		if (memcmp(nfit_memdev->memdev, memdev, length) == 0) {
 			list_move_tail(&nfit_memdev->list, &acpi_desc->memdevs);
 			return true;
 		}
@@ -284,11 +286,12 @@
 		struct nfit_table_prev *prev,
 		struct acpi_nfit_control_region *dcr)
 {
+	size_t length = min_t(size_t, sizeof(*dcr), dcr->header.length);
 	struct device *dev = acpi_desc->dev;
 	struct nfit_dcr *nfit_dcr;
 
 	list_for_each_entry(nfit_dcr, &prev->dcrs, list)
-		if (memcmp(nfit_dcr->dcr, dcr, sizeof(*dcr)) == 0) {
+		if (memcmp(nfit_dcr->dcr, dcr, length) == 0) {
 			list_move_tail(&nfit_dcr->list, &acpi_desc->dcrs);
 			return true;
 		}
@@ -308,11 +311,12 @@
 		struct nfit_table_prev *prev,
 		struct acpi_nfit_data_region *bdw)
 {
+	size_t length = min_t(size_t, sizeof(*bdw), bdw->header.length);
 	struct device *dev = acpi_desc->dev;
 	struct nfit_bdw *nfit_bdw;
 
 	list_for_each_entry(nfit_bdw, &prev->bdws, list)
-		if (memcmp(nfit_bdw->bdw, bdw, sizeof(*bdw)) == 0) {
+		if (memcmp(nfit_bdw->bdw, bdw, length) == 0) {
 			list_move_tail(&nfit_bdw->list, &acpi_desc->bdws);
 			return true;
 		}
@@ -332,11 +336,12 @@
 		struct nfit_table_prev *prev,
 		struct acpi_nfit_interleave *idt)
 {
+	size_t length = min_t(size_t, sizeof(*idt), idt->header.length);
 	struct device *dev = acpi_desc->dev;
 	struct nfit_idt *nfit_idt;
 
 	list_for_each_entry(nfit_idt, &prev->idts, list)
-		if (memcmp(nfit_idt->idt, idt, sizeof(*idt)) == 0) {
+		if (memcmp(nfit_idt->idt, idt, length) == 0) {
 			list_move_tail(&nfit_idt->list, &acpi_desc->idts);
 			return true;
 		}
@@ -356,11 +361,12 @@
 		struct nfit_table_prev *prev,
 		struct acpi_nfit_flush_address *flush)
 {
+	size_t length = min_t(size_t, sizeof(*flush), flush->header.length);
 	struct device *dev = acpi_desc->dev;
 	struct nfit_flush *nfit_flush;
 
 	list_for_each_entry(nfit_flush, &prev->flushes, list)
-		if (memcmp(nfit_flush->flush, flush, sizeof(*flush)) == 0) {
+		if (memcmp(nfit_flush->flush, flush, length) == 0) {
 			list_move_tail(&nfit_flush->list, &acpi_desc->flushes);
 			return true;
 		}
@@ -655,7 +661,7 @@
 	struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
 	struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
 
-	return sprintf(buf, "%d\n", acpi_desc->nfit->header.revision);
+	return sprintf(buf, "%d\n", acpi_desc->acpi_header.revision);
 }
 static DEVICE_ATTR_RO(revision);
 
@@ -1652,7 +1658,6 @@
 
 	data = (u8 *) acpi_desc->nfit;
 	end = data + sz;
-	data += sizeof(struct acpi_table_nfit);
 	while (!IS_ERR_OR_NULL(data))
 		data = add_table(acpi_desc, &prev, data, end);
 
@@ -1748,13 +1753,29 @@
 		return PTR_ERR(acpi_desc);
 	}
 
-	acpi_desc->nfit = (struct acpi_table_nfit *) tbl;
+	/*
+	 * Save the acpi header for later and then skip it,
+	 * making nfit point to the first nfit table header.
+	 */
+	acpi_desc->acpi_header = *tbl;
+	acpi_desc->nfit = (void *) tbl + sizeof(struct acpi_table_nfit);
+	sz -= sizeof(struct acpi_table_nfit);
 
 	/* Evaluate _FIT and override with that if present */
 	status = acpi_evaluate_object(adev->handle, "_FIT", NULL, &buf);
 	if (ACPI_SUCCESS(status) && buf.length > 0) {
-		acpi_desc->nfit = (struct acpi_table_nfit *)buf.pointer;
-		sz = buf.length;
+		union acpi_object *obj;
+		/*
+		 * Adjust for the acpi_object header of the _FIT
+		 */
+		obj = buf.pointer;
+		if (obj->type == ACPI_TYPE_BUFFER) {
+			acpi_desc->nfit =
+				(struct acpi_nfit_header *)obj->buffer.pointer;
+			sz = obj->buffer.length;
+		} else
+			dev_dbg(dev, "%s invalid type %d, ignoring _FIT\n",
+				 __func__, (int) obj->type);
 	}
 
 	rc = acpi_nfit_init(acpi_desc, sz);
@@ -1777,7 +1798,8 @@
 {
 	struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev);
 	struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
-	struct acpi_table_nfit *nfit_saved;
+	struct acpi_nfit_header *nfit_saved;
+	union acpi_object *obj;
 	struct device *dev = &adev->dev;
 	acpi_status status;
 	int ret;
@@ -1808,12 +1830,19 @@
 	}
 
 	nfit_saved = acpi_desc->nfit;
-	acpi_desc->nfit = (struct acpi_table_nfit *)buf.pointer;
-	ret = acpi_nfit_init(acpi_desc, buf.length);
-	if (!ret) {
-		/* Merge failed, restore old nfit, and exit */
-		acpi_desc->nfit = nfit_saved;
-		dev_err(dev, "failed to merge updated NFIT\n");
+	obj = buf.pointer;
+	if (obj->type == ACPI_TYPE_BUFFER) {
+		acpi_desc->nfit =
+			(struct acpi_nfit_header *)obj->buffer.pointer;
+		ret = acpi_nfit_init(acpi_desc, obj->buffer.length);
+		if (ret) {
+			/* Merge failed, restore old nfit, and exit */
+			acpi_desc->nfit = nfit_saved;
+			dev_err(dev, "failed to merge updated NFIT\n");
+		}
+	} else {
+		/* Bad _FIT, restore old nfit */
+		dev_err(dev, "Invalid _FIT\n");
 	}
 	kfree(buf.pointer);
 
diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h
index 2ea5c07..3d549a3 100644
--- a/drivers/acpi/nfit.h
+++ b/drivers/acpi/nfit.h
@@ -96,7 +96,8 @@
 
 struct acpi_nfit_desc {
 	struct nvdimm_bus_descriptor nd_desc;
-	struct acpi_table_nfit *nfit;
+	struct acpi_table_header acpi_header;
+	struct acpi_nfit_header *nfit;
 	struct mutex spa_map_mutex;
 	struct mutex init_mutex;
 	struct list_head spa_maps;
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 850d7bf..ae3fe4e 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -768,6 +768,13 @@
 		else
 			continue;
 
+		/*
+		 * Some legacy x86 host bridge drivers use iomem_resource and
+		 * ioport_resource as default resource pool, skip it.
+		 */
+		if (res == root)
+			continue;
+
 		conflict = insert_resource_conflict(root, res);
 		if (conflict) {
 			dev_info(&info->bridge->dev,
diff --git a/drivers/acpi/sbshc.c b/drivers/acpi/sbshc.c
index bf034f8..2fa8304 100644
--- a/drivers/acpi/sbshc.c
+++ b/drivers/acpi/sbshc.c
@@ -14,7 +14,6 @@
 #include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
-#include <linux/dmi.h>
 #include "sbshc.h"
 
 #define PREFIX "ACPI: "
@@ -30,6 +29,7 @@
 	u8 query_bit;
 	smbus_alarm_callback callback;
 	void *context;
+	bool done;
 };
 
 static int acpi_smbus_hc_add(struct acpi_device *device);
@@ -88,8 +88,6 @@
 	ACPI_SMB_ALARM_DATA = 0x26,	/* 2 bytes alarm data */
 };
 
-static bool macbook;
-
 static inline int smb_hc_read(struct acpi_smb_hc *hc, u8 address, u8 *data)
 {
 	return ec_read(hc->offset + address, data);
@@ -100,27 +98,11 @@
 	return ec_write(hc->offset + address, data);
 }
 
-static inline int smb_check_done(struct acpi_smb_hc *hc)
-{
-	union acpi_smb_status status = {.raw = 0};
-	smb_hc_read(hc, ACPI_SMB_STATUS, &status.raw);
-	return status.fields.done && (status.fields.status == SMBUS_OK);
-}
-
 static int wait_transaction_complete(struct acpi_smb_hc *hc, int timeout)
 {
-	if (wait_event_timeout(hc->wait, smb_check_done(hc),
-			       msecs_to_jiffies(timeout)))
+	if (wait_event_timeout(hc->wait, hc->done, msecs_to_jiffies(timeout)))
 		return 0;
-	/*
-	 * After the timeout happens, OS will try to check the status of SMbus.
-	 * If the status is what OS expected, it will be regarded as the bogus
-	 * timeout.
-	 */
-	if (smb_check_done(hc))
-		return 0;
-	else
-		return -ETIME;
+	return -ETIME;
 }
 
 static int acpi_smbus_transaction(struct acpi_smb_hc *hc, u8 protocol,
@@ -135,8 +117,7 @@
 	}
 
 	mutex_lock(&hc->lock);
-	if (macbook)
-		udelay(5);
+	hc->done = false;
 	if (smb_hc_read(hc, ACPI_SMB_PROTOCOL, &temp))
 		goto end;
 	if (temp) {
@@ -235,8 +216,10 @@
 	if (smb_hc_read(hc, ACPI_SMB_STATUS, &status.raw))
 		return 0;
 	/* Check if it is only a completion notify */
-	if (status.fields.done)
+	if (status.fields.done && status.fields.status == SMBUS_OK) {
+		hc->done = true;
 		wake_up(&hc->wait);
+	}
 	if (!status.fields.alarm)
 		return 0;
 	mutex_lock(&hc->lock);
@@ -262,29 +245,12 @@
 			      acpi_handle handle, acpi_ec_query_func func,
 			      void *data);
 
-static int macbook_dmi_match(const struct dmi_system_id *d)
-{
-	pr_debug("Detected MacBook, enabling workaround\n");
-	macbook = true;
-	return 0;
-}
-
-static struct dmi_system_id acpi_smbus_dmi_table[] = {
-	{ macbook_dmi_match, "Apple MacBook", {
-	  DMI_MATCH(DMI_BOARD_VENDOR, "Apple"),
-	  DMI_MATCH(DMI_PRODUCT_NAME, "MacBook") },
-	},
-	{ },
-};
-
 static int acpi_smbus_hc_add(struct acpi_device *device)
 {
 	int status;
 	unsigned long long val;
 	struct acpi_smb_hc *hc;
 
-	dmi_check_system(acpi_smbus_dmi_table);
-
 	if (!device)
 		return -EINVAL;
 
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index e03b1ad..167418e 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -1775,10 +1775,10 @@
 	}
 
 	pd = of_genpd_get_from_provider(&pd_args);
+	of_node_put(pd_args.np);
 	if (IS_ERR(pd)) {
 		dev_dbg(dev, "%s() failed to find PM domain: %ld\n",
 			__func__, PTR_ERR(pd));
-		of_node_put(dev->of_node);
 		return -EPROBE_DEFER;
 	}
 
@@ -1796,7 +1796,6 @@
 	if (ret < 0) {
 		dev_err(dev, "failed to add to PM domain %s: %d",
 			pd->name, ret);
-		of_node_put(dev->of_node);
 		goto out;
 	}
 
diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c
index e60dd12..1e937ac 100644
--- a/drivers/base/power/domain_governor.c
+++ b/drivers/base/power/domain_governor.c
@@ -160,9 +160,6 @@
 		struct gpd_timing_data *td;
 		s64 constraint_ns;
 
-		if (!pdd->dev->driver)
-			continue;
-
 		/*
 		 * Check if the device is allowed to be off long enough for the
 		 * domain to turn off and on (that's how much time it will
diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c
index eb6e674..0d77cd6 100644
--- a/drivers/base/power/wakeirq.c
+++ b/drivers/base/power/wakeirq.c
@@ -68,6 +68,9 @@
 	struct wake_irq *wirq;
 	int err;
 
+	if (irq < 0)
+		return -EINVAL;
+
 	wirq = kzalloc(sizeof(*wirq), GFP_KERNEL);
 	if (!wirq)
 		return -ENOMEM;
@@ -167,6 +170,9 @@
 	struct wake_irq *wirq;
 	int err;
 
+	if (irq < 0)
+		return -EINVAL;
+
 	wirq = kzalloc(sizeof(*wirq), GFP_KERNEL);
 	if (!wirq)
 		return -ENOMEM;
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index a28a562..3457ac8 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -3810,7 +3810,6 @@
 	sector_t capacity;
 	unsigned int index = 0;
 	struct kobject *kobj;
-	unsigned char thd_name[16];
 
 	if (dd->disk)
 		goto skip_create_disk; /* hw init done, before rebuild */
@@ -3958,10 +3957,9 @@
 	}
 
 start_service_thread:
-	sprintf(thd_name, "mtip_svc_thd_%02d", index);
 	dd->mtip_svc_handler = kthread_create_on_node(mtip_service_thread,
-						dd, dd->numa_node, "%s",
-						thd_name);
+						dd, dd->numa_node,
+						"mtip_svc_thd_%02d", index);
 
 	if (IS_ERR(dd->mtip_svc_handler)) {
 		dev_err(&dd->pdev->dev, "service thread failed to start\n");
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 6255d1c..0c3940e 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -8,6 +8,7 @@
 #include <linux/slab.h>
 #include <linux/blk-mq.h>
 #include <linux/hrtimer.h>
+#include <linux/lightnvm.h>
 
 struct nullb_cmd {
 	struct list_head list;
@@ -17,6 +18,7 @@
 	struct bio *bio;
 	unsigned int tag;
 	struct nullb_queue *nq;
+	struct hrtimer timer;
 };
 
 struct nullb_queue {
@@ -39,23 +41,14 @@
 
 	struct nullb_queue *queues;
 	unsigned int nr_queues;
+	char disk_name[DISK_NAME_LEN];
 };
 
 static LIST_HEAD(nullb_list);
 static struct mutex lock;
 static int null_major;
 static int nullb_indexes;
-
-struct completion_queue {
-	struct llist_head list;
-	struct hrtimer timer;
-};
-
-/*
- * These are per-cpu for now, they will need to be configured by the
- * complete_queues parameter and appropriately mapped.
- */
-static DEFINE_PER_CPU(struct completion_queue, completion_queues);
+static struct kmem_cache *ppa_cache;
 
 enum {
 	NULL_IRQ_NONE		= 0,
@@ -119,6 +112,10 @@
 module_param(nr_devices, int, S_IRUGO);
 MODULE_PARM_DESC(nr_devices, "Number of devices to register");
 
+static bool use_lightnvm;
+module_param(use_lightnvm, bool, S_IRUGO);
+MODULE_PARM_DESC(use_lightnvm, "Register as a LightNVM device");
+
 static int irqmode = NULL_IRQ_SOFTIRQ;
 
 static int null_set_irqmode(const char *str, const struct kernel_param *kp)
@@ -135,8 +132,8 @@
 device_param_cb(irqmode, &null_irqmode_param_ops, &irqmode, S_IRUGO);
 MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer");
 
-static int completion_nsec = 10000;
-module_param(completion_nsec, int, S_IRUGO);
+static unsigned long completion_nsec = 10000;
+module_param(completion_nsec, ulong, S_IRUGO);
 MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns");
 
 static int hw_queue_depth = 64;
@@ -173,6 +170,8 @@
 	put_tag(cmd->nq, cmd->tag);
 }
 
+static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer);
+
 static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq)
 {
 	struct nullb_cmd *cmd;
@@ -183,6 +182,11 @@
 		cmd = &nq->cmds[tag];
 		cmd->tag = tag;
 		cmd->nq = nq;
+		if (irqmode == NULL_IRQ_TIMER) {
+			hrtimer_init(&cmd->timer, CLOCK_MONOTONIC,
+				     HRTIMER_MODE_REL);
+			cmd->timer.function = null_cmd_timer_expired;
+		}
 		return cmd;
 	}
 
@@ -213,6 +217,8 @@
 
 static void end_cmd(struct nullb_cmd *cmd)
 {
+	struct request_queue *q = NULL;
+
 	switch (queue_mode)  {
 	case NULL_Q_MQ:
 		blk_mq_end_request(cmd->rq, 0);
@@ -223,55 +229,37 @@
 		break;
 	case NULL_Q_BIO:
 		bio_endio(cmd->bio);
-		break;
+		goto free_cmd;
 	}
 
+	if (cmd->rq)
+		q = cmd->rq->q;
+
+	/* Restart queue if needed, as we are freeing a tag */
+	if (q && !q->mq_ops && blk_queue_stopped(q)) {
+		unsigned long flags;
+
+		spin_lock_irqsave(q->queue_lock, flags);
+		if (blk_queue_stopped(q))
+			blk_start_queue(q);
+		spin_unlock_irqrestore(q->queue_lock, flags);
+	}
+free_cmd:
 	free_cmd(cmd);
 }
 
 static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
 {
-	struct completion_queue *cq;
-	struct llist_node *entry;
-	struct nullb_cmd *cmd;
-
-	cq = &per_cpu(completion_queues, smp_processor_id());
-
-	while ((entry = llist_del_all(&cq->list)) != NULL) {
-		entry = llist_reverse_order(entry);
-		do {
-			struct request_queue *q = NULL;
-
-			cmd = container_of(entry, struct nullb_cmd, ll_list);
-			entry = entry->next;
-			if (cmd->rq)
-				q = cmd->rq->q;
-			end_cmd(cmd);
-
-			if (q && !q->mq_ops && blk_queue_stopped(q)) {
-				spin_lock(q->queue_lock);
-				if (blk_queue_stopped(q))
-					blk_start_queue(q);
-				spin_unlock(q->queue_lock);
-			}
-		} while (entry);
-	}
+	end_cmd(container_of(timer, struct nullb_cmd, timer));
 
 	return HRTIMER_NORESTART;
 }
 
 static void null_cmd_end_timer(struct nullb_cmd *cmd)
 {
-	struct completion_queue *cq = &per_cpu(completion_queues, get_cpu());
+	ktime_t kt = ktime_set(0, completion_nsec);
 
-	cmd->ll_list.next = NULL;
-	if (llist_add(&cmd->ll_list, &cq->list)) {
-		ktime_t kt = ktime_set(0, completion_nsec);
-
-		hrtimer_start(&cq->timer, kt, HRTIMER_MODE_REL_PINNED);
-	}
-
-	put_cpu();
+	hrtimer_start(&cmd->timer, kt, HRTIMER_MODE_REL);
 }
 
 static void null_softirq_done_fn(struct request *rq)
@@ -369,6 +357,10 @@
 {
 	struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
 
+	if (irqmode == NULL_IRQ_TIMER) {
+		hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+		cmd->timer.function = null_cmd_timer_expired;
+	}
 	cmd->rq = bd->rq;
 	cmd->nq = hctx->driver_data;
 
@@ -427,15 +419,156 @@
 {
 	list_del_init(&nullb->list);
 
-	del_gendisk(nullb->disk);
+	if (use_lightnvm)
+		nvm_unregister(nullb->disk_name);
+	else
+		del_gendisk(nullb->disk);
 	blk_cleanup_queue(nullb->q);
 	if (queue_mode == NULL_Q_MQ)
 		blk_mq_free_tag_set(&nullb->tag_set);
-	put_disk(nullb->disk);
+	if (!use_lightnvm)
+		put_disk(nullb->disk);
 	cleanup_queues(nullb);
 	kfree(nullb);
 }
 
+#ifdef CONFIG_NVM
+
+static void null_lnvm_end_io(struct request *rq, int error)
+{
+	struct nvm_rq *rqd = rq->end_io_data;
+	struct nvm_dev *dev = rqd->dev;
+
+	dev->mt->end_io(rqd, error);
+
+	blk_put_request(rq);
+}
+
+static int null_lnvm_submit_io(struct request_queue *q, struct nvm_rq *rqd)
+{
+	struct request *rq;
+	struct bio *bio = rqd->bio;
+
+	rq = blk_mq_alloc_request(q, bio_rw(bio), GFP_KERNEL, 0);
+	if (IS_ERR(rq))
+		return -ENOMEM;
+
+	rq->cmd_type = REQ_TYPE_DRV_PRIV;
+	rq->__sector = bio->bi_iter.bi_sector;
+	rq->ioprio = bio_prio(bio);
+
+	if (bio_has_data(bio))
+		rq->nr_phys_segments = bio_phys_segments(q, bio);
+
+	rq->__data_len = bio->bi_iter.bi_size;
+	rq->bio = rq->biotail = bio;
+
+	rq->end_io_data = rqd;
+
+	blk_execute_rq_nowait(q, NULL, rq, 0, null_lnvm_end_io);
+
+	return 0;
+}
+
+static int null_lnvm_id(struct request_queue *q, struct nvm_id *id)
+{
+	sector_t size = gb * 1024 * 1024 * 1024ULL;
+	sector_t blksize;
+	struct nvm_id_group *grp;
+
+	id->ver_id = 0x1;
+	id->vmnt = 0;
+	id->cgrps = 1;
+	id->cap = 0x3;
+	id->dom = 0x1;
+
+	id->ppaf.blk_offset = 0;
+	id->ppaf.blk_len = 16;
+	id->ppaf.pg_offset = 16;
+	id->ppaf.pg_len = 16;
+	id->ppaf.sect_offset = 32;
+	id->ppaf.sect_len = 8;
+	id->ppaf.pln_offset = 40;
+	id->ppaf.pln_len = 8;
+	id->ppaf.lun_offset = 48;
+	id->ppaf.lun_len = 8;
+	id->ppaf.ch_offset = 56;
+	id->ppaf.ch_len = 8;
+
+	do_div(size, bs); /* convert size to pages */
+	do_div(size, 256); /* concert size to pgs pr blk */
+	grp = &id->groups[0];
+	grp->mtype = 0;
+	grp->fmtype = 0;
+	grp->num_ch = 1;
+	grp->num_pg = 256;
+	blksize = size;
+	do_div(size, (1 << 16));
+	grp->num_lun = size + 1;
+	do_div(blksize, grp->num_lun);
+	grp->num_blk = blksize;
+	grp->num_pln = 1;
+
+	grp->fpg_sz = bs;
+	grp->csecs = bs;
+	grp->trdt = 25000;
+	grp->trdm = 25000;
+	grp->tprt = 500000;
+	grp->tprm = 500000;
+	grp->tbet = 1500000;
+	grp->tbem = 1500000;
+	grp->mpos = 0x010101; /* single plane rwe */
+	grp->cpar = hw_queue_depth;
+
+	return 0;
+}
+
+static void *null_lnvm_create_dma_pool(struct request_queue *q, char *name)
+{
+	mempool_t *virtmem_pool;
+
+	virtmem_pool = mempool_create_slab_pool(64, ppa_cache);
+	if (!virtmem_pool) {
+		pr_err("null_blk: Unable to create virtual memory pool\n");
+		return NULL;
+	}
+
+	return virtmem_pool;
+}
+
+static void null_lnvm_destroy_dma_pool(void *pool)
+{
+	mempool_destroy(pool);
+}
+
+static void *null_lnvm_dev_dma_alloc(struct request_queue *q, void *pool,
+				gfp_t mem_flags, dma_addr_t *dma_handler)
+{
+	return mempool_alloc(pool, mem_flags);
+}
+
+static void null_lnvm_dev_dma_free(void *pool, void *entry,
+							dma_addr_t dma_handler)
+{
+	mempool_free(entry, pool);
+}
+
+static struct nvm_dev_ops null_lnvm_dev_ops = {
+	.identity		= null_lnvm_id,
+	.submit_io		= null_lnvm_submit_io,
+
+	.create_dma_pool	= null_lnvm_create_dma_pool,
+	.destroy_dma_pool	= null_lnvm_destroy_dma_pool,
+	.dev_dma_alloc		= null_lnvm_dev_dma_alloc,
+	.dev_dma_free		= null_lnvm_dev_dma_free,
+
+	/* Simulate nvme protocol restriction */
+	.max_phys_sect		= 64,
+};
+#else
+static struct nvm_dev_ops null_lnvm_dev_ops;
+#endif /* CONFIG_NVM */
+
 static int null_open(struct block_device *bdev, fmode_t mode)
 {
 	return 0;
@@ -575,11 +708,6 @@
 	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q);
 	queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, nullb->q);
 
-	disk = nullb->disk = alloc_disk_node(1, home_node);
-	if (!disk) {
-		rv = -ENOMEM;
-		goto out_cleanup_blk_queue;
-	}
 
 	mutex_lock(&lock);
 	list_add_tail(&nullb->list, &nullb_list);
@@ -589,6 +717,21 @@
 	blk_queue_logical_block_size(nullb->q, bs);
 	blk_queue_physical_block_size(nullb->q, bs);
 
+	sprintf(nullb->disk_name, "nullb%d", nullb->index);
+
+	if (use_lightnvm) {
+		rv = nvm_register(nullb->q, nullb->disk_name,
+							&null_lnvm_dev_ops);
+		if (rv)
+			goto out_cleanup_blk_queue;
+		goto done;
+	}
+
+	disk = nullb->disk = alloc_disk_node(1, home_node);
+	if (!disk) {
+		rv = -ENOMEM;
+		goto out_cleanup_lightnvm;
+	}
 	size = gb * 1024 * 1024 * 1024ULL;
 	set_capacity(disk, size >> 9);
 
@@ -598,10 +741,15 @@
 	disk->fops		= &null_fops;
 	disk->private_data	= nullb;
 	disk->queue		= nullb->q;
-	sprintf(disk->disk_name, "nullb%d", nullb->index);
+	strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN);
+
 	add_disk(disk);
+done:
 	return 0;
 
+out_cleanup_lightnvm:
+	if (use_lightnvm)
+		nvm_unregister(nullb->disk_name);
 out_cleanup_blk_queue:
 	blk_cleanup_queue(nullb->q);
 out_cleanup_tags:
@@ -625,6 +773,18 @@
 		bs = PAGE_SIZE;
 	}
 
+	if (use_lightnvm && bs != 4096) {
+		pr_warn("null_blk: LightNVM only supports 4k block size\n");
+		pr_warn("null_blk: defaults block size to 4k\n");
+		bs = 4096;
+	}
+
+	if (use_lightnvm && queue_mode != NULL_Q_MQ) {
+		pr_warn("null_blk: LightNVM only supported for blk-mq\n");
+		pr_warn("null_blk: defaults queue mode to blk-mq\n");
+		queue_mode = NULL_Q_MQ;
+	}
+
 	if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
 		if (submit_queues < nr_online_nodes) {
 			pr_warn("null_blk: submit_queues param is set to %u.",
@@ -638,32 +798,31 @@
 
 	mutex_init(&lock);
 
-	/* Initialize a separate list for each CPU for issuing softirqs */
-	for_each_possible_cpu(i) {
-		struct completion_queue *cq = &per_cpu(completion_queues, i);
-
-		init_llist_head(&cq->list);
-
-		if (irqmode != NULL_IRQ_TIMER)
-			continue;
-
-		hrtimer_init(&cq->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-		cq->timer.function = null_cmd_timer_expired;
-	}
-
 	null_major = register_blkdev(0, "nullb");
 	if (null_major < 0)
 		return null_major;
 
+	if (use_lightnvm) {
+		ppa_cache = kmem_cache_create("ppa_cache", 64 * sizeof(u64),
+								0, 0, NULL);
+		if (!ppa_cache) {
+			pr_err("null_blk: unable to create ppa cache\n");
+			return -ENOMEM;
+		}
+	}
+
 	for (i = 0; i < nr_devices; i++) {
 		if (null_add_dev()) {
 			unregister_blkdev(null_major, "nullb");
-			return -EINVAL;
+			goto err_ppa;
 		}
 	}
 
 	pr_info("null: module loaded\n");
 	return 0;
+err_ppa:
+	kmem_cache_destroy(ppa_cache);
+	return -EINVAL;
 }
 
 static void __exit null_exit(void)
@@ -678,6 +837,8 @@
 		null_del_dev(nullb);
 	}
 	mutex_unlock(&lock);
+
+	kmem_cache_destroy(ppa_cache);
 }
 
 module_init(null_init);
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 235708c..81ea69f 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3442,6 +3442,7 @@
 		goto err_rq;
 	}
 	img_request->rq = rq;
+	snapc = NULL; /* img_request consumes a ref */
 
 	if (op_type == OBJ_OP_DISCARD)
 		result = rbd_img_request_fill(img_request, OBJ_REQUEST_NODATA,
diff --git a/drivers/bus/omap-ocp2scp.c b/drivers/bus/omap-ocp2scp.c
index 9f18569..bf500e0 100644
--- a/drivers/bus/omap-ocp2scp.c
+++ b/drivers/bus/omap-ocp2scp.c
@@ -117,7 +117,7 @@
 
 module_platform_driver(omap_ocp2scp_driver);
 
-MODULE_ALIAS("platform: omap-ocp2scp");
+MODULE_ALIAS("platform:omap-ocp2scp");
 MODULE_AUTHOR("Texas Instruments Inc.");
 MODULE_DESCRIPTION("OMAP OCP2SCP driver");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 654f6f3..55fe902 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -412,18 +412,42 @@
 	return rv;
 }
 
-static void start_check_enables(struct smi_info *smi_info)
+static void smi_mod_timer(struct smi_info *smi_info, unsigned long new_val)
+{
+	smi_info->last_timeout_jiffies = jiffies;
+	mod_timer(&smi_info->si_timer, new_val);
+	smi_info->timer_running = true;
+}
+
+/*
+ * Start a new message and (re)start the timer and thread.
+ */
+static void start_new_msg(struct smi_info *smi_info, unsigned char *msg,
+			  unsigned int size)
+{
+	smi_mod_timer(smi_info, jiffies + SI_TIMEOUT_JIFFIES);
+
+	if (smi_info->thread)
+		wake_up_process(smi_info->thread);
+
+	smi_info->handlers->start_transaction(smi_info->si_sm, msg, size);
+}
+
+static void start_check_enables(struct smi_info *smi_info, bool start_timer)
 {
 	unsigned char msg[2];
 
 	msg[0] = (IPMI_NETFN_APP_REQUEST << 2);
 	msg[1] = IPMI_GET_BMC_GLOBAL_ENABLES_CMD;
 
-	smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2);
+	if (start_timer)
+		start_new_msg(smi_info, msg, 2);
+	else
+		smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2);
 	smi_info->si_state = SI_CHECKING_ENABLES;
 }
 
-static void start_clear_flags(struct smi_info *smi_info)
+static void start_clear_flags(struct smi_info *smi_info, bool start_timer)
 {
 	unsigned char msg[3];
 
@@ -432,7 +456,10 @@
 	msg[1] = IPMI_CLEAR_MSG_FLAGS_CMD;
 	msg[2] = WDT_PRE_TIMEOUT_INT;
 
-	smi_info->handlers->start_transaction(smi_info->si_sm, msg, 3);
+	if (start_timer)
+		start_new_msg(smi_info, msg, 3);
+	else
+		smi_info->handlers->start_transaction(smi_info->si_sm, msg, 3);
 	smi_info->si_state = SI_CLEARING_FLAGS;
 }
 
@@ -442,10 +469,8 @@
 	smi_info->curr_msg->data[1] = IPMI_GET_MSG_CMD;
 	smi_info->curr_msg->data_size = 2;
 
-	smi_info->handlers->start_transaction(
-		smi_info->si_sm,
-		smi_info->curr_msg->data,
-		smi_info->curr_msg->data_size);
+	start_new_msg(smi_info, smi_info->curr_msg->data,
+		      smi_info->curr_msg->data_size);
 	smi_info->si_state = SI_GETTING_MESSAGES;
 }
 
@@ -455,20 +480,11 @@
 	smi_info->curr_msg->data[1] = IPMI_READ_EVENT_MSG_BUFFER_CMD;
 	smi_info->curr_msg->data_size = 2;
 
-	smi_info->handlers->start_transaction(
-		smi_info->si_sm,
-		smi_info->curr_msg->data,
-		smi_info->curr_msg->data_size);
+	start_new_msg(smi_info, smi_info->curr_msg->data,
+		      smi_info->curr_msg->data_size);
 	smi_info->si_state = SI_GETTING_EVENTS;
 }
 
-static void smi_mod_timer(struct smi_info *smi_info, unsigned long new_val)
-{
-	smi_info->last_timeout_jiffies = jiffies;
-	mod_timer(&smi_info->si_timer, new_val);
-	smi_info->timer_running = true;
-}
-
 /*
  * When we have a situtaion where we run out of memory and cannot
  * allocate messages, we just leave them in the BMC and run the system
@@ -478,11 +494,11 @@
  * Note that we cannot just use disable_irq(), since the interrupt may
  * be shared.
  */
-static inline bool disable_si_irq(struct smi_info *smi_info)
+static inline bool disable_si_irq(struct smi_info *smi_info, bool start_timer)
 {
 	if ((smi_info->irq) && (!smi_info->interrupt_disabled)) {
 		smi_info->interrupt_disabled = true;
-		start_check_enables(smi_info);
+		start_check_enables(smi_info, start_timer);
 		return true;
 	}
 	return false;
@@ -492,7 +508,7 @@
 {
 	if ((smi_info->irq) && (smi_info->interrupt_disabled)) {
 		smi_info->interrupt_disabled = false;
-		start_check_enables(smi_info);
+		start_check_enables(smi_info, true);
 		return true;
 	}
 	return false;
@@ -510,7 +526,7 @@
 
 	msg = ipmi_alloc_smi_msg();
 	if (!msg) {
-		if (!disable_si_irq(smi_info))
+		if (!disable_si_irq(smi_info, true))
 			smi_info->si_state = SI_NORMAL;
 	} else if (enable_si_irq(smi_info)) {
 		ipmi_free_smi_msg(msg);
@@ -526,7 +542,7 @@
 		/* Watchdog pre-timeout */
 		smi_inc_stat(smi_info, watchdog_pretimeouts);
 
-		start_clear_flags(smi_info);
+		start_clear_flags(smi_info, true);
 		smi_info->msg_flags &= ~WDT_PRE_TIMEOUT_INT;
 		if (smi_info->intf)
 			ipmi_smi_watchdog_pretimeout(smi_info->intf);
@@ -879,8 +895,7 @@
 			msg[0] = (IPMI_NETFN_APP_REQUEST << 2);
 			msg[1] = IPMI_GET_MSG_FLAGS_CMD;
 
-			smi_info->handlers->start_transaction(
-				smi_info->si_sm, msg, 2);
+			start_new_msg(smi_info, msg, 2);
 			smi_info->si_state = SI_GETTING_FLAGS;
 			goto restart;
 		}
@@ -910,7 +925,7 @@
 		 * disable and messages disabled.
 		 */
 		if (smi_info->supports_event_msg_buff || smi_info->irq) {
-			start_check_enables(smi_info);
+			start_check_enables(smi_info, true);
 		} else {
 			smi_info->curr_msg = alloc_msg_handle_irq(smi_info);
 			if (!smi_info->curr_msg)
@@ -920,6 +935,13 @@
 		}
 		goto restart;
 	}
+
+	if (si_sm_result == SI_SM_IDLE && smi_info->timer_running) {
+		/* Ok it if fails, the timer will just go off. */
+		if (del_timer(&smi_info->si_timer))
+			smi_info->timer_running = false;
+	}
+
  out:
 	return si_sm_result;
 }
@@ -2560,6 +2582,7 @@
 	  .data = (void *)(unsigned long) SI_BT },
 	{},
 };
+MODULE_DEVICE_TABLE(of, of_ipmi_match);
 
 static int of_ipmi_probe(struct platform_device *dev)
 {
@@ -2646,7 +2669,6 @@
 	}
 	return 0;
 }
-MODULE_DEVICE_TABLE(of, of_ipmi_match);
 #else
 #define of_ipmi_match NULL
 static int of_ipmi_probe(struct platform_device *dev)
@@ -3613,7 +3635,7 @@
 	 * Start clearing the flags before we enable interrupts or the
 	 * timer to avoid racing with the timer.
 	 */
-	start_clear_flags(new_smi);
+	start_clear_flags(new_smi, false);
 
 	/*
 	 * IRQ is defined to be set when non-zero.  req_events will
@@ -3908,7 +3930,7 @@
 		poll(to_clean);
 		schedule_timeout_uninterruptible(1);
 	}
-	disable_si_irq(to_clean);
+	disable_si_irq(to_clean, false);
 	while (to_clean->curr_msg || (to_clean->si_state != SI_NORMAL)) {
 		poll(to_clean);
 		schedule_timeout_uninterruptible(1);
diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c
index 0ac3bd1..096f0ce 100644
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -153,6 +153,9 @@
 /* The pre-timeout is disabled by default. */
 static int pretimeout;
 
+/* Default timeout to set on panic */
+static int panic_wdt_timeout = 255;
+
 /* Default action is to reset the board on a timeout. */
 static unsigned char action_val = WDOG_TIMEOUT_RESET;
 
@@ -293,6 +296,9 @@
 module_param(pretimeout, timeout, 0644);
 MODULE_PARM_DESC(pretimeout, "Pretimeout value in seconds.");
 
+module_param(panic_wdt_timeout, timeout, 0644);
+MODULE_PARM_DESC(timeout, "Timeout value on kernel panic in seconds.");
+
 module_param_cb(action, &param_ops_str, action_op, 0644);
 MODULE_PARM_DESC(action, "Timeout action. One of: "
 		 "reset, none, power_cycle, power_off.");
@@ -1189,7 +1195,7 @@
 		/* Make sure we do this only once. */
 		panic_event_handled = 1;
 
-		timeout = 255;
+		timeout = panic_wdt_timeout;
 		pretimeout = 0;
 		panic_halt_ipmi_set_timeout();
 	}
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index 71cfdf7..2eb5f0e 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -1,4 +1,5 @@
 menu "Clock Source drivers"
+	depends on !ARCH_USES_GETTIMEOFFSET
 
 config CLKSRC_OF
 	bool
diff --git a/drivers/clocksource/fsl_ftm_timer.c b/drivers/clocksource/fsl_ftm_timer.c
index 10202f1..517e1c7 100644
--- a/drivers/clocksource/fsl_ftm_timer.c
+++ b/drivers/clocksource/fsl_ftm_timer.c
@@ -203,7 +203,7 @@
 	int err;
 
 	ftm_writel(0x00, priv->clkevt_base + FTM_CNTIN);
-	ftm_writel(~0UL, priv->clkevt_base + FTM_MOD);
+	ftm_writel(~0u, priv->clkevt_base + FTM_MOD);
 
 	ftm_reset_counter(priv->clkevt_base);
 
@@ -230,7 +230,7 @@
 	int err;
 
 	ftm_writel(0x00, priv->clksrc_base + FTM_CNTIN);
-	ftm_writel(~0UL, priv->clksrc_base + FTM_MOD);
+	ftm_writel(~0u, priv->clksrc_base + FTM_MOD);
 
 	ftm_reset_counter(priv->clksrc_base);
 
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index 1582c1c..235a1ba 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -84,6 +84,7 @@
 config ARM_MT8173_CPUFREQ
 	bool "Mediatek MT8173 CPUFreq support"
 	depends on ARCH_MEDIATEK && REGULATOR
+	depends on ARM64 || (ARM_CPU_TOPOLOGY && COMPILE_TEST)
 	depends on !CPU_THERMAL || THERMAL=y
 	select PM_OPP
 	help
@@ -201,7 +202,7 @@
 
 config ARM_SCPI_CPUFREQ
         tristate "SCPI based CPUfreq driver"
-	depends on ARM_BIG_LITTLE_CPUFREQ && ARM_SCPI_PROTOCOL
+	depends on ARM_BIG_LITTLE_CPUFREQ && ARM_SCPI_PROTOCOL && COMMON_CLK_SCPI
         help
 	  This adds the CPUfreq driver support for ARM big.LITTLE platforms
 	  using SCPI protocol for CPU power management.
diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86
index adbd1de..c59bdcb 100644
--- a/drivers/cpufreq/Kconfig.x86
+++ b/drivers/cpufreq/Kconfig.x86
@@ -5,7 +5,6 @@
 config X86_INTEL_PSTATE
        bool "Intel P state control"
        depends on X86
-       select ACPI_PROCESSOR if ACPI
        help
           This driver provides a P state for Intel core processors.
 	  The driver implements an internal governor and will become
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index e8cb334..7c0bdfb 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -98,10 +98,11 @@
 	policy->max = cpu->perf_caps.highest_perf;
 	policy->cpuinfo.min_freq = policy->min;
 	policy->cpuinfo.max_freq = policy->max;
+	policy->shared_type = cpu->shared_type;
 
 	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
 		cpumask_copy(policy->cpus, cpu->shared_cpu_map);
-	else {
+	else if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL) {
 		/* Support only SW_ANY for now. */
 		pr_debug("Unsupported CPU co-ord type\n");
 		return -EFAULT;
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 7c48e73..8412ce5 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -976,10 +976,14 @@
 
 	new_policy.governor = gov;
 
-	/* Use the default policy if its valid. */
-	if (cpufreq_driver->setpolicy)
-		cpufreq_parse_governor(gov->name, &new_policy.policy, NULL);
-
+	/* Use the default policy if there is no last_policy. */
+	if (cpufreq_driver->setpolicy) {
+		if (policy->last_policy)
+			new_policy.policy = policy->last_policy;
+		else
+			cpufreq_parse_governor(gov->name, &new_policy.policy,
+					       NULL);
+	}
 	/* set default policy */
 	return cpufreq_set_policy(policy, &new_policy);
 }
@@ -1330,6 +1334,8 @@
 		if (has_target())
 			strncpy(policy->last_governor, policy->governor->name,
 				CPUFREQ_NAME_LEN);
+		else
+			policy->last_policy = policy->policy;
 	} else if (cpu == policy->cpu) {
 		/* Nominate new CPU */
 		policy->cpu = cpumask_any(policy->cpus);
@@ -1401,13 +1407,10 @@
 	}
 
 	cpumask_clear_cpu(cpu, policy->real_cpus);
-
-	if (cpumask_empty(policy->real_cpus)) {
-		cpufreq_policy_free(policy, true);
-		return;
-	}
-
 	remove_cpu_dev_symlink(policy, cpu);
+
+	if (cpumask_empty(policy->real_cpus))
+		cpufreq_policy_free(policy, true);
 }
 
 static void handle_update(struct work_struct *work)
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 2e31d09..4d07cbd 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -34,14 +34,10 @@
 #include <asm/cpu_device_id.h>
 #include <asm/cpufeature.h>
 
-#if IS_ENABLED(CONFIG_ACPI)
-#include <acpi/processor.h>
-#endif
-
-#define BYT_RATIOS		0x66a
-#define BYT_VIDS		0x66b
-#define BYT_TURBO_RATIOS	0x66c
-#define BYT_TURBO_VIDS		0x66d
+#define ATOM_RATIOS		0x66a
+#define ATOM_VIDS		0x66b
+#define ATOM_TURBO_RATIOS	0x66c
+#define ATOM_TURBO_VIDS		0x66d
 
 #define FRAC_BITS 8
 #define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
@@ -117,9 +113,6 @@
 	u64	prev_mperf;
 	u64	prev_tsc;
 	struct sample sample;
-#if IS_ENABLED(CONFIG_ACPI)
-	struct acpi_processor_performance acpi_perf_data;
-#endif
 };
 
 static struct cpudata **all_cpu_data;
@@ -150,7 +143,6 @@
 static struct pstate_adjust_policy pid_params;
 static struct pstate_funcs pstate_funcs;
 static int hwp_active;
-static int no_acpi_perf;
 
 struct perf_limits {
 	int no_turbo;
@@ -163,8 +155,6 @@
 	int max_sysfs_pct;
 	int min_policy_pct;
 	int min_sysfs_pct;
-	int max_perf_ctl;
-	int min_perf_ctl;
 };
 
 static struct perf_limits performance_limits = {
@@ -191,8 +181,6 @@
 	.max_sysfs_pct = 100,
 	.min_policy_pct = 0,
 	.min_sysfs_pct = 0,
-	.max_perf_ctl = 0,
-	.min_perf_ctl = 0,
 };
 
 #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE
@@ -201,153 +189,6 @@
 static struct perf_limits *limits = &powersave_limits;
 #endif
 
-#if IS_ENABLED(CONFIG_ACPI)
-/*
- * The max target pstate ratio is a 8 bit value in both PLATFORM_INFO MSR and
- * in TURBO_RATIO_LIMIT MSR, which pstate driver stores in max_pstate and
- * max_turbo_pstate fields. The PERF_CTL MSR contains 16 bit value for P state
- * ratio, out of it only high 8 bits are used. For example 0x1700 is setting
- * target ratio 0x17. The _PSS control value stores in a format which can be
- * directly written to PERF_CTL MSR. But in intel_pstate driver this shift
- * occurs during write to PERF_CTL (E.g. for cores core_set_pstate()).
- * This function converts the _PSS control value to intel pstate driver format
- * for comparison and assignment.
- */
-static int convert_to_native_pstate_format(struct cpudata *cpu, int index)
-{
-	return cpu->acpi_perf_data.states[index].control >> 8;
-}
-
-static int intel_pstate_init_perf_limits(struct cpufreq_policy *policy)
-{
-	struct cpudata *cpu;
-	int ret;
-	bool turbo_absent = false;
-	int max_pstate_index;
-	int min_pss_ctl, max_pss_ctl, turbo_pss_ctl;
-	int i;
-
-	cpu = all_cpu_data[policy->cpu];
-
-	pr_debug("intel_pstate: default limits 0x%x 0x%x 0x%x\n",
-		 cpu->pstate.min_pstate, cpu->pstate.max_pstate,
-		 cpu->pstate.turbo_pstate);
-
-	if (!cpu->acpi_perf_data.shared_cpu_map &&
-	    zalloc_cpumask_var_node(&cpu->acpi_perf_data.shared_cpu_map,
-				    GFP_KERNEL, cpu_to_node(policy->cpu))) {
-		return -ENOMEM;
-	}
-
-	ret = acpi_processor_register_performance(&cpu->acpi_perf_data,
-						  policy->cpu);
-	if (ret)
-		return ret;
-
-	/*
-	 * Check if the control value in _PSS is for PERF_CTL MSR, which should
-	 * guarantee that the states returned by it map to the states in our
-	 * list directly.
-	 */
-	if (cpu->acpi_perf_data.control_register.space_id !=
-						ACPI_ADR_SPACE_FIXED_HARDWARE)
-		return -EIO;
-
-	pr_debug("intel_pstate: CPU%u - ACPI _PSS perf data\n", policy->cpu);
-	for (i = 0; i < cpu->acpi_perf_data.state_count; i++)
-		pr_debug("     %cP%d: %u MHz, %u mW, 0x%x\n",
-			 (i == cpu->acpi_perf_data.state ? '*' : ' '), i,
-			 (u32) cpu->acpi_perf_data.states[i].core_frequency,
-			 (u32) cpu->acpi_perf_data.states[i].power,
-			 (u32) cpu->acpi_perf_data.states[i].control);
-
-	/*
-	 * If there is only one entry _PSS, simply ignore _PSS and continue as
-	 * usual without taking _PSS into account
-	 */
-	if (cpu->acpi_perf_data.state_count < 2)
-		return 0;
-
-	turbo_pss_ctl = convert_to_native_pstate_format(cpu, 0);
-	min_pss_ctl = convert_to_native_pstate_format(cpu,
-					cpu->acpi_perf_data.state_count - 1);
-	/* Check if there is a turbo freq in _PSS */
-	if (turbo_pss_ctl <= cpu->pstate.max_pstate &&
-	    turbo_pss_ctl > cpu->pstate.min_pstate) {
-		pr_debug("intel_pstate: no turbo range exists in _PSS\n");
-		limits->no_turbo = limits->turbo_disabled = 1;
-		cpu->pstate.turbo_pstate = cpu->pstate.max_pstate;
-		turbo_absent = true;
-	}
-
-	/* Check if the max non turbo p state < Intel P state max */
-	max_pstate_index = turbo_absent ? 0 : 1;
-	max_pss_ctl = convert_to_native_pstate_format(cpu, max_pstate_index);
-	if (max_pss_ctl < cpu->pstate.max_pstate &&
-	    max_pss_ctl > cpu->pstate.min_pstate)
-		cpu->pstate.max_pstate = max_pss_ctl;
-
-	/* check If min perf > Intel P State min */
-	if (min_pss_ctl > cpu->pstate.min_pstate &&
-	    min_pss_ctl < cpu->pstate.max_pstate) {
-		cpu->pstate.min_pstate = min_pss_ctl;
-		policy->cpuinfo.min_freq = min_pss_ctl * cpu->pstate.scaling;
-	}
-
-	if (turbo_absent)
-		policy->cpuinfo.max_freq = cpu->pstate.max_pstate *
-						cpu->pstate.scaling;
-	else {
-		policy->cpuinfo.max_freq = cpu->pstate.turbo_pstate *
-						cpu->pstate.scaling;
-		/*
-		 * The _PSS table doesn't contain whole turbo frequency range.
-		 * This just contains +1 MHZ above the max non turbo frequency,
-		 * with control value corresponding to max turbo ratio. But
-		 * when cpufreq set policy is called, it will call with this
-		 * max frequency, which will cause a reduced performance as
-		 * this driver uses real max turbo frequency as the max
-		 * frequeny. So correct this frequency in _PSS table to
-		 * correct max turbo frequency based on the turbo ratio.
-		 * Also need to convert to MHz as _PSS freq is in MHz.
-		 */
-		cpu->acpi_perf_data.states[0].core_frequency =
-						turbo_pss_ctl * 100;
-	}
-
-	pr_debug("intel_pstate: Updated limits using _PSS 0x%x 0x%x 0x%x\n",
-		 cpu->pstate.min_pstate, cpu->pstate.max_pstate,
-		 cpu->pstate.turbo_pstate);
-	pr_debug("intel_pstate: policy max_freq=%d Khz min_freq = %d KHz\n",
-		 policy->cpuinfo.max_freq, policy->cpuinfo.min_freq);
-
-	return 0;
-}
-
-static int intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
-{
-	struct cpudata *cpu;
-
-	if (!no_acpi_perf)
-		return 0;
-
-	cpu = all_cpu_data[policy->cpu];
-	acpi_processor_unregister_performance(policy->cpu);
-	return 0;
-}
-
-#else
-static int intel_pstate_init_perf_limits(struct cpufreq_policy *policy)
-{
-	return 0;
-}
-
-static int intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
-{
-	return 0;
-}
-#endif
-
 static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
 			     int deadband, int integral) {
 	pid->setpoint = setpoint;
@@ -687,31 +528,31 @@
 	wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
 }
 
-static int byt_get_min_pstate(void)
+static int atom_get_min_pstate(void)
 {
 	u64 value;
 
-	rdmsrl(BYT_RATIOS, value);
+	rdmsrl(ATOM_RATIOS, value);
 	return (value >> 8) & 0x7F;
 }
 
-static int byt_get_max_pstate(void)
+static int atom_get_max_pstate(void)
 {
 	u64 value;
 
-	rdmsrl(BYT_RATIOS, value);
+	rdmsrl(ATOM_RATIOS, value);
 	return (value >> 16) & 0x7F;
 }
 
-static int byt_get_turbo_pstate(void)
+static int atom_get_turbo_pstate(void)
 {
 	u64 value;
 
-	rdmsrl(BYT_TURBO_RATIOS, value);
+	rdmsrl(ATOM_TURBO_RATIOS, value);
 	return value & 0x7F;
 }
 
-static void byt_set_pstate(struct cpudata *cpudata, int pstate)
+static void atom_set_pstate(struct cpudata *cpudata, int pstate)
 {
 	u64 val;
 	int32_t vid_fp;
@@ -736,27 +577,42 @@
 	wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val);
 }
 
-#define BYT_BCLK_FREQS 5
-static int byt_freq_table[BYT_BCLK_FREQS] = { 833, 1000, 1333, 1167, 800};
-
-static int byt_get_scaling(void)
+static int silvermont_get_scaling(void)
 {
 	u64 value;
 	int i;
+	/* Defined in Table 35-6 from SDM (Sept 2015) */
+	static int silvermont_freq_table[] = {
+		83300, 100000, 133300, 116700, 80000};
 
 	rdmsrl(MSR_FSB_FREQ, value);
-	i = value & 0x3;
+	i = value & 0x7;
+	WARN_ON(i > 4);
 
-	BUG_ON(i > BYT_BCLK_FREQS);
-
-	return byt_freq_table[i] * 100;
+	return silvermont_freq_table[i];
 }
 
-static void byt_get_vid(struct cpudata *cpudata)
+static int airmont_get_scaling(void)
+{
+	u64 value;
+	int i;
+	/* Defined in Table 35-10 from SDM (Sept 2015) */
+	static int airmont_freq_table[] = {
+		83300, 100000, 133300, 116700, 80000,
+		93300, 90000, 88900, 87500};
+
+	rdmsrl(MSR_FSB_FREQ, value);
+	i = value & 0xF;
+	WARN_ON(i > 8);
+
+	return airmont_freq_table[i];
+}
+
+static void atom_get_vid(struct cpudata *cpudata)
 {
 	u64 value;
 
-	rdmsrl(BYT_VIDS, value);
+	rdmsrl(ATOM_VIDS, value);
 	cpudata->vid.min = int_tofp((value >> 8) & 0x7f);
 	cpudata->vid.max = int_tofp((value >> 16) & 0x7f);
 	cpudata->vid.ratio = div_fp(
@@ -764,7 +620,7 @@
 		int_tofp(cpudata->pstate.max_pstate -
 			cpudata->pstate.min_pstate));
 
-	rdmsrl(BYT_TURBO_VIDS, value);
+	rdmsrl(ATOM_TURBO_VIDS, value);
 	cpudata->vid.turbo = value & 0x7f;
 }
 
@@ -885,7 +741,7 @@
 	},
 };
 
-static struct cpu_defaults byt_params = {
+static struct cpu_defaults silvermont_params = {
 	.pid_policy = {
 		.sample_rate_ms = 10,
 		.deadband = 0,
@@ -895,13 +751,33 @@
 		.i_gain_pct = 4,
 	},
 	.funcs = {
-		.get_max = byt_get_max_pstate,
-		.get_max_physical = byt_get_max_pstate,
-		.get_min = byt_get_min_pstate,
-		.get_turbo = byt_get_turbo_pstate,
-		.set = byt_set_pstate,
-		.get_scaling = byt_get_scaling,
-		.get_vid = byt_get_vid,
+		.get_max = atom_get_max_pstate,
+		.get_max_physical = atom_get_max_pstate,
+		.get_min = atom_get_min_pstate,
+		.get_turbo = atom_get_turbo_pstate,
+		.set = atom_set_pstate,
+		.get_scaling = silvermont_get_scaling,
+		.get_vid = atom_get_vid,
+	},
+};
+
+static struct cpu_defaults airmont_params = {
+	.pid_policy = {
+		.sample_rate_ms = 10,
+		.deadband = 0,
+		.setpoint = 60,
+		.p_gain_pct = 14,
+		.d_gain_pct = 0,
+		.i_gain_pct = 4,
+	},
+	.funcs = {
+		.get_max = atom_get_max_pstate,
+		.get_max_physical = atom_get_max_pstate,
+		.get_min = atom_get_min_pstate,
+		.get_turbo = atom_get_turbo_pstate,
+		.set = atom_set_pstate,
+		.get_scaling = airmont_get_scaling,
+		.get_vid = atom_get_vid,
 	},
 };
 
@@ -938,23 +814,12 @@
 	 * policy, or by cpu specific default values determined through
 	 * experimentation.
 	 */
-	if (limits->max_perf_ctl && limits->max_sysfs_pct >=
-						limits->max_policy_pct) {
-		*max = limits->max_perf_ctl;
-	} else {
-		max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf),
-					limits->max_perf));
-		*max = clamp_t(int, max_perf_adj, cpu->pstate.min_pstate,
-			       cpu->pstate.turbo_pstate);
-	}
+	max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits->max_perf));
+	*max = clamp_t(int, max_perf_adj,
+			cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);
 
-	if (limits->min_perf_ctl) {
-		*min = limits->min_perf_ctl;
-	} else {
-		min_perf = fp_toint(mul_fp(int_tofp(max_perf),
-				    limits->min_perf));
-		*min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
-	}
+	min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits->min_perf));
+	*min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
 }
 
 static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate, bool force)
@@ -1153,7 +1018,7 @@
 static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
 	ICPU(0x2a, core_params),
 	ICPU(0x2d, core_params),
-	ICPU(0x37, byt_params),
+	ICPU(0x37, silvermont_params),
 	ICPU(0x3a, core_params),
 	ICPU(0x3c, core_params),
 	ICPU(0x3d, core_params),
@@ -1162,7 +1027,7 @@
 	ICPU(0x45, core_params),
 	ICPU(0x46, core_params),
 	ICPU(0x47, core_params),
-	ICPU(0x4c, byt_params),
+	ICPU(0x4c, airmont_params),
 	ICPU(0x4e, core_params),
 	ICPU(0x4f, core_params),
 	ICPU(0x5e, core_params),
@@ -1229,12 +1094,6 @@
 
 static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 {
-#if IS_ENABLED(CONFIG_ACPI)
-	struct cpudata *cpu;
-	int i;
-#endif
-	pr_debug("intel_pstate: %s max %u policy->max %u\n", __func__,
-		 policy->cpuinfo.max_freq, policy->max);
 	if (!policy->cpuinfo.max_freq)
 		return -ENODEV;
 
@@ -1242,6 +1101,8 @@
 	    policy->max >= policy->cpuinfo.max_freq) {
 		pr_debug("intel_pstate: set performance\n");
 		limits = &performance_limits;
+		if (hwp_active)
+			intel_pstate_hwp_set();
 		return 0;
 	}
 
@@ -1249,7 +1110,8 @@
 	limits = &powersave_limits;
 	limits->min_policy_pct = (policy->min * 100) / policy->cpuinfo.max_freq;
 	limits->min_policy_pct = clamp_t(int, limits->min_policy_pct, 0 , 100);
-	limits->max_policy_pct = (policy->max * 100) / policy->cpuinfo.max_freq;
+	limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100,
+					      policy->cpuinfo.max_freq);
 	limits->max_policy_pct = clamp_t(int, limits->max_policy_pct, 0 , 100);
 
 	/* Normalize user input to [min_policy_pct, max_policy_pct] */
@@ -1261,6 +1123,7 @@
 				   limits->max_sysfs_pct);
 	limits->max_perf_pct = max(limits->min_policy_pct,
 				   limits->max_perf_pct);
+	limits->max_perf = round_up(limits->max_perf, 8);
 
 	/* Make sure min_perf_pct <= max_perf_pct */
 	limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct);
@@ -1270,23 +1133,6 @@
 	limits->max_perf = div_fp(int_tofp(limits->max_perf_pct),
 				  int_tofp(100));
 
-#if IS_ENABLED(CONFIG_ACPI)
-	cpu = all_cpu_data[policy->cpu];
-	for (i = 0; i < cpu->acpi_perf_data.state_count; i++) {
-		int control;
-
-		control = convert_to_native_pstate_format(cpu, i);
-		if (control * cpu->pstate.scaling == policy->max)
-			limits->max_perf_ctl = control;
-		if (control * cpu->pstate.scaling == policy->min)
-			limits->min_perf_ctl = control;
-	}
-
-	pr_debug("intel_pstate: max %u policy_max %u perf_ctl [0x%x-0x%x]\n",
-		 policy->cpuinfo.max_freq, policy->max, limits->min_perf_ctl,
-		 limits->max_perf_ctl);
-#endif
-
 	if (hwp_active)
 		intel_pstate_hwp_set();
 
@@ -1341,30 +1187,18 @@
 	policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
 	policy->cpuinfo.max_freq =
 		cpu->pstate.turbo_pstate * cpu->pstate.scaling;
-	if (!no_acpi_perf)
-		intel_pstate_init_perf_limits(policy);
-	/*
-	 * If there is no acpi perf data or error, we ignore and use Intel P
-	 * state calculated limits, So this is not fatal error.
-	 */
 	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
 	cpumask_set_cpu(policy->cpu, policy->cpus);
 
 	return 0;
 }
 
-static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
-{
-	return intel_pstate_exit_perf_limits(policy);
-}
-
 static struct cpufreq_driver intel_pstate_driver = {
 	.flags		= CPUFREQ_CONST_LOOPS,
 	.verify		= intel_pstate_verify_policy,
 	.setpolicy	= intel_pstate_set_policy,
 	.get		= intel_pstate_get,
 	.init		= intel_pstate_cpu_init,
-	.exit		= intel_pstate_cpu_exit,
 	.stop_cpu	= intel_pstate_stop_cpu,
 	.name		= "intel_pstate",
 };
@@ -1406,6 +1240,7 @@
 }
 
 #if IS_ENABLED(CONFIG_ACPI)
+#include <acpi/processor.h>
 
 static bool intel_pstate_no_acpi_pss(void)
 {
@@ -1601,9 +1436,6 @@
 		force_load = 1;
 	if (!strcmp(str, "hwp_only"))
 		hwp_only = 1;
-	if (!strcmp(str, "no_acpi"))
-		no_acpi_perf = 1;
-
 	return 0;
 }
 early_param("intel_pstate", intel_pstate_setup);
diff --git a/drivers/crypto/nx/nx-aes-ccm.c b/drivers/crypto/nx/nx-aes-ccm.c
index 73ef499..7038f36 100644
--- a/drivers/crypto/nx/nx-aes-ccm.c
+++ b/drivers/crypto/nx/nx-aes-ccm.c
@@ -409,7 +409,7 @@
 		processed += to_process;
 	} while (processed < nbytes);
 
-	rc = memcmp(csbcpb->cpb.aes_ccm.out_pat_or_mac, priv->oauth_tag,
+	rc = crypto_memneq(csbcpb->cpb.aes_ccm.out_pat_or_mac, priv->oauth_tag,
 		    authsize) ? -EBADMSG : 0;
 out:
 	spin_unlock_irqrestore(&nx_ctx->lock, irq_flags);
diff --git a/drivers/crypto/nx/nx-aes-gcm.c b/drivers/crypto/nx/nx-aes-gcm.c
index eee624f..abd465f4 100644
--- a/drivers/crypto/nx/nx-aes-gcm.c
+++ b/drivers/crypto/nx/nx-aes-gcm.c
@@ -21,6 +21,7 @@
 
 #include <crypto/internal/aead.h>
 #include <crypto/aes.h>
+#include <crypto/algapi.h>
 #include <crypto/scatterwalk.h>
 #include <linux/module.h>
 #include <linux/types.h>
@@ -418,7 +419,7 @@
 			itag, req->src, req->assoclen + nbytes,
 			crypto_aead_authsize(crypto_aead_reqtfm(req)),
 			SCATTERWALK_FROM_SG);
-		rc = memcmp(itag, otag,
+		rc = crypto_memneq(itag, otag,
 			    crypto_aead_authsize(crypto_aead_reqtfm(req))) ?
 		     -EBADMSG : 0;
 	}
diff --git a/drivers/crypto/qat/qat_common/adf_ctl_drv.c b/drivers/crypto/qat/qat_common/adf_ctl_drv.c
index 03856ad..473d36d 100644
--- a/drivers/crypto/qat/qat_common/adf_ctl_drv.c
+++ b/drivers/crypto/qat/qat_common/adf_ctl_drv.c
@@ -198,7 +198,7 @@
 			goto out_err;
 		}
 
-		params_head = section_head->params;
+		params_head = section.params;
 
 		while (params_head) {
 			if (copy_from_user(&key_val, (void __user *)params_head,
diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 46f531e..b6f9f42 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -977,7 +977,7 @@
 		} else
 			oicv = (char *)&edesc->link_tbl[0];
 
-		err = memcmp(oicv, icv, authsize) ? -EBADMSG : 0;
+		err = crypto_memneq(oicv, icv, authsize) ? -EBADMSG : 0;
 	}
 
 	kfree(edesc);
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index 4e55239..53d22eb 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -729,8 +729,8 @@
 		return NULL;
 
 	dev_info(chan2dev(chan),
-		 "%s: src=0x%08x, dest=0x%08x, numf=%d, frame_size=%d, flags=0x%lx\n",
-		__func__, xt->src_start, xt->dst_start, xt->numf,
+		 "%s: src=%pad, dest=%pad, numf=%d, frame_size=%d, flags=0x%lx\n",
+		__func__, &xt->src_start, &xt->dst_start, xt->numf,
 		xt->frame_size, flags);
 
 	/*
@@ -824,8 +824,8 @@
 	u32			ctrla;
 	u32			ctrlb;
 
-	dev_vdbg(chan2dev(chan), "prep_dma_memcpy: d0x%x s0x%x l0x%zx f0x%lx\n",
-			dest, src, len, flags);
+	dev_vdbg(chan2dev(chan), "prep_dma_memcpy: d%pad s%pad l0x%zx f0x%lx\n",
+			&dest, &src, len, flags);
 
 	if (unlikely(!len)) {
 		dev_dbg(chan2dev(chan), "prep_dma_memcpy: length is zero!\n");
@@ -938,8 +938,8 @@
 	void __iomem		*vaddr;
 	dma_addr_t		paddr;
 
-	dev_vdbg(chan2dev(chan), "%s: d0x%x v0x%x l0x%zx f0x%lx\n", __func__,
-		dest, value, len, flags);
+	dev_vdbg(chan2dev(chan), "%s: d%pad v0x%x l0x%zx f0x%lx\n", __func__,
+		&dest, value, len, flags);
 
 	if (unlikely(!len)) {
 		dev_dbg(chan2dev(chan), "%s: length is zero!\n", __func__);
@@ -1022,8 +1022,8 @@
 		dma_addr_t dest = sg_dma_address(sg);
 		size_t len = sg_dma_len(sg);
 
-		dev_vdbg(chan2dev(chan), "%s: d0x%08x, l0x%zx\n",
-			 __func__, dest, len);
+		dev_vdbg(chan2dev(chan), "%s: d%pad, l0x%zx\n",
+			 __func__, &dest, len);
 
 		if (!is_dma_fill_aligned(chan->device, dest, 0, len)) {
 			dev_err(chan2dev(chan), "%s: buffer is not aligned\n",
@@ -1439,9 +1439,9 @@
 	unsigned int		periods = buf_len / period_len;
 	unsigned int		i;
 
-	dev_vdbg(chan2dev(chan), "prep_dma_cyclic: %s buf@0x%08x - %d (%d/%d)\n",
+	dev_vdbg(chan2dev(chan), "prep_dma_cyclic: %s buf@%pad - %d (%d/%d)\n",
 			direction == DMA_MEM_TO_DEV ? "TO DEVICE" : "FROM DEVICE",
-			buf_addr,
+			&buf_addr,
 			periods, buf_len, period_len);
 
 	if (unlikely(!atslave || !buf_len || !period_len)) {
diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h
index d1cfc8c..7f58f06 100644
--- a/drivers/dma/at_hdmac_regs.h
+++ b/drivers/dma/at_hdmac_regs.h
@@ -385,9 +385,9 @@
 static void atc_dump_lli(struct at_dma_chan *atchan, struct at_lli *lli)
 {
 	dev_crit(chan2dev(&atchan->chan_common),
-		 "  desc: s0x%x d0x%x ctrl0x%x:0x%x l0x%x\n",
-		 lli->saddr, lli->daddr,
-		 lli->ctrla, lli->ctrlb, lli->dscr);
+		 "  desc: s%pad d%pad ctrl0x%x:0x%x l0x%pad\n",
+		 &lli->saddr, &lli->daddr,
+		 lli->ctrla, lli->ctrlb, &lli->dscr);
 }
 
 
diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c
index b5e132d..7f039de 100644
--- a/drivers/dma/at_xdmac.c
+++ b/drivers/dma/at_xdmac.c
@@ -920,8 +920,8 @@
 	desc->lld.mbr_cfg = chan_cc;
 
 	dev_dbg(chan2dev(chan),
-		"%s: lld: mbr_sa=0x%08x, mbr_da=0x%08x, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n",
-		__func__, desc->lld.mbr_sa, desc->lld.mbr_da,
+		"%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n",
+		__func__, &desc->lld.mbr_sa, &desc->lld.mbr_da,
 		desc->lld.mbr_ubc, desc->lld.mbr_cfg);
 
 	/* Chain lld. */
@@ -953,8 +953,8 @@
 	if ((xt->numf > 1) && (xt->frame_size > 1))
 		return NULL;
 
-	dev_dbg(chan2dev(chan), "%s: src=0x%08x, dest=0x%08x, numf=%d, frame_size=%d, flags=0x%lx\n",
-		__func__, xt->src_start, xt->dst_start,	xt->numf,
+	dev_dbg(chan2dev(chan), "%s: src=%pad, dest=%pad, numf=%d, frame_size=%d, flags=0x%lx\n",
+		__func__, &xt->src_start, &xt->dst_start,	xt->numf,
 		xt->frame_size, flags);
 
 	src_addr = xt->src_start;
@@ -1179,8 +1179,8 @@
 	desc->lld.mbr_cfg = chan_cc;
 
 	dev_dbg(chan2dev(chan),
-		"%s: lld: mbr_da=0x%08x, mbr_ds=0x%08x, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n",
-		__func__, desc->lld.mbr_da, desc->lld.mbr_ds, desc->lld.mbr_ubc,
+		"%s: lld: mbr_da=%pad, mbr_ds=%pad, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n",
+		__func__, &desc->lld.mbr_da, &desc->lld.mbr_ds, desc->lld.mbr_ubc,
 		desc->lld.mbr_cfg);
 
 	return desc;
@@ -1193,8 +1193,8 @@
 	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
 	struct at_xdmac_desc	*desc;
 
-	dev_dbg(chan2dev(chan), "%s: dest=0x%08x, len=%d, pattern=0x%x, flags=0x%lx\n",
-		__func__, dest, len, value, flags);
+	dev_dbg(chan2dev(chan), "%s: dest=%pad, len=%d, pattern=0x%x, flags=0x%lx\n",
+		__func__, &dest, len, value, flags);
 
 	if (unlikely(!len))
 		return NULL;
@@ -1229,8 +1229,8 @@
 
 	/* Prepare descriptors. */
 	for_each_sg(sgl, sg, sg_len, i) {
-		dev_dbg(chan2dev(chan), "%s: dest=0x%08x, len=%d, pattern=0x%x, flags=0x%lx\n",
-			__func__, sg_dma_address(sg), sg_dma_len(sg),
+		dev_dbg(chan2dev(chan), "%s: dest=%pad, len=%d, pattern=0x%x, flags=0x%lx\n",
+			__func__, &sg_dma_address(sg), sg_dma_len(sg),
 			value, flags);
 		desc = at_xdmac_memset_create_desc(chan, atchan,
 						   sg_dma_address(sg),
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index 6b03e4e..0675e26 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -107,7 +107,7 @@
 
 /* CCCFG register */
 #define GET_NUM_DMACH(x)	(x & 0x7) /* bits 0-2 */
-#define GET_NUM_QDMACH(x)	(x & 0x70 >> 4) /* bits 4-6 */
+#define GET_NUM_QDMACH(x)	((x & 0x70) >> 4) /* bits 4-6 */
 #define GET_NUM_PAENTRY(x)	((x & 0x7000) >> 12) /* bits 12-14 */
 #define GET_NUM_EVQUE(x)	((x & 0x70000) >> 16) /* bits 16-18 */
 #define GET_NUM_REGN(x)		((x & 0x300000) >> 20) /* bits 20-21 */
@@ -1565,7 +1565,7 @@
 	struct platform_device *tc_pdev;
 	int ret;
 
-	if (!tc)
+	if (!IS_ENABLED(CONFIG_OF) || !tc)
 		return;
 
 	tc_pdev = of_find_device_by_node(tc->node);
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index 7058d58..0f6fd42 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -1462,7 +1462,7 @@
 
 #define EVENT_REMAP_CELLS 3
 
-static int __init sdma_event_remap(struct sdma_engine *sdma)
+static int sdma_event_remap(struct sdma_engine *sdma)
 {
 	struct device_node *np = sdma->dev->of_node;
 	struct device_node *gpr_np = of_parse_phandle(np, "gpr", 0);
diff --git a/drivers/dma/sh/usb-dmac.c b/drivers/dma/sh/usb-dmac.c
index ebd8a5f..f1bcc2a 100644
--- a/drivers/dma/sh/usb-dmac.c
+++ b/drivers/dma/sh/usb-dmac.c
@@ -679,8 +679,11 @@
 	struct usb_dmac *dmac = dev_get_drvdata(dev);
 	int i;
 
-	for (i = 0; i < dmac->n_channels; ++i)
+	for (i = 0; i < dmac->n_channels; ++i) {
+		if (!dmac->channels[i].iomem)
+			break;
 		usb_dmac_chan_halt(&dmac->channels[i]);
+	}
 
 	return 0;
 }
@@ -799,11 +802,10 @@
 	ret = pm_runtime_get_sync(&pdev->dev);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "runtime PM get sync failed (%d)\n", ret);
-		return ret;
+		goto error_pm;
 	}
 
 	ret = usb_dmac_init(dmac);
-	pm_runtime_put(&pdev->dev);
 
 	if (ret) {
 		dev_err(&pdev->dev, "failed to reset device\n");
@@ -851,10 +853,13 @@
 	if (ret < 0)
 		goto error;
 
+	pm_runtime_put(&pdev->dev);
 	return 0;
 
 error:
 	of_dma_controller_free(pdev->dev.of_node);
+	pm_runtime_put(&pdev->dev);
+error_pm:
 	pm_runtime_disable(&pdev->dev);
 	return ret;
 }
diff --git a/drivers/gpio/gpio-74xx-mmio.c b/drivers/gpio/gpio-74xx-mmio.c
index 6ed7c0f..6b18682 100644
--- a/drivers/gpio/gpio-74xx-mmio.c
+++ b/drivers/gpio/gpio-74xx-mmio.c
@@ -113,13 +113,16 @@
 
 static int mmio_74xx_gpio_probe(struct platform_device *pdev)
 {
-	const struct of_device_id *of_id =
-		of_match_device(mmio_74xx_gpio_ids, &pdev->dev);
+	const struct of_device_id *of_id;
 	struct mmio_74xx_gpio_priv *priv;
 	struct resource *res;
 	void __iomem *dat;
 	int err;
 
+	of_id = of_match_device(mmio_74xx_gpio_ids, &pdev->dev);
+	if (!of_id)
+		return -ENODEV;
+
 	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c
index 56d2d02..f7fbb46 100644
--- a/drivers/gpio/gpio-omap.c
+++ b/drivers/gpio/gpio-omap.c
@@ -1122,8 +1122,6 @@
 	/* MPUIO is a bit different, reading IRQ status clears it */
 	if (bank->is_mpuio) {
 		irqc->irq_ack = dummy_irq_chip.irq_ack;
-		irqc->irq_mask = irq_gc_mask_set_bit;
-		irqc->irq_unmask = irq_gc_mask_clr_bit;
 		if (!bank->regs->wkup_en)
 			irqc->irq_set_wake = NULL;
 	}
diff --git a/drivers/gpio/gpio-palmas.c b/drivers/gpio/gpio-palmas.c
index 171a638..52b447c 100644
--- a/drivers/gpio/gpio-palmas.c
+++ b/drivers/gpio/gpio-palmas.c
@@ -167,6 +167,8 @@
 	const struct palmas_device_data *dev_data;
 
 	match = of_match_device(of_palmas_gpio_match, &pdev->dev);
+	if (!match)
+		return -ENODEV;
 	dev_data = match->data;
 	if (!dev_data)
 		dev_data = &palmas_dev_data;
diff --git a/drivers/gpio/gpio-syscon.c b/drivers/gpio/gpio-syscon.c
index 045a952..7b25fdf 100644
--- a/drivers/gpio/gpio-syscon.c
+++ b/drivers/gpio/gpio-syscon.c
@@ -187,11 +187,15 @@
 static int syscon_gpio_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	const struct of_device_id *of_id = of_match_device(syscon_gpio_ids, dev);
+	const struct of_device_id *of_id;
 	struct syscon_gpio_priv *priv;
 	struct device_node *np = dev->of_node;
 	int ret;
 
+	of_id = of_match_device(syscon_gpio_ids, dev);
+	if (!of_id)
+		return -ENODEV;
+
 	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c
index 027e5f4..896bf29 100644
--- a/drivers/gpio/gpio-tegra.c
+++ b/drivers/gpio/gpio-tegra.c
@@ -375,6 +375,60 @@
 }
 #endif
 
+#ifdef	CONFIG_DEBUG_FS
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+static int dbg_gpio_show(struct seq_file *s, void *unused)
+{
+	int i;
+	int j;
+
+	for (i = 0; i < tegra_gpio_bank_count; i++) {
+		for (j = 0; j < 4; j++) {
+			int gpio = tegra_gpio_compose(i, j, 0);
+			seq_printf(s,
+				"%d:%d %02x %02x %02x %02x %02x %02x %06x\n",
+				i, j,
+				tegra_gpio_readl(GPIO_CNF(gpio)),
+				tegra_gpio_readl(GPIO_OE(gpio)),
+				tegra_gpio_readl(GPIO_OUT(gpio)),
+				tegra_gpio_readl(GPIO_IN(gpio)),
+				tegra_gpio_readl(GPIO_INT_STA(gpio)),
+				tegra_gpio_readl(GPIO_INT_ENB(gpio)),
+				tegra_gpio_readl(GPIO_INT_LVL(gpio)));
+		}
+	}
+	return 0;
+}
+
+static int dbg_gpio_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, dbg_gpio_show, &inode->i_private);
+}
+
+static const struct file_operations debug_fops = {
+	.open		= dbg_gpio_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static void tegra_gpio_debuginit(void)
+{
+	(void) debugfs_create_file("tegra_gpio", S_IRUGO,
+					NULL, NULL, &debug_fops);
+}
+
+#else
+
+static inline void tegra_gpio_debuginit(void)
+{
+}
+
+#endif
+
 static struct irq_chip tegra_gpio_irq_chip = {
 	.name		= "GPIO",
 	.irq_ack	= tegra_gpio_irq_ack,
@@ -519,6 +573,8 @@
 			spin_lock_init(&bank->lvl_lock[j]);
 	}
 
+	tegra_gpio_debuginit();
+
 	return 0;
 }
 
@@ -536,52 +592,3 @@
 	return platform_driver_register(&tegra_gpio_driver);
 }
 postcore_initcall(tegra_gpio_init);
-
-#ifdef	CONFIG_DEBUG_FS
-
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
-
-static int dbg_gpio_show(struct seq_file *s, void *unused)
-{
-	int i;
-	int j;
-
-	for (i = 0; i < tegra_gpio_bank_count; i++) {
-		for (j = 0; j < 4; j++) {
-			int gpio = tegra_gpio_compose(i, j, 0);
-			seq_printf(s,
-				"%d:%d %02x %02x %02x %02x %02x %02x %06x\n",
-				i, j,
-				tegra_gpio_readl(GPIO_CNF(gpio)),
-				tegra_gpio_readl(GPIO_OE(gpio)),
-				tegra_gpio_readl(GPIO_OUT(gpio)),
-				tegra_gpio_readl(GPIO_IN(gpio)),
-				tegra_gpio_readl(GPIO_INT_STA(gpio)),
-				tegra_gpio_readl(GPIO_INT_ENB(gpio)),
-				tegra_gpio_readl(GPIO_INT_LVL(gpio)));
-		}
-	}
-	return 0;
-}
-
-static int dbg_gpio_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, dbg_gpio_show, &inode->i_private);
-}
-
-static const struct file_operations debug_fops = {
-	.open		= dbg_gpio_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-static int __init tegra_gpio_debuginit(void)
-{
-	(void) debugfs_create_file("tegra_gpio", S_IRUGO,
-					NULL, NULL, &debug_fops);
-	return 0;
-}
-late_initcall(tegra_gpio_debuginit);
-#endif
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index a18f00f..2a91f32 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -233,7 +233,7 @@
 		for (i = 0; i != chip->ngpio; ++i) {
 			struct gpio_desc *gpio = &chip->desc[i];
 
-			if (!gpio->name)
+			if (!gpio->name || !name)
 				continue;
 
 			if (!strcmp(gpio->name, name)) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 615ce6d..5a5f04d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -389,7 +389,6 @@
  * Fences.
  */
 struct amdgpu_fence_driver {
-	struct amdgpu_ring		*ring;
 	uint64_t			gpu_addr;
 	volatile uint32_t		*cpu_addr;
 	/* sync_seq is protected by ring emission lock */
@@ -398,7 +397,7 @@
 	bool				initialized;
 	struct amdgpu_irq_src		*irq_src;
 	unsigned			irq_type;
-	struct delayed_work             lockup_work;
+	struct timer_list		fallback_timer;
 	wait_queue_head_t		fence_queue;
 };
 
@@ -497,6 +496,7 @@
 
 /* bo virtual addresses in a specific vm */
 struct amdgpu_bo_va {
+	struct mutex		        mutex;
 	/* protected by bo being reserved */
 	struct list_head		bo_list;
 	struct fence		        *last_pt_update;
@@ -539,6 +539,7 @@
 	/* Constant after initialization */
 	struct amdgpu_device		*adev;
 	struct drm_gem_object		gem_base;
+	struct amdgpu_bo		*parent;
 
 	struct ttm_bo_kmap_obj		dma_buf_vmap;
 	pid_t				pid;
@@ -917,8 +918,8 @@
 #define AMDGPU_VM_FAULT_STOP_ALWAYS	2
 
 struct amdgpu_vm_pt {
-	struct amdgpu_bo		*bo;
-	uint64_t			addr;
+	struct amdgpu_bo	*bo;
+	uint64_t		addr;
 };
 
 struct amdgpu_vm_id {
@@ -926,13 +927,9 @@
 	uint64_t		pd_gpu_addr;
 	/* last flushed PD/PT update */
 	struct fence	        *flushed_updates;
-	/* last use of vmid */
-	struct fence		*last_id_use;
 };
 
 struct amdgpu_vm {
-	struct mutex		mutex;
-
 	struct rb_root		va;
 
 	/* protecting invalidated */
@@ -957,24 +954,72 @@
 
 	/* for id and flush management per ring */
 	struct amdgpu_vm_id	ids[AMDGPU_MAX_RINGS];
+	/* for interval tree */
+	spinlock_t		it_lock;
+	/* protecting freed */
+	spinlock_t		freed_lock;
 };
 
 struct amdgpu_vm_manager {
-	struct fence			*active[AMDGPU_NUM_VM];
-	uint32_t			max_pfn;
+	struct {
+		struct fence	*active;
+		atomic_long_t	owner;
+	} ids[AMDGPU_NUM_VM];
+
+	uint32_t				max_pfn;
 	/* number of VMIDs */
-	unsigned			nvm;
+	unsigned				nvm;
 	/* vram base address for page table entry  */
-	u64				vram_base_offset;
+	u64					vram_base_offset;
 	/* is vm enabled? */
-	bool				enabled;
-	/* for hw to save the PD addr on suspend/resume */
-	uint32_t			saved_table_addr[AMDGPU_NUM_VM];
+	bool					enabled;
 	/* vm pte handling */
 	const struct amdgpu_vm_pte_funcs        *vm_pte_funcs;
 	struct amdgpu_ring                      *vm_pte_funcs_ring;
 };
 
+void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+struct amdgpu_bo_list_entry *amdgpu_vm_get_bos(struct amdgpu_device *adev,
+					       struct amdgpu_vm *vm,
+					       struct list_head *head);
+int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
+		      struct amdgpu_sync *sync);
+void amdgpu_vm_flush(struct amdgpu_ring *ring,
+		     struct amdgpu_vm *vm,
+		     struct fence *updates);
+void amdgpu_vm_fence(struct amdgpu_device *adev,
+		     struct amdgpu_vm *vm,
+		     struct fence *fence);
+uint64_t amdgpu_vm_map_gart(struct amdgpu_device *adev, uint64_t addr);
+int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
+				    struct amdgpu_vm *vm);
+int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
+			  struct amdgpu_vm *vm);
+int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+			     struct amdgpu_sync *sync);
+int amdgpu_vm_bo_update(struct amdgpu_device *adev,
+			struct amdgpu_bo_va *bo_va,
+			struct ttm_mem_reg *mem);
+void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
+			     struct amdgpu_bo *bo);
+struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
+				       struct amdgpu_bo *bo);
+struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
+				      struct amdgpu_vm *vm,
+				      struct amdgpu_bo *bo);
+int amdgpu_vm_bo_map(struct amdgpu_device *adev,
+		     struct amdgpu_bo_va *bo_va,
+		     uint64_t addr, uint64_t offset,
+		     uint64_t size, uint32_t flags);
+int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
+		       struct amdgpu_bo_va *bo_va,
+		       uint64_t addr);
+void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
+		      struct amdgpu_bo_va *bo_va);
+int amdgpu_vm_free_job(struct amdgpu_job *job);
+
 /*
  * context related structures
  */
@@ -1211,6 +1256,7 @@
 	/* relocations */
 	struct amdgpu_bo_list_entry	*vm_bos;
 	struct list_head	validated;
+	struct fence		*fence;
 
 	struct amdgpu_ib	*ibs;
 	uint32_t		num_ibs;
@@ -1226,7 +1272,7 @@
 	struct amdgpu_device	*adev;
 	struct amdgpu_ib	*ibs;
 	uint32_t		num_ibs;
-	struct mutex            job_lock;
+	void			*owner;
 	struct amdgpu_user_fence uf;
 	int (*free_job)(struct amdgpu_job *job);
 };
@@ -2257,11 +2303,6 @@
 bool amdgpu_card_posted(struct amdgpu_device *adev);
 void amdgpu_update_display_priority(struct amdgpu_device *adev);
 bool amdgpu_boot_test_post_card(struct amdgpu_device *adev);
-struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev,
-						 struct drm_file *filp,
-						 struct amdgpu_ctx *ctx,
-						 struct amdgpu_ib *ibs,
-						 uint32_t num_ibs);
 
 int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data);
 int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
@@ -2319,49 +2360,6 @@
 			     unsigned long arg);
 
 /*
- * vm
- */
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm);
-void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
-struct amdgpu_bo_list_entry *amdgpu_vm_get_bos(struct amdgpu_device *adev,
-					  struct amdgpu_vm *vm,
-					  struct list_head *head);
-int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
-		      struct amdgpu_sync *sync);
-void amdgpu_vm_flush(struct amdgpu_ring *ring,
-		     struct amdgpu_vm *vm,
-		     struct fence *updates);
-void amdgpu_vm_fence(struct amdgpu_device *adev,
-		     struct amdgpu_vm *vm,
-		     struct amdgpu_fence *fence);
-uint64_t amdgpu_vm_map_gart(struct amdgpu_device *adev, uint64_t addr);
-int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
-				    struct amdgpu_vm *vm);
-int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
-				struct amdgpu_vm *vm);
-int amdgpu_vm_clear_invalids(struct amdgpu_device *adev,
-				struct amdgpu_vm *vm, struct amdgpu_sync *sync);
-int amdgpu_vm_bo_update(struct amdgpu_device *adev,
-			struct amdgpu_bo_va *bo_va,
-			struct ttm_mem_reg *mem);
-void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
-			     struct amdgpu_bo *bo);
-struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
-				       struct amdgpu_bo *bo);
-struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
-				      struct amdgpu_vm *vm,
-				      struct amdgpu_bo *bo);
-int amdgpu_vm_bo_map(struct amdgpu_device *adev,
-		     struct amdgpu_bo_va *bo_va,
-		     uint64_t addr, uint64_t offset,
-		     uint64_t size, uint32_t flags);
-int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
-		       struct amdgpu_bo_va *bo_va,
-		       uint64_t addr);
-void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
-		      struct amdgpu_bo_va *bo_va);
-int amdgpu_vm_free_job(struct amdgpu_job *job);
-/*
  * functions used by amdgpu_encoder.c
  */
 struct amdgpu_afmt_acr {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index dfc4d02..4f352ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -127,30 +127,6 @@
 	return 0;
 }
 
-struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev,
-                                               struct drm_file *filp,
-                                               struct amdgpu_ctx *ctx,
-                                               struct amdgpu_ib *ibs,
-                                               uint32_t num_ibs)
-{
-	struct amdgpu_cs_parser *parser;
-	int i;
-
-	parser = kzalloc(sizeof(struct amdgpu_cs_parser), GFP_KERNEL);
-	if (!parser)
-		return NULL;
-
-	parser->adev = adev;
-	parser->filp = filp;
-	parser->ctx = ctx;
-	parser->ibs = ibs;
-	parser->num_ibs = num_ibs;
-	for (i = 0; i < num_ibs; i++)
-		ibs[i].ctx = ctx;
-
-	return parser;
-}
-
 int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 {
 	union drm_amdgpu_cs *cs = data;
@@ -246,6 +222,8 @@
 				}
 
 				p->uf.bo = gem_to_amdgpu_bo(gobj);
+				amdgpu_bo_ref(p->uf.bo);
+				drm_gem_object_unreference_unlocked(gobj);
 				p->uf.offset = fence_data->offset;
 			} else {
 				ret = -EINVAL;
@@ -463,8 +441,18 @@
 	return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
 }
 
-static void amdgpu_cs_parser_fini_early(struct amdgpu_cs_parser *parser, int error, bool backoff)
+/**
+ * cs_parser_fini() - clean parser states
+ * @parser:	parser structure holding parsing context.
+ * @error:	error number
+ *
+ * If error is set than unvalidate buffer, otherwise just free memory
+ * used by parsing context.
+ **/
+static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff)
 {
+	unsigned i;
+
 	if (!error) {
 		/* Sort the buffer list from the smallest to largest buffer,
 		 * which affects the order of buffers in the LRU list.
@@ -479,17 +467,14 @@
 		list_sort(NULL, &parser->validated, cmp_size_smaller_first);
 
 		ttm_eu_fence_buffer_objects(&parser->ticket,
-				&parser->validated,
-				&parser->ibs[parser->num_ibs-1].fence->base);
+					    &parser->validated,
+					    parser->fence);
 	} else if (backoff) {
 		ttm_eu_backoff_reservation(&parser->ticket,
 					   &parser->validated);
 	}
-}
+	fence_put(parser->fence);
 
-static void amdgpu_cs_parser_fini_late(struct amdgpu_cs_parser *parser)
-{
-	unsigned i;
 	if (parser->ctx)
 		amdgpu_ctx_put(parser->ctx);
 	if (parser->bo_list)
@@ -499,31 +484,12 @@
 	for (i = 0; i < parser->nchunks; i++)
 		drm_free_large(parser->chunks[i].kdata);
 	kfree(parser->chunks);
-	if (!amdgpu_enable_scheduler)
-	{
-		if (parser->ibs)
-			for (i = 0; i < parser->num_ibs; i++)
-				amdgpu_ib_free(parser->adev, &parser->ibs[i]);
-		kfree(parser->ibs);
-		if (parser->uf.bo)
-			drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base);
-	}
-
-	kfree(parser);
-}
-
-/**
- * cs_parser_fini() - clean parser states
- * @parser:	parser structure holding parsing context.
- * @error:	error number
- *
- * If error is set than unvalidate buffer, otherwise just free memory
- * used by parsing context.
- **/
-static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff)
-{
-       amdgpu_cs_parser_fini_early(parser, error, backoff);
-       amdgpu_cs_parser_fini_late(parser);
+	if (parser->ibs)
+		for (i = 0; i < parser->num_ibs; i++)
+			amdgpu_ib_free(parser->adev, &parser->ibs[i]);
+	kfree(parser->ibs);
+	if (parser->uf.bo)
+		amdgpu_bo_unref(&parser->uf.bo);
 }
 
 static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
@@ -610,15 +576,9 @@
 	}
 
 	r = amdgpu_bo_vm_update_pte(parser, vm);
-	if (r) {
-		goto out;
-	}
-	amdgpu_cs_sync_rings(parser);
-	if (!amdgpu_enable_scheduler)
-		r = amdgpu_ib_schedule(adev, parser->num_ibs, parser->ibs,
-				       parser->filp);
+	if (!r)
+		amdgpu_cs_sync_rings(parser);
 
-out:
 	return r;
 }
 
@@ -818,7 +778,7 @@
 			amdgpu_ib_free(job->adev, &job->ibs[i]);
 	kfree(job->ibs);
 	if (job->uf.bo)
-		drm_gem_object_unreference_unlocked(&job->uf.bo->gem_base);
+		amdgpu_bo_unref(&job->uf.bo);
 	return 0;
 }
 
@@ -826,38 +786,35 @@
 {
 	struct amdgpu_device *adev = dev->dev_private;
 	union drm_amdgpu_cs *cs = data;
-	struct amdgpu_fpriv *fpriv = filp->driver_priv;
-	struct amdgpu_vm *vm = &fpriv->vm;
-	struct amdgpu_cs_parser *parser;
+	struct amdgpu_cs_parser parser = {};
 	bool reserved_buffers = false;
 	int i, r;
 
 	if (!adev->accel_working)
 		return -EBUSY;
 
-	parser = amdgpu_cs_parser_create(adev, filp, NULL, NULL, 0);
-	if (!parser)
-		return -ENOMEM;
-	r = amdgpu_cs_parser_init(parser, data);
+	parser.adev = adev;
+	parser.filp = filp;
+
+	r = amdgpu_cs_parser_init(&parser, data);
 	if (r) {
 		DRM_ERROR("Failed to initialize parser !\n");
-		amdgpu_cs_parser_fini(parser, r, false);
+		amdgpu_cs_parser_fini(&parser, r, false);
 		r = amdgpu_cs_handle_lockup(adev, r);
 		return r;
 	}
-	mutex_lock(&vm->mutex);
-	r = amdgpu_cs_parser_relocs(parser);
+	r = amdgpu_cs_parser_relocs(&parser);
 	if (r == -ENOMEM)
 		DRM_ERROR("Not enough memory for command submission!\n");
 	else if (r && r != -ERESTARTSYS)
 		DRM_ERROR("Failed to process the buffer list %d!\n", r);
 	else if (!r) {
 		reserved_buffers = true;
-		r = amdgpu_cs_ib_fill(adev, parser);
+		r = amdgpu_cs_ib_fill(adev, &parser);
 	}
 
 	if (!r) {
-		r = amdgpu_cs_dependencies(adev, parser);
+		r = amdgpu_cs_dependencies(adev, &parser);
 		if (r)
 			DRM_ERROR("Failed in the dependencies handling %d!\n", r);
 	}
@@ -865,63 +822,71 @@
 	if (r)
 		goto out;
 
-	for (i = 0; i < parser->num_ibs; i++)
-		trace_amdgpu_cs(parser, i);
+	for (i = 0; i < parser.num_ibs; i++)
+		trace_amdgpu_cs(&parser, i);
 
-	r = amdgpu_cs_ib_vm_chunk(adev, parser);
+	r = amdgpu_cs_ib_vm_chunk(adev, &parser);
 	if (r)
 		goto out;
 
-	if (amdgpu_enable_scheduler && parser->num_ibs) {
+	if (amdgpu_enable_scheduler && parser.num_ibs) {
+		struct amdgpu_ring * ring = parser.ibs->ring;
+		struct amd_sched_fence *fence;
 		struct amdgpu_job *job;
-		struct amdgpu_ring * ring =  parser->ibs->ring;
+
 		job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL);
 		if (!job) {
 			r = -ENOMEM;
 			goto out;
 		}
+
 		job->base.sched = &ring->sched;
-		job->base.s_entity = &parser->ctx->rings[ring->idx].entity;
-		job->adev = parser->adev;
-		job->ibs = parser->ibs;
-		job->num_ibs = parser->num_ibs;
-		job->base.owner = parser->filp;
-		mutex_init(&job->job_lock);
+		job->base.s_entity = &parser.ctx->rings[ring->idx].entity;
+		job->adev = parser.adev;
+		job->owner = parser.filp;
+		job->free_job = amdgpu_cs_free_job;
+
+		job->ibs = parser.ibs;
+		job->num_ibs = parser.num_ibs;
+		parser.ibs = NULL;
+		parser.num_ibs = 0;
+
 		if (job->ibs[job->num_ibs - 1].user) {
-			memcpy(&job->uf,  &parser->uf,
-			       sizeof(struct amdgpu_user_fence));
+			job->uf = parser.uf;
 			job->ibs[job->num_ibs - 1].user = &job->uf;
+			parser.uf.bo = NULL;
 		}
 
-		job->free_job = amdgpu_cs_free_job;
-		mutex_lock(&job->job_lock);
-		r = amd_sched_entity_push_job(&job->base);
-		if (r) {
-			mutex_unlock(&job->job_lock);
+		fence = amd_sched_fence_create(job->base.s_entity,
+					       parser.filp);
+		if (!fence) {
+			r = -ENOMEM;
 			amdgpu_cs_free_job(job);
 			kfree(job);
 			goto out;
 		}
-		cs->out.handle =
-			amdgpu_ctx_add_fence(parser->ctx, ring,
-					     &job->base.s_fence->base);
-		parser->ibs[parser->num_ibs - 1].sequence = cs->out.handle;
+		job->base.s_fence = fence;
+		parser.fence = fence_get(&fence->base);
 
-		list_sort(NULL, &parser->validated, cmp_size_smaller_first);
-		ttm_eu_fence_buffer_objects(&parser->ticket,
-				&parser->validated,
-				&job->base.s_fence->base);
+		cs->out.handle = amdgpu_ctx_add_fence(parser.ctx, ring,
+						      &fence->base);
+		job->ibs[job->num_ibs - 1].sequence = cs->out.handle;
 
-		mutex_unlock(&job->job_lock);
-		amdgpu_cs_parser_fini_late(parser);
-		mutex_unlock(&vm->mutex);
-		return 0;
+		trace_amdgpu_cs_ioctl(job);
+		amd_sched_entity_push_job(&job->base);
+
+	} else {
+		struct amdgpu_fence *fence;
+
+		r = amdgpu_ib_schedule(adev, parser.num_ibs, parser.ibs,
+				       parser.filp);
+		fence = parser.ibs[parser.num_ibs - 1].fence;
+		parser.fence = fence_get(&fence->base);
+		cs->out.handle = parser.ibs[parser.num_ibs - 1].sequence;
 	}
 
-	cs->out.handle = parser->ibs[parser->num_ibs - 1].sequence;
 out:
-	amdgpu_cs_parser_fini(parser, r, reserved_buffers);
-	mutex_unlock(&vm->mutex);
+	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
 	r = amdgpu_cs_handle_lockup(adev, r);
 	return r;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index e173a5a..acd066d0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -73,6 +73,8 @@
 	struct drm_crtc *crtc = &amdgpuCrtc->base;
 	unsigned long flags;
 	unsigned i;
+	int vpos, hpos, stat, min_udelay;
+	struct drm_vblank_crtc *vblank = &crtc->dev->vblank[work->crtc_id];
 
 	amdgpu_flip_wait_fence(adev, &work->excl);
 	for (i = 0; i < work->shared_count; ++i)
@@ -81,6 +83,41 @@
 	/* We borrow the event spin lock for protecting flip_status */
 	spin_lock_irqsave(&crtc->dev->event_lock, flags);
 
+	/* If this happens to execute within the "virtually extended" vblank
+	 * interval before the start of the real vblank interval then it needs
+	 * to delay programming the mmio flip until the real vblank is entered.
+	 * This prevents completing a flip too early due to the way we fudge
+	 * our vblank counter and vblank timestamps in order to work around the
+	 * problem that the hw fires vblank interrupts before actual start of
+	 * vblank (when line buffer refilling is done for a frame). It
+	 * complements the fudging logic in amdgpu_get_crtc_scanoutpos() for
+	 * timestamping and amdgpu_get_vblank_counter_kms() for vblank counts.
+	 *
+	 * In practice this won't execute very often unless on very fast
+	 * machines because the time window for this to happen is very small.
+	 */
+	for (;;) {
+		/* GET_DISTANCE_TO_VBLANKSTART returns distance to real vblank
+		 * start in hpos, and to the "fudged earlier" vblank start in
+		 * vpos.
+		 */
+		stat = amdgpu_get_crtc_scanoutpos(adev->ddev, work->crtc_id,
+						  GET_DISTANCE_TO_VBLANKSTART,
+						  &vpos, &hpos, NULL, NULL,
+						  &crtc->hwmode);
+
+		if ((stat & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE)) !=
+		    (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE) ||
+		    !(vpos >= 0 && hpos <= 0))
+			break;
+
+		/* Sleep at least until estimated real start of hw vblank */
+		spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+		min_udelay = (-hpos + 1) * max(vblank->linedur_ns / 1000, 5);
+		usleep_range(min_udelay, 2 * min_udelay);
+		spin_lock_irqsave(&crtc->dev->event_lock, flags);
+	};
+
 	/* do the flip (mmio) */
 	adev->mode_info.funcs->page_flip(adev, work->crtc_id, work->base);
 	/* set the flip status */
@@ -109,7 +146,7 @@
 	} else
 		DRM_ERROR("failed to reserve buffer after flip\n");
 
-	drm_gem_object_unreference_unlocked(&work->old_rbo->gem_base);
+	amdgpu_bo_unref(&work->old_rbo);
 	kfree(work->shared);
 	kfree(work);
 }
@@ -148,8 +185,8 @@
 	obj = old_amdgpu_fb->obj;
 
 	/* take a reference to the old object */
-	drm_gem_object_reference(obj);
 	work->old_rbo = gem_to_amdgpu_bo(obj);
+	amdgpu_bo_ref(work->old_rbo);
 
 	new_amdgpu_fb = to_amdgpu_framebuffer(fb);
 	obj = new_amdgpu_fb->obj;
@@ -222,7 +259,7 @@
 	amdgpu_bo_unreserve(new_rbo);
 
 cleanup:
-	drm_gem_object_unreference_unlocked(&work->old_rbo->gem_base);
+	amdgpu_bo_unref(&work->old_rbo);
 	fence_put(work->excl);
 	for (i = 0; i < work->shared_count; ++i)
 		fence_put(work->shared[i]);
@@ -481,7 +518,7 @@
 int
 amdgpu_framebuffer_init(struct drm_device *dev,
 			struct amdgpu_framebuffer *rfb,
-			struct drm_mode_fb_cmd2 *mode_cmd,
+			const struct drm_mode_fb_cmd2 *mode_cmd,
 			struct drm_gem_object *obj)
 {
 	int ret;
@@ -498,7 +535,7 @@
 static struct drm_framebuffer *
 amdgpu_user_framebuffer_create(struct drm_device *dev,
 			       struct drm_file *file_priv,
-			       struct drm_mode_fb_cmd2 *mode_cmd)
+			       const struct drm_mode_fb_cmd2 *mode_cmd)
 {
 	struct drm_gem_object *obj;
 	struct amdgpu_framebuffer *amdgpu_fb;
@@ -712,6 +749,15 @@
  * \param dev Device to query.
  * \param pipe Crtc to query.
  * \param flags Flags from caller (DRM_CALLED_FROM_VBLIRQ or 0).
+ *              For driver internal use only also supports these flags:
+ *
+ *              USE_REAL_VBLANKSTART to use the real start of vblank instead
+ *              of a fudged earlier start of vblank.
+ *
+ *              GET_DISTANCE_TO_VBLANKSTART to return distance to the
+ *              fudged earlier start of vblank in *vpos and the distance
+ *              to true start of vblank in *hpos.
+ *
  * \param *vpos Location where vertical scanout position should be stored.
  * \param *hpos Location where horizontal scanout position should go.
  * \param *stime Target location for timestamp taken immediately before
@@ -776,10 +822,40 @@
 		vbl_end = 0;
 	}
 
+	/* Called from driver internal vblank counter query code? */
+	if (flags & GET_DISTANCE_TO_VBLANKSTART) {
+	    /* Caller wants distance from real vbl_start in *hpos */
+	    *hpos = *vpos - vbl_start;
+	}
+
+	/* Fudge vblank to start a few scanlines earlier to handle the
+	 * problem that vblank irqs fire a few scanlines before start
+	 * of vblank. Some driver internal callers need the true vblank
+	 * start to be used and signal this via the USE_REAL_VBLANKSTART flag.
+	 *
+	 * The cause of the "early" vblank irq is that the irq is triggered
+	 * by the line buffer logic when the line buffer read position enters
+	 * the vblank, whereas our crtc scanout position naturally lags the
+	 * line buffer read position.
+	 */
+	if (!(flags & USE_REAL_VBLANKSTART))
+		vbl_start -= adev->mode_info.crtcs[pipe]->lb_vblank_lead_lines;
+
 	/* Test scanout position against vblank region. */
 	if ((*vpos < vbl_start) && (*vpos >= vbl_end))
 		in_vbl = false;
 
+	/* In vblank? */
+	if (in_vbl)
+	    ret |= DRM_SCANOUTPOS_IN_VBLANK;
+
+	/* Called from driver internal vblank counter query code? */
+	if (flags & GET_DISTANCE_TO_VBLANKSTART) {
+		/* Caller wants distance from fudged earlier vbl_start */
+		*vpos -= vbl_start;
+		return ret;
+	}
+
 	/* Check if inside vblank area and apply corrective offsets:
 	 * vpos will then be >=0 in video scanout area, but negative
 	 * within vblank area, counting down the number of lines until
@@ -795,32 +871,6 @@
 	/* Correct for shifted end of vbl at vbl_end. */
 	*vpos = *vpos - vbl_end;
 
-	/* In vblank? */
-	if (in_vbl)
-		ret |= DRM_SCANOUTPOS_IN_VBLANK;
-
-	/* Is vpos outside nominal vblank area, but less than
-	 * 1/100 of a frame height away from start of vblank?
-	 * If so, assume this isn't a massively delayed vblank
-	 * interrupt, but a vblank interrupt that fired a few
-	 * microseconds before true start of vblank. Compensate
-	 * by adding a full frame duration to the final timestamp.
-	 * Happens, e.g., on ATI R500, R600.
-	 *
-	 * We only do this if DRM_CALLED_FROM_VBLIRQ.
-	 */
-	if ((flags & DRM_CALLED_FROM_VBLIRQ) && !in_vbl) {
-		vbl_start = mode->crtc_vdisplay;
-		vtotal = mode->crtc_vtotal;
-
-		if (vbl_start - *vpos < vtotal / 100) {
-			*vpos -= vtotal;
-
-			/* Signal this correction as "applied". */
-			ret |= 0x8;
-		}
-	}
-
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index 093a8c6..6fcbbcc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -45,7 +45,6 @@
 struct amdgpu_fbdev {
 	struct drm_fb_helper helper;
 	struct amdgpu_framebuffer rfb;
-	struct list_head fbdev_list;
 	struct amdgpu_device *adev;
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 257d722..3671f9f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -47,6 +47,9 @@
  * that the the relevant GPU caches have been flushed.
  */
 
+static struct kmem_cache *amdgpu_fence_slab;
+static atomic_t amdgpu_fence_slab_ref = ATOMIC_INIT(0);
+
 /**
  * amdgpu_fence_write - write a fence value
  *
@@ -85,24 +88,6 @@
 }
 
 /**
- * amdgpu_fence_schedule_check - schedule lockup check
- *
- * @ring: pointer to struct amdgpu_ring
- *
- * Queues a delayed work item to check for lockups.
- */
-static void amdgpu_fence_schedule_check(struct amdgpu_ring *ring)
-{
-	/*
-	 * Do not reset the timer here with mod_delayed_work,
-	 * this can livelock in an interaction with TTM delayed destroy.
-	 */
-	queue_delayed_work(system_power_efficient_wq,
-		&ring->fence_drv.lockup_work,
-		AMDGPU_FENCE_JIFFIES_TIMEOUT);
-}
-
-/**
  * amdgpu_fence_emit - emit a fence on the requested ring
  *
  * @ring: ring the fence is associated with
@@ -118,7 +103,7 @@
 	struct amdgpu_device *adev = ring->adev;
 
 	/* we are protected by the ring emission mutex */
-	*fence = kmalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
+	*fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
 	if ((*fence) == NULL) {
 		return -ENOMEM;
 	}
@@ -132,11 +117,23 @@
 	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
 			       (*fence)->seq,
 			       AMDGPU_FENCE_FLAG_INT);
-	trace_amdgpu_fence_emit(ring->adev->ddev, ring->idx, (*fence)->seq);
 	return 0;
 }
 
 /**
+ * amdgpu_fence_schedule_fallback - schedule fallback check
+ *
+ * @ring: pointer to struct amdgpu_ring
+ *
+ * Start a timer as fallback to our interrupts.
+ */
+static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring)
+{
+	mod_timer(&ring->fence_drv.fallback_timer,
+		  jiffies + AMDGPU_FENCE_JIFFIES_TIMEOUT);
+}
+
+/**
  * amdgpu_fence_activity - check for fence activity
  *
  * @ring: pointer to struct amdgpu_ring
@@ -202,33 +199,12 @@
 	} while (atomic64_xchg(&ring->fence_drv.last_seq, seq) > seq);
 
 	if (seq < last_emitted)
-		amdgpu_fence_schedule_check(ring);
+		amdgpu_fence_schedule_fallback(ring);
 
 	return wake;
 }
 
 /**
- * amdgpu_fence_check_lockup - check for hardware lockup
- *
- * @work: delayed work item
- *
- * Checks for fence activity and if there is none probe
- * the hardware if a lockup occured.
- */
-static void amdgpu_fence_check_lockup(struct work_struct *work)
-{
-	struct amdgpu_fence_driver *fence_drv;
-	struct amdgpu_ring *ring;
-
-	fence_drv = container_of(work, struct amdgpu_fence_driver,
-				lockup_work.work);
-	ring = fence_drv->ring;
-
-	if (amdgpu_fence_activity(ring))
-		wake_up_all(&ring->fence_drv.fence_queue);
-}
-
-/**
  * amdgpu_fence_process - process a fence
  *
  * @adev: amdgpu_device pointer
@@ -244,6 +220,20 @@
 }
 
 /**
+ * amdgpu_fence_fallback - fallback for hardware interrupts
+ *
+ * @work: delayed work item
+ *
+ * Checks for fence activity.
+ */
+static void amdgpu_fence_fallback(unsigned long arg)
+{
+	struct amdgpu_ring *ring = (void *)arg;
+
+	amdgpu_fence_process(ring);
+}
+
+/**
  * amdgpu_fence_seq_signaled - check if a fence sequence number has signaled
  *
  * @ring: ring the fence is associated with
@@ -290,7 +280,7 @@
 	if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
 		return 0;
 
-	amdgpu_fence_schedule_check(ring);
+	amdgpu_fence_schedule_fallback(ring);
 	wait_event(ring->fence_drv.fence_queue, (
 		   (signaled = amdgpu_fence_seq_signaled(ring, seq))));
 
@@ -491,9 +481,8 @@
 	atomic64_set(&ring->fence_drv.last_seq, 0);
 	ring->fence_drv.initialized = false;
 
-	INIT_DELAYED_WORK(&ring->fence_drv.lockup_work,
-			amdgpu_fence_check_lockup);
-	ring->fence_drv.ring = ring;
+	setup_timer(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback,
+		    (unsigned long)ring);
 
 	init_waitqueue_head(&ring->fence_drv.fence_queue);
 
@@ -536,6 +525,13 @@
  */
 int amdgpu_fence_driver_init(struct amdgpu_device *adev)
 {
+	if (atomic_inc_return(&amdgpu_fence_slab_ref) == 1) {
+		amdgpu_fence_slab = kmem_cache_create(
+			"amdgpu_fence", sizeof(struct amdgpu_fence), 0,
+			SLAB_HWCACHE_ALIGN, NULL);
+		if (!amdgpu_fence_slab)
+			return -ENOMEM;
+	}
 	if (amdgpu_debugfs_fence_init(adev))
 		dev_err(adev->dev, "fence debugfs file creation failed\n");
 
@@ -554,9 +550,12 @@
 {
 	int i, r;
 
+	if (atomic_dec_and_test(&amdgpu_fence_slab_ref))
+		kmem_cache_destroy(amdgpu_fence_slab);
 	mutex_lock(&adev->ring_lock);
 	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
 		struct amdgpu_ring *ring = adev->rings[i];
+
 		if (!ring || !ring->fence_drv.initialized)
 			continue;
 		r = amdgpu_fence_wait_empty(ring);
@@ -568,6 +567,7 @@
 		amdgpu_irq_put(adev, ring->fence_drv.irq_src,
 			       ring->fence_drv.irq_type);
 		amd_sched_fini(&ring->sched);
+		del_timer_sync(&ring->fence_drv.fallback_timer);
 		ring->fence_drv.initialized = false;
 	}
 	mutex_unlock(&adev->ring_lock);
@@ -751,18 +751,25 @@
 	fence->fence_wake.func = amdgpu_fence_check_signaled;
 	__add_wait_queue(&ring->fence_drv.fence_queue, &fence->fence_wake);
 	fence_get(f);
-	amdgpu_fence_schedule_check(ring);
+	if (!timer_pending(&ring->fence_drv.fallback_timer))
+		amdgpu_fence_schedule_fallback(ring);
 	FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
 	return true;
 }
 
+static void amdgpu_fence_release(struct fence *f)
+{
+	struct amdgpu_fence *fence = to_amdgpu_fence(f);
+	kmem_cache_free(amdgpu_fence_slab, fence);
+}
+
 const struct fence_ops amdgpu_fence_ops = {
 	.get_driver_name = amdgpu_fence_get_driver_name,
 	.get_timeline_name = amdgpu_fence_get_timeline_name,
 	.enable_signaling = amdgpu_fence_enable_signaling,
 	.signaled = amdgpu_fence_is_signaled,
 	.wait = fence_default_wait,
-	.release = NULL,
+	.release = amdgpu_fence_release,
 };
 
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 0873328..f6ea4b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -115,12 +115,9 @@
 	struct amdgpu_vm *vm = &fpriv->vm;
 	struct amdgpu_bo_va *bo_va;
 	int r;
-	mutex_lock(&vm->mutex);
 	r = amdgpu_bo_reserve(rbo, false);
-	if (r) {
-		mutex_unlock(&vm->mutex);
+	if (r)
 		return r;
-	}
 
 	bo_va = amdgpu_vm_bo_find(vm, rbo);
 	if (!bo_va) {
@@ -129,7 +126,6 @@
 		++bo_va->ref_count;
 	}
 	amdgpu_bo_unreserve(rbo);
-	mutex_unlock(&vm->mutex);
 	return 0;
 }
 
@@ -142,10 +138,8 @@
 	struct amdgpu_vm *vm = &fpriv->vm;
 	struct amdgpu_bo_va *bo_va;
 	int r;
-	mutex_lock(&vm->mutex);
 	r = amdgpu_bo_reserve(rbo, true);
 	if (r) {
-		mutex_unlock(&vm->mutex);
 		dev_err(adev->dev, "leaking bo va because "
 			"we fail to reserve bo (%d)\n", r);
 		return;
@@ -157,7 +151,6 @@
 		}
 	}
 	amdgpu_bo_unreserve(rbo);
-	mutex_unlock(&vm->mutex);
 }
 
 static int amdgpu_gem_handle_lockup(struct amdgpu_device *adev, int r)
@@ -242,8 +235,9 @@
 	    AMDGPU_GEM_USERPTR_REGISTER))
 		return -EINVAL;
 
-	if (!(args->flags & AMDGPU_GEM_USERPTR_ANONONLY) ||
-		   !(args->flags & AMDGPU_GEM_USERPTR_REGISTER)) {
+	if (!(args->flags & AMDGPU_GEM_USERPTR_READONLY) && (
+	     !(args->flags & AMDGPU_GEM_USERPTR_ANONONLY) ||
+	     !(args->flags & AMDGPU_GEM_USERPTR_REGISTER))) {
 
 		/* if we want to write to it we must require anonymous
 		   memory and install a MMU notifier */
@@ -483,6 +477,9 @@
 		if (domain == AMDGPU_GEM_DOMAIN_CPU)
 			goto error_unreserve;
 	}
+	r = amdgpu_vm_update_page_directory(adev, bo_va->vm);
+	if (r)
+		goto error_unreserve;
 
 	r = amdgpu_vm_clear_freed(adev, bo_va->vm);
 	if (r)
@@ -512,6 +509,9 @@
 	struct amdgpu_fpriv *fpriv = filp->driver_priv;
 	struct amdgpu_bo *rbo;
 	struct amdgpu_bo_va *bo_va;
+	struct ttm_validate_buffer tv, tv_pd;
+	struct ww_acquire_ctx ticket;
+	struct list_head list, duplicates;
 	uint32_t invalid_flags, va_flags = 0;
 	int r = 0;
 
@@ -547,19 +547,28 @@
 	gobj = drm_gem_object_lookup(dev, filp, args->handle);
 	if (gobj == NULL)
 		return -ENOENT;
-	mutex_lock(&fpriv->vm.mutex);
 	rbo = gem_to_amdgpu_bo(gobj);
-	r = amdgpu_bo_reserve(rbo, false);
+	INIT_LIST_HEAD(&list);
+	INIT_LIST_HEAD(&duplicates);
+	tv.bo = &rbo->tbo;
+	tv.shared = true;
+	list_add(&tv.head, &list);
+
+	if (args->operation == AMDGPU_VA_OP_MAP) {
+		tv_pd.bo = &fpriv->vm.page_directory->tbo;
+		tv_pd.shared = true;
+		list_add(&tv_pd.head, &list);
+	}
+	r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
 	if (r) {
-		mutex_unlock(&fpriv->vm.mutex);
 		drm_gem_object_unreference_unlocked(gobj);
 		return r;
 	}
 
 	bo_va = amdgpu_vm_bo_find(&fpriv->vm, rbo);
 	if (!bo_va) {
-		amdgpu_bo_unreserve(rbo);
-		mutex_unlock(&fpriv->vm.mutex);
+		ttm_eu_backoff_reservation(&ticket, &list);
+		drm_gem_object_unreference_unlocked(gobj);
 		return -ENOENT;
 	}
 
@@ -581,10 +590,10 @@
 	default:
 		break;
 	}
-
+	ttm_eu_backoff_reservation(&ticket, &list);
 	if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE))
 		amdgpu_gem_va_update_vm(adev, bo_va, args->operation);
-	mutex_unlock(&fpriv->vm.mutex);
+
 	drm_gem_object_unreference_unlocked(gobj);
 	return r;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index e659877..9e25eda 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -62,7 +62,7 @@
 	int r;
 
 	if (size) {
-		r = amdgpu_sa_bo_new(adev, &adev->ring_tmp_bo,
+		r = amdgpu_sa_bo_new(&adev->ring_tmp_bo,
 				      &ib->sa_bo, size, 256);
 		if (r) {
 			dev_err(adev->dev, "failed to get a new IB (%d)\n", r);
@@ -216,7 +216,7 @@
 	}
 
 	if (ib->vm)
-		amdgpu_vm_fence(adev, ib->vm, ib->fence);
+		amdgpu_vm_fence(adev, ib->vm, &ib->fence->base);
 
 	amdgpu_ring_unlock_commit(ring);
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 1618e22..e23843f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -611,13 +611,59 @@
 u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe)
 {
 	struct amdgpu_device *adev = dev->dev_private;
+	int vpos, hpos, stat;
+	u32 count;
 
 	if (pipe >= adev->mode_info.num_crtc) {
 		DRM_ERROR("Invalid crtc %u\n", pipe);
 		return -EINVAL;
 	}
 
-	return amdgpu_display_vblank_get_counter(adev, pipe);
+	/* The hw increments its frame counter at start of vsync, not at start
+	 * of vblank, as is required by DRM core vblank counter handling.
+	 * Cook the hw count here to make it appear to the caller as if it
+	 * incremented at start of vblank. We measure distance to start of
+	 * vblank in vpos. vpos therefore will be >= 0 between start of vblank
+	 * and start of vsync, so vpos >= 0 means to bump the hw frame counter
+	 * result by 1 to give the proper appearance to caller.
+	 */
+	if (adev->mode_info.crtcs[pipe]) {
+		/* Repeat readout if needed to provide stable result if
+		 * we cross start of vsync during the queries.
+		 */
+		do {
+			count = amdgpu_display_vblank_get_counter(adev, pipe);
+			/* Ask amdgpu_get_crtc_scanoutpos to return vpos as
+			 * distance to start of vblank, instead of regular
+			 * vertical scanout pos.
+			 */
+			stat = amdgpu_get_crtc_scanoutpos(
+				dev, pipe, GET_DISTANCE_TO_VBLANKSTART,
+				&vpos, &hpos, NULL, NULL,
+				&adev->mode_info.crtcs[pipe]->base.hwmode);
+		} while (count != amdgpu_display_vblank_get_counter(adev, pipe));
+
+		if (((stat & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE)) !=
+		    (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE))) {
+			DRM_DEBUG_VBL("Query failed! stat %d\n", stat);
+		} else {
+			DRM_DEBUG_VBL("crtc %d: dist from vblank start %d\n",
+				      pipe, vpos);
+
+			/* Bump counter if we are at >= leading edge of vblank,
+			 * but before vsync where vpos would turn negative and
+			 * the hw counter really increments.
+			 */
+			if (vpos >= 0)
+				count++;
+		}
+	} else {
+		/* Fallback to use value as is. */
+		count = amdgpu_display_vblank_get_counter(adev, pipe);
+		DRM_DEBUG_VBL("NULL mode info! Returned count may be wrong.\n");
+	}
+
+	return count;
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index b62c171..fdc1be8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -35,6 +35,7 @@
 #include <drm/drm_dp_helper.h>
 #include <drm/drm_fixed.h>
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_fb_helper.h>
 #include <drm/drm_plane_helper.h>
 #include <linux/i2c.h>
 #include <linux/i2c-algo-bit.h>
@@ -407,6 +408,7 @@
 	u32 line_time;
 	u32 wm_low;
 	u32 wm_high;
+	u32 lb_vblank_lead_lines;
 	struct drm_display_mode hw_mode;
 };
 
@@ -528,6 +530,10 @@
 #define ENCODER_MODE_IS_DP(em) (((em) == ATOM_ENCODER_MODE_DP) || \
 				((em) == ATOM_ENCODER_MODE_DP_MST))
 
+/* Driver internal use only flags of amdgpu_get_crtc_scanoutpos() */
+#define USE_REAL_VBLANKSTART 		(1 << 30)
+#define GET_DISTANCE_TO_VBLANKSTART	(1 << 31)
+
 void amdgpu_link_encoder_connector(struct drm_device *dev);
 
 struct drm_connector *
@@ -551,7 +557,7 @@
 
 int amdgpu_framebuffer_init(struct drm_device *dev,
 			     struct amdgpu_framebuffer *rfb,
-			     struct drm_mode_fb_cmd2 *mode_cmd,
+			     const struct drm_mode_fb_cmd2 *mode_cmd,
 			     struct drm_gem_object *obj);
 
 int amdgpufb_remove(struct drm_device *dev, struct drm_framebuffer *fb);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 0d52438..c3ce103 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -100,6 +100,7 @@
 	list_del_init(&bo->list);
 	mutex_unlock(&bo->adev->gem.mutex);
 	drm_gem_object_release(&bo->gem_base);
+	amdgpu_bo_unref(&bo->parent);
 	kfree(bo->metadata);
 	kfree(bo);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 3c2ff45..ea756e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -189,10 +189,9 @@
 				      struct amdgpu_sa_manager *sa_manager);
 int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev,
 					struct amdgpu_sa_manager *sa_manager);
-int amdgpu_sa_bo_new(struct amdgpu_device *adev,
-			    struct amdgpu_sa_manager *sa_manager,
-			    struct amdgpu_sa_bo **sa_bo,
-			    unsigned size, unsigned align);
+int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
+		     struct amdgpu_sa_bo **sa_bo,
+		     unsigned size, unsigned align);
 void amdgpu_sa_bo_free(struct amdgpu_device *adev,
 			      struct amdgpu_sa_bo **sa_bo,
 			      struct fence *fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
index 0212b31..8b88edb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -311,8 +311,7 @@
 	return false;
 }
 
-int amdgpu_sa_bo_new(struct amdgpu_device *adev,
-		     struct amdgpu_sa_manager *sa_manager,
+int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
 		     struct amdgpu_sa_bo **sa_bo,
 		     unsigned size, unsigned align)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
index dcf4a8a..438c052 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -26,6 +26,7 @@
 #include <linux/sched.h>
 #include <drm/drmP.h>
 #include "amdgpu.h"
+#include "amdgpu_trace.h"
 
 static struct fence *amdgpu_sched_dependency(struct amd_sched_job *sched_job)
 {
@@ -44,11 +45,8 @@
 		return NULL;
 	}
 	job = to_amdgpu_job(sched_job);
-	mutex_lock(&job->job_lock);
-	r = amdgpu_ib_schedule(job->adev,
-			       job->num_ibs,
-			       job->ibs,
-			       job->base.owner);
+	trace_amdgpu_sched_run_job(job);
+	r = amdgpu_ib_schedule(job->adev, job->num_ibs, job->ibs, job->owner);
 	if (r) {
 		DRM_ERROR("Error scheduling IBs (%d)\n", r);
 		goto err;
@@ -61,8 +59,6 @@
 	if (job->free_job)
 		job->free_job(job);
 
-	mutex_unlock(&job->job_lock);
-	fence_put(&job->base.s_fence->base);
 	kfree(job);
 	return fence ? &fence->base : NULL;
 }
@@ -88,21 +84,19 @@
 			return -ENOMEM;
 		job->base.sched = &ring->sched;
 		job->base.s_entity = &adev->kernel_ctx.rings[ring->idx].entity;
+		job->base.s_fence = amd_sched_fence_create(job->base.s_entity, owner);
+		if (!job->base.s_fence) {
+			kfree(job);
+			return -ENOMEM;
+		}
+		*f = fence_get(&job->base.s_fence->base);
+
 		job->adev = adev;
 		job->ibs = ibs;
 		job->num_ibs = num_ibs;
-		job->base.owner = owner;
-		mutex_init(&job->job_lock);
+		job->owner = owner;
 		job->free_job = free_job;
-		mutex_lock(&job->job_lock);
-		r = amd_sched_entity_push_job(&job->base);
-		if (r) {
-			mutex_unlock(&job->job_lock);
-			kfree(job);
-			return r;
-		}
-		*f = fence_get(&job->base.s_fence->base);
-		mutex_unlock(&job->job_lock);
+		amd_sched_entity_push_job(&job->base);
 	} else {
 		r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner);
 		if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c
index ff3ca52..1caaf20 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c
@@ -40,7 +40,7 @@
 	if (*semaphore == NULL) {
 		return -ENOMEM;
 	}
-	r = amdgpu_sa_bo_new(adev, &adev->ring_tmp_bo,
+	r = amdgpu_sa_bo_new(&adev->ring_tmp_bo,
 			     &(*semaphore)->sa_bo, 8, 8);
 	if (r) {
 		kfree(*semaphore);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index a6697fd..dd005c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -302,8 +302,14 @@
 			return -EINVAL;
 		}
 
-		if (amdgpu_enable_scheduler || !amdgpu_enable_semaphores ||
-		    (count >= AMDGPU_NUM_SYNCS)) {
+		if (amdgpu_enable_scheduler || !amdgpu_enable_semaphores) {
+			r = fence_wait(&fence->base, true);
+			if (r)
+				return r;
+			continue;
+		}
+
+		if (count >= AMDGPU_NUM_SYNCS) {
 			/* not enough room, wait manually */
 			r = fence_wait(&fence->base, false);
 			if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 76ecbaf..8f9834ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -48,6 +48,57 @@
 		      __entry->fences)
 );
 
+TRACE_EVENT(amdgpu_cs_ioctl,
+	    TP_PROTO(struct amdgpu_job *job),
+	    TP_ARGS(job),
+	    TP_STRUCT__entry(
+			     __field(struct amdgpu_device *, adev)
+			     __field(struct amd_sched_job *, sched_job)
+			     __field(struct amdgpu_ib *, ib)
+			     __field(struct fence *, fence)
+			     __field(char *, ring_name)
+			     __field(u32, num_ibs)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->adev = job->adev;
+			   __entry->sched_job = &job->base;
+			   __entry->ib = job->ibs;
+			   __entry->fence = &job->base.s_fence->base;
+			   __entry->ring_name = job->ibs[0].ring->name;
+			   __entry->num_ibs = job->num_ibs;
+			   ),
+	    TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u",
+		      __entry->adev, __entry->sched_job, __entry->ib,
+		      __entry->fence, __entry->ring_name, __entry->num_ibs)
+);
+
+TRACE_EVENT(amdgpu_sched_run_job,
+	    TP_PROTO(struct amdgpu_job *job),
+	    TP_ARGS(job),
+	    TP_STRUCT__entry(
+			     __field(struct amdgpu_device *, adev)
+			     __field(struct amd_sched_job *, sched_job)
+			     __field(struct amdgpu_ib *, ib)
+			     __field(struct fence *, fence)
+			     __field(char *, ring_name)
+			     __field(u32, num_ibs)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->adev = job->adev;
+			   __entry->sched_job = &job->base;
+			   __entry->ib = job->ibs;
+			   __entry->fence = &job->base.s_fence->base;
+			   __entry->ring_name = job->ibs[0].ring->name;
+			   __entry->num_ibs = job->num_ibs;
+			   ),
+	    TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u",
+		      __entry->adev, __entry->sched_job, __entry->ib,
+		      __entry->fence, __entry->ring_name, __entry->num_ibs)
+);
+
+
 TRACE_EVENT(amdgpu_vm_grab_id,
 	    TP_PROTO(unsigned vmid, int ring),
 	    TP_ARGS(vmid, ring),
@@ -196,49 +247,6 @@
 	    TP_printk("list=%p, bo=%p", __entry->list, __entry->bo)
 );
 
-DECLARE_EVENT_CLASS(amdgpu_fence_request,
-
-	    TP_PROTO(struct drm_device *dev, int ring, u32 seqno),
-
-	    TP_ARGS(dev, ring, seqno),
-
-	    TP_STRUCT__entry(
-			     __field(u32, dev)
-			     __field(int, ring)
-			     __field(u32, seqno)
-			     ),
-
-	    TP_fast_assign(
-			   __entry->dev = dev->primary->index;
-			   __entry->ring = ring;
-			   __entry->seqno = seqno;
-			   ),
-
-	    TP_printk("dev=%u, ring=%d, seqno=%u",
-		      __entry->dev, __entry->ring, __entry->seqno)
-);
-
-DEFINE_EVENT(amdgpu_fence_request, amdgpu_fence_emit,
-
-	    TP_PROTO(struct drm_device *dev, int ring, u32 seqno),
-
-	    TP_ARGS(dev, ring, seqno)
-);
-
-DEFINE_EVENT(amdgpu_fence_request, amdgpu_fence_wait_begin,
-
-	    TP_PROTO(struct drm_device *dev, int ring, u32 seqno),
-
-	    TP_ARGS(dev, ring, seqno)
-);
-
-DEFINE_EVENT(amdgpu_fence_request, amdgpu_fence_wait_end,
-
-	    TP_PROTO(struct drm_device *dev, int ring, u32 seqno),
-
-	    TP_ARGS(dev, ring, seqno)
-);
-
 DECLARE_EVENT_CLASS(amdgpu_semaphore_request,
 
 	    TP_PROTO(int ring, struct amdgpu_semaphore *sem),
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 81bb8e9..8a1752f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -587,9 +587,13 @@
 	uint32_t flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
 	int r;
 
-	if (gtt->userptr)
-		amdgpu_ttm_tt_pin_userptr(ttm);
-
+	if (gtt->userptr) {
+		r = amdgpu_ttm_tt_pin_userptr(ttm);
+		if (r) {
+			DRM_ERROR("failed to pin userptr\n");
+			return r;
+		}
+	}
 	gtt->offset = (unsigned long)(bo_mem->start << PAGE_SHIFT);
 	if (!ttm->num_pages) {
 		WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n",
@@ -797,11 +801,12 @@
 	if (mem && mem->mem_type != TTM_PL_SYSTEM)
 		flags |= AMDGPU_PTE_VALID;
 
-	if (mem && mem->mem_type == TTM_PL_TT)
+	if (mem && mem->mem_type == TTM_PL_TT) {
 		flags |= AMDGPU_PTE_SYSTEM;
 
-	if (!ttm || ttm->caching_state == tt_cached)
-		flags |= AMDGPU_PTE_SNOOPED;
+		if (ttm->caching_state == tt_cached)
+			flags |= AMDGPU_PTE_SNOOPED;
+	}
 
 	if (adev->asic_type >= CHIP_TOPAZ)
 		flags |= AMDGPU_PTE_EXECUTABLE;
@@ -1073,10 +1078,10 @@
 	ret = drm_mm_dump_table(m, mm);
 	spin_unlock(&glob->lru_lock);
 	if (ttm_pl == TTM_PL_VRAM)
-		seq_printf(m, "man size:%llu pages, ram usage:%luMB, vis usage:%luMB\n",
+		seq_printf(m, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n",
 			   adev->mman.bdev.man[ttm_pl].size,
-			   atomic64_read(&adev->vram_usage) >> 20,
-			   atomic64_read(&adev->vram_vis_usage) >> 20);
+			   (u64)atomic64_read(&adev->vram_usage) >> 20,
+			   (u64)atomic64_read(&adev->vram_vis_usage) >> 20);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 03f0c3b..a745eee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -392,7 +392,10 @@
 	ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */
 	ib->ptr[ib->length_dw++] = handle;
 
-	ib->ptr[ib->length_dw++] = 0x00000030; /* len */
+	if ((ring->adev->vce.fw_version >> 24) >= 52)
+		ib->ptr[ib->length_dw++] = 0x00000040; /* len */
+	else
+		ib->ptr[ib->length_dw++] = 0x00000030; /* len */
 	ib->ptr[ib->length_dw++] = 0x01000001; /* create cmd */
 	ib->ptr[ib->length_dw++] = 0x00000000;
 	ib->ptr[ib->length_dw++] = 0x00000042;
@@ -404,6 +407,12 @@
 	ib->ptr[ib->length_dw++] = 0x00000100;
 	ib->ptr[ib->length_dw++] = 0x0000000c;
 	ib->ptr[ib->length_dw++] = 0x00000000;
+	if ((ring->adev->vce.fw_version >> 24) >= 52) {
+		ib->ptr[ib->length_dw++] = 0x00000000;
+		ib->ptr[ib->length_dw++] = 0x00000000;
+		ib->ptr[ib->length_dw++] = 0x00000000;
+		ib->ptr[ib->length_dw++] = 0x00000000;
+	}
 
 	ib->ptr[ib->length_dw++] = 0x00000014; /* len */
 	ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 633a32a..b53d273 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -143,10 +143,15 @@
 	unsigned i;
 
 	/* check if the id is still valid */
-	if (vm_id->id && vm_id->last_id_use &&
-	    vm_id->last_id_use == adev->vm_manager.active[vm_id->id]) {
-		trace_amdgpu_vm_grab_id(vm_id->id, ring->idx);
-		return 0;
+	if (vm_id->id) {
+		unsigned id = vm_id->id;
+		long owner;
+
+		owner = atomic_long_read(&adev->vm_manager.ids[id].owner);
+		if (owner == (long)vm) {
+			trace_amdgpu_vm_grab_id(vm_id->id, ring->idx);
+			return 0;
+		}
 	}
 
 	/* we definately need to flush */
@@ -154,7 +159,7 @@
 
 	/* skip over VMID 0, since it is the system VM */
 	for (i = 1; i < adev->vm_manager.nvm; ++i) {
-		struct fence *fence = adev->vm_manager.active[i];
+		struct fence *fence = adev->vm_manager.ids[i].active;
 		struct amdgpu_ring *fring;
 
 		if (fence == NULL) {
@@ -176,7 +181,7 @@
 		if (choices[i]) {
 			struct fence *fence;
 
-			fence  = adev->vm_manager.active[choices[i]];
+			fence  = adev->vm_manager.ids[choices[i]].active;
 			vm_id->id = choices[i];
 
 			trace_amdgpu_vm_grab_id(choices[i], ring->idx);
@@ -207,24 +212,21 @@
 	uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
 	struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx];
 	struct fence *flushed_updates = vm_id->flushed_updates;
-	bool is_earlier = false;
+	bool is_later;
 
-	if (flushed_updates && updates) {
-		BUG_ON(flushed_updates->context != updates->context);
-		is_earlier = (updates->seqno - flushed_updates->seqno <=
-			      INT_MAX) ? true : false;
-	}
+	if (!flushed_updates)
+		is_later = true;
+	else if (!updates)
+		is_later = false;
+	else
+		is_later = fence_is_later(updates, flushed_updates);
 
-	if (pd_addr != vm_id->pd_gpu_addr || !flushed_updates ||
-	    is_earlier) {
-
+	if (pd_addr != vm_id->pd_gpu_addr || is_later) {
 		trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id->id);
-		if (is_earlier) {
+		if (is_later) {
 			vm_id->flushed_updates = fence_get(updates);
 			fence_put(flushed_updates);
 		}
-		if (!flushed_updates)
-			vm_id->flushed_updates = fence_get(updates);
 		vm_id->pd_gpu_addr = pd_addr;
 		amdgpu_ring_emit_vm_flush(ring, vm_id->id, vm_id->pd_gpu_addr);
 	}
@@ -244,16 +246,14 @@
  */
 void amdgpu_vm_fence(struct amdgpu_device *adev,
 		     struct amdgpu_vm *vm,
-		     struct amdgpu_fence *fence)
+		     struct fence *fence)
 {
-	unsigned ridx = fence->ring->idx;
-	unsigned vm_id = vm->ids[ridx].id;
+	struct amdgpu_ring *ring = amdgpu_ring_from_fence(fence);
+	unsigned vm_id = vm->ids[ring->idx].id;
 
-	fence_put(adev->vm_manager.active[vm_id]);
-	adev->vm_manager.active[vm_id] = fence_get(&fence->base);
-
-	fence_put(vm->ids[ridx].last_id_use);
-	vm->ids[ridx].last_id_use = fence_get(&fence->base);
+	fence_put(adev->vm_manager.ids[vm_id].active);
+	adev->vm_manager.ids[vm_id].active = fence_get(fence);
+	atomic_long_set(&adev->vm_manager.ids[vm_id].owner, (long)vm);
 }
 
 /**
@@ -332,6 +332,8 @@
  *
  * @adev: amdgpu_device pointer
  * @bo: bo to clear
+ *
+ * need to reserve bo first before calling it.
  */
 static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 			      struct amdgpu_bo *bo)
@@ -343,24 +345,20 @@
 	uint64_t addr;
 	int r;
 
-	r = amdgpu_bo_reserve(bo, false);
-	if (r)
-		return r;
-
 	r = reservation_object_reserve_shared(bo->tbo.resv);
 	if (r)
 		return r;
 
 	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
 	if (r)
-		goto error_unreserve;
+		goto error;
 
 	addr = amdgpu_bo_gpu_offset(bo);
 	entries = amdgpu_bo_size(bo) / 8;
 
 	ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
 	if (!ib)
-		goto error_unreserve;
+		goto error;
 
 	r = amdgpu_ib_get(ring, NULL, entries * 2 + 64, ib);
 	if (r)
@@ -378,16 +376,14 @@
 	if (!r)
 		amdgpu_bo_fence(bo, fence, true);
 	fence_put(fence);
-	if (amdgpu_enable_scheduler) {
-		amdgpu_bo_unreserve(bo);
+	if (amdgpu_enable_scheduler)
 		return 0;
-	}
+
 error_free:
 	amdgpu_ib_free(adev, ib);
 	kfree(ib);
 
-error_unreserve:
-	amdgpu_bo_unreserve(bo);
+error:
 	return r;
 }
 
@@ -889,17 +885,21 @@
 	struct amdgpu_bo_va_mapping *mapping;
 	int r;
 
+	spin_lock(&vm->freed_lock);
 	while (!list_empty(&vm->freed)) {
 		mapping = list_first_entry(&vm->freed,
 			struct amdgpu_bo_va_mapping, list);
 		list_del(&mapping->list);
-
+		spin_unlock(&vm->freed_lock);
 		r = amdgpu_vm_bo_update_mapping(adev, vm, mapping, 0, 0, NULL);
 		kfree(mapping);
 		if (r)
 			return r;
 
+		spin_lock(&vm->freed_lock);
 	}
+	spin_unlock(&vm->freed_lock);
+
 	return 0;
 
 }
@@ -926,8 +926,9 @@
 		bo_va = list_first_entry(&vm->invalidated,
 			struct amdgpu_bo_va, vm_status);
 		spin_unlock(&vm->status_lock);
-
+		mutex_lock(&bo_va->mutex);
 		r = amdgpu_vm_bo_update(adev, bo_va, NULL);
+		mutex_unlock(&bo_va->mutex);
 		if (r)
 			return r;
 
@@ -971,7 +972,7 @@
 	INIT_LIST_HEAD(&bo_va->valids);
 	INIT_LIST_HEAD(&bo_va->invalids);
 	INIT_LIST_HEAD(&bo_va->vm_status);
-
+	mutex_init(&bo_va->mutex);
 	list_add_tail(&bo_va->bo_list, &bo->va);
 
 	return bo_va;
@@ -989,7 +990,7 @@
  * Add a mapping of the BO at the specefied addr into the VM.
  * Returns 0 for success, error for failure.
  *
- * Object has to be reserved and gets unreserved by this function!
+ * Object has to be reserved and unreserved outside!
  */
 int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 		     struct amdgpu_bo_va *bo_va,
@@ -1005,30 +1006,27 @@
 
 	/* validate the parameters */
 	if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK ||
-	    size == 0 || size & AMDGPU_GPU_PAGE_MASK) {
-		amdgpu_bo_unreserve(bo_va->bo);
+	    size == 0 || size & AMDGPU_GPU_PAGE_MASK)
 		return -EINVAL;
-	}
 
 	/* make sure object fit at this offset */
 	eaddr = saddr + size;
-	if ((saddr >= eaddr) || (offset + size > amdgpu_bo_size(bo_va->bo))) {
-		amdgpu_bo_unreserve(bo_va->bo);
+	if ((saddr >= eaddr) || (offset + size > amdgpu_bo_size(bo_va->bo)))
 		return -EINVAL;
-	}
 
 	last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE;
 	if (last_pfn > adev->vm_manager.max_pfn) {
 		dev_err(adev->dev, "va above limit (0x%08X > 0x%08X)\n",
 			last_pfn, adev->vm_manager.max_pfn);
-		amdgpu_bo_unreserve(bo_va->bo);
 		return -EINVAL;
 	}
 
 	saddr /= AMDGPU_GPU_PAGE_SIZE;
 	eaddr /= AMDGPU_GPU_PAGE_SIZE;
 
+	spin_lock(&vm->it_lock);
 	it = interval_tree_iter_first(&vm->va, saddr, eaddr - 1);
+	spin_unlock(&vm->it_lock);
 	if (it) {
 		struct amdgpu_bo_va_mapping *tmp;
 		tmp = container_of(it, struct amdgpu_bo_va_mapping, it);
@@ -1036,14 +1034,12 @@
 		dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with "
 			"0x%010lx-0x%010lx\n", bo_va->bo, saddr, eaddr,
 			tmp->it.start, tmp->it.last + 1);
-		amdgpu_bo_unreserve(bo_va->bo);
 		r = -EINVAL;
 		goto error;
 	}
 
 	mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
 	if (!mapping) {
-		amdgpu_bo_unreserve(bo_va->bo);
 		r = -ENOMEM;
 		goto error;
 	}
@@ -1054,8 +1050,12 @@
 	mapping->offset = offset;
 	mapping->flags = flags;
 
+	mutex_lock(&bo_va->mutex);
 	list_add(&mapping->list, &bo_va->invalids);
+	mutex_unlock(&bo_va->mutex);
+	spin_lock(&vm->it_lock);
 	interval_tree_insert(&mapping->it, &vm->va);
+	spin_unlock(&vm->it_lock);
 	trace_amdgpu_vm_bo_map(bo_va, mapping);
 
 	/* Make sure the page tables are allocated */
@@ -1067,8 +1067,6 @@
 	if (eaddr > vm->max_pde_used)
 		vm->max_pde_used = eaddr;
 
-	amdgpu_bo_unreserve(bo_va->bo);
-
 	/* walk over the address space and allocate the page tables */
 	for (pt_idx = saddr; pt_idx <= eaddr; ++pt_idx) {
 		struct reservation_object *resv = vm->page_directory->tbo.resv;
@@ -1077,16 +1075,19 @@
 		if (vm->page_tables[pt_idx].bo)
 			continue;
 
-		ww_mutex_lock(&resv->lock, NULL);
 		r = amdgpu_bo_create(adev, AMDGPU_VM_PTE_COUNT * 8,
 				     AMDGPU_GPU_PAGE_SIZE, true,
 				     AMDGPU_GEM_DOMAIN_VRAM,
 				     AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
 				     NULL, resv, &pt);
-		ww_mutex_unlock(&resv->lock);
 		if (r)
 			goto error_free;
 
+		/* Keep a reference to the page table to avoid freeing
+		 * them up in the wrong order.
+		 */
+		pt->parent = amdgpu_bo_ref(vm->page_directory);
+
 		r = amdgpu_vm_clear_bo(adev, pt);
 		if (r) {
 			amdgpu_bo_unref(&pt);
@@ -1101,7 +1102,9 @@
 
 error_free:
 	list_del(&mapping->list);
+	spin_lock(&vm->it_lock);
 	interval_tree_remove(&mapping->it, &vm->va);
+	spin_unlock(&vm->it_lock);
 	trace_amdgpu_vm_bo_unmap(bo_va, mapping);
 	kfree(mapping);
 
@@ -1119,7 +1122,7 @@
  * Remove a mapping of the BO at the specefied addr from the VM.
  * Returns 0 for success, error for failure.
  *
- * Object has to be reserved and gets unreserved by this function!
+ * Object has to be reserved and unreserved outside!
  */
 int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
 		       struct amdgpu_bo_va *bo_va,
@@ -1130,7 +1133,7 @@
 	bool valid = true;
 
 	saddr /= AMDGPU_GPU_PAGE_SIZE;
-
+	mutex_lock(&bo_va->mutex);
 	list_for_each_entry(mapping, &bo_va->valids, list) {
 		if (mapping->it.start == saddr)
 			break;
@@ -1145,20 +1148,24 @@
 		}
 
 		if (&mapping->list == &bo_va->invalids) {
-			amdgpu_bo_unreserve(bo_va->bo);
+			mutex_unlock(&bo_va->mutex);
 			return -ENOENT;
 		}
 	}
-
+	mutex_unlock(&bo_va->mutex);
 	list_del(&mapping->list);
+	spin_lock(&vm->it_lock);
 	interval_tree_remove(&mapping->it, &vm->va);
+	spin_unlock(&vm->it_lock);
 	trace_amdgpu_vm_bo_unmap(bo_va, mapping);
 
-	if (valid)
+	if (valid) {
+		spin_lock(&vm->freed_lock);
 		list_add(&mapping->list, &vm->freed);
-	else
+		spin_unlock(&vm->freed_lock);
+	} else {
 		kfree(mapping);
-	amdgpu_bo_unreserve(bo_va->bo);
+	}
 
 	return 0;
 }
@@ -1187,17 +1194,23 @@
 
 	list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
 		list_del(&mapping->list);
+		spin_lock(&vm->it_lock);
 		interval_tree_remove(&mapping->it, &vm->va);
+		spin_unlock(&vm->it_lock);
 		trace_amdgpu_vm_bo_unmap(bo_va, mapping);
+		spin_lock(&vm->freed_lock);
 		list_add(&mapping->list, &vm->freed);
+		spin_unlock(&vm->freed_lock);
 	}
 	list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
 		list_del(&mapping->list);
+		spin_lock(&vm->it_lock);
 		interval_tree_remove(&mapping->it, &vm->va);
+		spin_unlock(&vm->it_lock);
 		kfree(mapping);
 	}
-
 	fence_put(bo_va->last_pt_update);
+	mutex_destroy(&bo_va->mutex);
 	kfree(bo_va);
 }
 
@@ -1241,15 +1254,14 @@
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		vm->ids[i].id = 0;
 		vm->ids[i].flushed_updates = NULL;
-		vm->ids[i].last_id_use = NULL;
 	}
-	mutex_init(&vm->mutex);
 	vm->va = RB_ROOT;
 	spin_lock_init(&vm->status_lock);
 	INIT_LIST_HEAD(&vm->invalidated);
 	INIT_LIST_HEAD(&vm->cleared);
 	INIT_LIST_HEAD(&vm->freed);
-
+	spin_lock_init(&vm->it_lock);
+	spin_lock_init(&vm->freed_lock);
 	pd_size = amdgpu_vm_directory_size(adev);
 	pd_entries = amdgpu_vm_num_pdes(adev);
 
@@ -1269,8 +1281,14 @@
 			     NULL, NULL, &vm->page_directory);
 	if (r)
 		return r;
-
+	r = amdgpu_bo_reserve(vm->page_directory, false);
+	if (r) {
+		amdgpu_bo_unref(&vm->page_directory);
+		vm->page_directory = NULL;
+		return r;
+	}
 	r = amdgpu_vm_clear_bo(adev, vm->page_directory);
+	amdgpu_bo_unreserve(vm->page_directory);
 	if (r) {
 		amdgpu_bo_unref(&vm->page_directory);
 		vm->page_directory = NULL;
@@ -1313,11 +1331,27 @@
 
 	amdgpu_bo_unref(&vm->page_directory);
 	fence_put(vm->page_directory_fence);
-
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+		unsigned id = vm->ids[i].id;
+
+		atomic_long_cmpxchg(&adev->vm_manager.ids[id].owner,
+				    (long)vm, 0);
 		fence_put(vm->ids[i].flushed_updates);
-		fence_put(vm->ids[i].last_id_use);
 	}
 
-	mutex_destroy(&vm->mutex);
+}
+
+/**
+ * amdgpu_vm_manager_fini - cleanup VM manager
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Cleanup the VM manager and free resources.
+ */
+void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
+{
+	unsigned i;
+
+	for (i = 0; i < AMDGPU_NUM_VM; ++i)
+		fence_put(adev->vm_manager.ids[i].active);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index a1a35a5..57a2e34 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -6569,12 +6569,12 @@
 		switch (state) {
 		case AMDGPU_IRQ_STATE_DISABLE:
 			cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT);
-			cg_thermal_int &= ~CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK;
+			cg_thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK;
 			WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int);
 			break;
 		case AMDGPU_IRQ_STATE_ENABLE:
 			cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT);
-			cg_thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK;
+			cg_thermal_int &= ~CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK;
 			WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int);
 			break;
 		default:
@@ -6586,12 +6586,12 @@
 		switch (state) {
 		case AMDGPU_IRQ_STATE_DISABLE:
 			cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT);
-			cg_thermal_int &= ~CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK;
+			cg_thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK;
 			WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int);
 			break;
 		case AMDGPU_IRQ_STATE_ENABLE:
 			cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT);
-			cg_thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK;
+			cg_thermal_int &= ~CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK;
 			WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int);
 			break;
 		default:
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index cb0f774..093599a 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -1250,7 +1250,7 @@
 	u32 pixel_period;
 	u32 line_time = 0;
 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
-	u32 tmp, wm_mask;
+	u32 tmp, wm_mask, lb_vblank_lead_lines = 0;
 
 	if (amdgpu_crtc->base.enabled && num_heads && mode) {
 		pixel_period = 1000000 / (u32)mode->clock;
@@ -1333,6 +1333,7 @@
 		    (adev->mode_info.disp_priority == 2)) {
 			DRM_DEBUG_KMS("force priority to high\n");
 		}
+		lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
 	}
 
 	/* select wm A */
@@ -1357,6 +1358,8 @@
 	amdgpu_crtc->line_time = line_time;
 	amdgpu_crtc->wm_high = latency_watermark_a;
 	amdgpu_crtc->wm_low = latency_watermark_b;
+	/* Save number of lines the linebuffer leads before the scanout */
+	amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines;
 }
 
 /**
@@ -3726,7 +3729,7 @@
 	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1:
 	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2:
 		drm_encoder_init(dev, encoder, &dce_v10_0_encoder_funcs,
-				 DRM_MODE_ENCODER_DAC);
+				 DRM_MODE_ENCODER_DAC, NULL);
 		drm_encoder_helper_add(encoder, &dce_v10_0_dac_helper_funcs);
 		break;
 	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
@@ -3737,15 +3740,15 @@
 		if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
 			amdgpu_encoder->rmx_type = RMX_FULL;
 			drm_encoder_init(dev, encoder, &dce_v10_0_encoder_funcs,
-					 DRM_MODE_ENCODER_LVDS);
+					 DRM_MODE_ENCODER_LVDS, NULL);
 			amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_lcd_info(amdgpu_encoder);
 		} else if (amdgpu_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT)) {
 			drm_encoder_init(dev, encoder, &dce_v10_0_encoder_funcs,
-					 DRM_MODE_ENCODER_DAC);
+					 DRM_MODE_ENCODER_DAC, NULL);
 			amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_dig_info(amdgpu_encoder);
 		} else {
 			drm_encoder_init(dev, encoder, &dce_v10_0_encoder_funcs,
-					 DRM_MODE_ENCODER_TMDS);
+					 DRM_MODE_ENCODER_TMDS, NULL);
 			amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_dig_info(amdgpu_encoder);
 		}
 		drm_encoder_helper_add(encoder, &dce_v10_0_dig_helper_funcs);
@@ -3763,13 +3766,13 @@
 		amdgpu_encoder->is_ext_encoder = true;
 		if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT))
 			drm_encoder_init(dev, encoder, &dce_v10_0_encoder_funcs,
-					 DRM_MODE_ENCODER_LVDS);
+					 DRM_MODE_ENCODER_LVDS, NULL);
 		else if (amdgpu_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT))
 			drm_encoder_init(dev, encoder, &dce_v10_0_encoder_funcs,
-					 DRM_MODE_ENCODER_DAC);
+					 DRM_MODE_ENCODER_DAC, NULL);
 		else
 			drm_encoder_init(dev, encoder, &dce_v10_0_encoder_funcs,
-					 DRM_MODE_ENCODER_TMDS);
+					 DRM_MODE_ENCODER_TMDS, NULL);
 		drm_encoder_helper_add(encoder, &dce_v10_0_ext_helper_funcs);
 		break;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index 5af3721..8701661 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -1238,7 +1238,7 @@
 	u32 pixel_period;
 	u32 line_time = 0;
 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
-	u32 tmp, wm_mask;
+	u32 tmp, wm_mask, lb_vblank_lead_lines = 0;
 
 	if (amdgpu_crtc->base.enabled && num_heads && mode) {
 		pixel_period = 1000000 / (u32)mode->clock;
@@ -1321,6 +1321,7 @@
 		    (adev->mode_info.disp_priority == 2)) {
 			DRM_DEBUG_KMS("force priority to high\n");
 		}
+		lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
 	}
 
 	/* select wm A */
@@ -1345,6 +1346,8 @@
 	amdgpu_crtc->line_time = line_time;
 	amdgpu_crtc->wm_high = latency_watermark_a;
 	amdgpu_crtc->wm_low = latency_watermark_b;
+	/* Save number of lines the linebuffer leads before the scanout */
+	amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines;
 }
 
 /**
@@ -3719,7 +3722,7 @@
 	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1:
 	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2:
 		drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
-				 DRM_MODE_ENCODER_DAC);
+				 DRM_MODE_ENCODER_DAC, NULL);
 		drm_encoder_helper_add(encoder, &dce_v11_0_dac_helper_funcs);
 		break;
 	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
@@ -3730,15 +3733,15 @@
 		if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
 			amdgpu_encoder->rmx_type = RMX_FULL;
 			drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
-					 DRM_MODE_ENCODER_LVDS);
+					 DRM_MODE_ENCODER_LVDS, NULL);
 			amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_lcd_info(amdgpu_encoder);
 		} else if (amdgpu_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT)) {
 			drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
-					 DRM_MODE_ENCODER_DAC);
+					 DRM_MODE_ENCODER_DAC, NULL);
 			amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_dig_info(amdgpu_encoder);
 		} else {
 			drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
-					 DRM_MODE_ENCODER_TMDS);
+					 DRM_MODE_ENCODER_TMDS, NULL);
 			amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_dig_info(amdgpu_encoder);
 		}
 		drm_encoder_helper_add(encoder, &dce_v11_0_dig_helper_funcs);
@@ -3756,13 +3759,13 @@
 		amdgpu_encoder->is_ext_encoder = true;
 		if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT))
 			drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
-					 DRM_MODE_ENCODER_LVDS);
+					 DRM_MODE_ENCODER_LVDS, NULL);
 		else if (amdgpu_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT))
 			drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
-					 DRM_MODE_ENCODER_DAC);
+					 DRM_MODE_ENCODER_DAC, NULL);
 		else
 			drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs,
-					 DRM_MODE_ENCODER_TMDS);
+					 DRM_MODE_ENCODER_TMDS, NULL);
 		drm_encoder_helper_add(encoder, &dce_v11_0_ext_helper_funcs);
 		break;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index 4f7b49a..d0e128c 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -1193,7 +1193,7 @@
 	u32 pixel_period;
 	u32 line_time = 0;
 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
-	u32 tmp, wm_mask;
+	u32 tmp, wm_mask, lb_vblank_lead_lines = 0;
 
 	if (amdgpu_crtc->base.enabled && num_heads && mode) {
 		pixel_period = 1000000 / (u32)mode->clock;
@@ -1276,6 +1276,7 @@
 		    (adev->mode_info.disp_priority == 2)) {
 			DRM_DEBUG_KMS("force priority to high\n");
 		}
+		lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
 	}
 
 	/* select wm A */
@@ -1302,6 +1303,8 @@
 	amdgpu_crtc->line_time = line_time;
 	amdgpu_crtc->wm_high = latency_watermark_a;
 	amdgpu_crtc->wm_low = latency_watermark_b;
+	/* Save number of lines the linebuffer leads before the scanout */
+	amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines;
 }
 
 /**
@@ -3656,7 +3659,7 @@
 	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1:
 	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2:
 		drm_encoder_init(dev, encoder, &dce_v8_0_encoder_funcs,
-				 DRM_MODE_ENCODER_DAC);
+				 DRM_MODE_ENCODER_DAC, NULL);
 		drm_encoder_helper_add(encoder, &dce_v8_0_dac_helper_funcs);
 		break;
 	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1:
@@ -3667,15 +3670,15 @@
 		if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
 			amdgpu_encoder->rmx_type = RMX_FULL;
 			drm_encoder_init(dev, encoder, &dce_v8_0_encoder_funcs,
-					 DRM_MODE_ENCODER_LVDS);
+					 DRM_MODE_ENCODER_LVDS, NULL);
 			amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_lcd_info(amdgpu_encoder);
 		} else if (amdgpu_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT)) {
 			drm_encoder_init(dev, encoder, &dce_v8_0_encoder_funcs,
-					 DRM_MODE_ENCODER_DAC);
+					 DRM_MODE_ENCODER_DAC, NULL);
 			amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_dig_info(amdgpu_encoder);
 		} else {
 			drm_encoder_init(dev, encoder, &dce_v8_0_encoder_funcs,
-					 DRM_MODE_ENCODER_TMDS);
+					 DRM_MODE_ENCODER_TMDS, NULL);
 			amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_dig_info(amdgpu_encoder);
 		}
 		drm_encoder_helper_add(encoder, &dce_v8_0_dig_helper_funcs);
@@ -3693,13 +3696,13 @@
 		amdgpu_encoder->is_ext_encoder = true;
 		if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT))
 			drm_encoder_init(dev, encoder, &dce_v8_0_encoder_funcs,
-					 DRM_MODE_ENCODER_LVDS);
+					 DRM_MODE_ENCODER_LVDS, NULL);
 		else if (amdgpu_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT))
 			drm_encoder_init(dev, encoder, &dce_v8_0_encoder_funcs,
-					 DRM_MODE_ENCODER_DAC);
+					 DRM_MODE_ENCODER_DAC, NULL);
 		else
 			drm_encoder_init(dev, encoder, &dce_v8_0_encoder_funcs,
-					 DRM_MODE_ENCODER_TMDS);
+					 DRM_MODE_ENCODER_TMDS, NULL);
 		drm_encoder_helper_add(encoder, &dce_v8_0_ext_helper_funcs);
 		break;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 6776cf7..e1dcab9 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -268,7 +268,6 @@
 	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
-	mmCGTT_DRM_CLK_CTRL0, 0xffffffff, 0x00600100,
 	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
@@ -296,10 +295,6 @@
 	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
-	mmPCIE_INDEX, 0xffffffff, 0x0140001c,
-	mmPCIE_DATA, 0x000f0000, 0x00000000,
-	mmCGTT_DRM_CLK_CTRL0, 0xff000fff, 0x00000100,
-	mmHDP_XDP_CGTT_BLK_CTRL, 0xc0000fff, 0x00000104,
 	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 };
 
@@ -1000,7 +995,7 @@
 		adev->gfx.config.max_cu_per_sh = 16;
 		adev->gfx.config.max_sh_per_se = 1;
 		adev->gfx.config.max_backends_per_se = 4;
-		adev->gfx.config.max_texture_channel_caches = 8;
+		adev->gfx.config.max_texture_channel_caches = 16;
 		adev->gfx.config.max_gprs = 256;
 		adev->gfx.config.max_gs_threads = 32;
 		adev->gfx.config.max_hw_contexts = 8;
@@ -1613,6 +1608,296 @@
 			WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
 		}
 	case CHIP_FIJI:
+		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
+			switch (reg_offset) {
+			case 0:
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+				break;
+			case 1:
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+				break;
+			case 2:
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+				break;
+			case 3:
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+				break;
+			case 4:
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+				break;
+			case 5:
+				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+				break;
+			case 6:
+				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+				break;
+			case 7:
+				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
+						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+				break;
+			case 8:
+				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
+				break;
+			case 9:
+				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+						MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+				break;
+			case 10:
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+						MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+				break;
+			case 11:
+				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+						MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+				break;
+			case 12:
+				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+						MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+				break;
+			case 13:
+				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+				break;
+			case 14:
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+				break;
+			case 15:
+				gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
+						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+				break;
+			case 16:
+				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+				break;
+			case 17:
+				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+				break;
+			case 18:
+				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+				break;
+			case 19:
+				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+				break;
+			case 20:
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+				break;
+			case 21:
+				gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
+						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+				break;
+			case 22:
+				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
+						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+				break;
+			case 23:
+				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
+						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+				break;
+			case 24:
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+				break;
+			case 25:
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
+						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+				break;
+			case 26:
+				gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
+						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+				break;
+			case 27:
+				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+						MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+				break;
+			case 28:
+				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+						MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+				break;
+			case 29:
+				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+						MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+				break;
+			case 30:
+				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+						MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+				break;
+			default:
+				gb_tile_moden = 0;
+				break;
+			}
+			adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
+			WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
+		}
+		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
+			switch (reg_offset) {
+			case 0:
+				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+						NUM_BANKS(ADDR_SURF_8_BANK));
+				break;
+			case 1:
+				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+						NUM_BANKS(ADDR_SURF_8_BANK));
+				break;
+			case 2:
+				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+						NUM_BANKS(ADDR_SURF_8_BANK));
+				break;
+			case 3:
+				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+						NUM_BANKS(ADDR_SURF_8_BANK));
+				break;
+			case 4:
+				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+						NUM_BANKS(ADDR_SURF_8_BANK));
+				break;
+			case 5:
+				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+						NUM_BANKS(ADDR_SURF_8_BANK));
+				break;
+			case 6:
+				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+						NUM_BANKS(ADDR_SURF_8_BANK));
+				break;
+			case 8:
+				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
+						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+						NUM_BANKS(ADDR_SURF_8_BANK));
+				break;
+			case 9:
+				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+						NUM_BANKS(ADDR_SURF_8_BANK));
+				break;
+			case 10:
+				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+						NUM_BANKS(ADDR_SURF_8_BANK));
+				break;
+			case 11:
+				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+						NUM_BANKS(ADDR_SURF_8_BANK));
+				break;
+			case 12:
+				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+						NUM_BANKS(ADDR_SURF_8_BANK));
+				break;
+			case 13:
+				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+						NUM_BANKS(ADDR_SURF_8_BANK));
+				break;
+			case 14:
+				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+						NUM_BANKS(ADDR_SURF_4_BANK));
+				break;
+			case 7:
+				/* unused idx */
+				continue;
+			default:
+				gb_tile_moden = 0;
+				break;
+			}
+			adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden;
+			WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
+		}
+		break;
 	case CHIP_TONGA:
 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
 			switch (reg_offset) {
@@ -2971,10 +3256,13 @@
 	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
 	switch (adev->asic_type) {
 	case CHIP_TONGA:
-	case CHIP_FIJI:
 		amdgpu_ring_write(ring, 0x16000012);
 		amdgpu_ring_write(ring, 0x0000002A);
 		break;
+	case CHIP_FIJI:
+		amdgpu_ring_write(ring, 0x3a00161a);
+		amdgpu_ring_write(ring, 0x0000002e);
+		break;
 	case CHIP_TOPAZ:
 	case CHIP_CARRIZO:
 		amdgpu_ring_write(ring, 0x00000002);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 85bbcdc..ed8abb5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -40,7 +40,7 @@
 static void gmc_v7_0_set_gart_funcs(struct amdgpu_device *adev);
 static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev);
 
-MODULE_FIRMWARE("radeon/boniare_mc.bin");
+MODULE_FIRMWARE("radeon/bonaire_mc.bin");
 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
 
 /**
@@ -501,6 +501,7 @@
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE, 1);
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, EFFECTIVE_L2_QUEUE_SIZE, 7);
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
 	WREG32(mmVM_L2_CNTL, tmp);
 	tmp = REG_SET_FIELD(0, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
@@ -512,7 +513,7 @@
 	WREG32(mmVM_L2_CNTL3, tmp);
 	/* setup context0 */
 	WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gtt_start >> 12);
-	WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, (adev->mc.gtt_end >> 12) - 1);
+	WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gtt_end >> 12);
 	WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12);
 	WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
 			(u32)(adev->dummy_page.addr >> 12));
@@ -960,12 +961,10 @@
 
 static int gmc_v7_0_sw_fini(void *handle)
 {
-	int i;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	if (adev->vm_manager.enabled) {
-		for (i = 0; i < AMDGPU_NUM_VM; ++i)
-			fence_put(adev->vm_manager.active[i]);
+		amdgpu_vm_manager_fini(adev);
 		gmc_v7_0_vm_fini(adev);
 		adev->vm_manager.enabled = false;
 	}
@@ -1010,12 +1009,10 @@
 
 static int gmc_v7_0_suspend(void *handle)
 {
-	int i;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	if (adev->vm_manager.enabled) {
-		for (i = 0; i < AMDGPU_NUM_VM; ++i)
-			fence_put(adev->vm_manager.active[i]);
+		amdgpu_vm_manager_fini(adev);
 		gmc_v7_0_vm_fini(adev);
 		adev->vm_manager.enabled = false;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 1bcc4e7..d390284 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -629,6 +629,7 @@
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE, 1);
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, EFFECTIVE_L2_QUEUE_SIZE, 7);
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
+	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
 	WREG32(mmVM_L2_CNTL, tmp);
 	tmp = RREG32(mmVM_L2_CNTL2);
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
@@ -656,7 +657,7 @@
 	WREG32(mmVM_L2_CNTL4, tmp);
 	/* setup context0 */
 	WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gtt_start >> 12);
-	WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, (adev->mc.gtt_end >> 12) - 1);
+	WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gtt_end >> 12);
 	WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12);
 	WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
 			(u32)(adev->dummy_page.addr >> 12));
@@ -979,12 +980,10 @@
 
 static int gmc_v8_0_sw_fini(void *handle)
 {
-	int i;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	if (adev->vm_manager.enabled) {
-		for (i = 0; i < AMDGPU_NUM_VM; ++i)
-			fence_put(adev->vm_manager.active[i]);
+		amdgpu_vm_manager_fini(adev);
 		gmc_v8_0_vm_fini(adev);
 		adev->vm_manager.enabled = false;
 	}
@@ -1031,12 +1030,10 @@
 
 static int gmc_v8_0_suspend(void *handle)
 {
-	int i;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	if (adev->vm_manager.enabled) {
-		for (i = 0; i < AMDGPU_NUM_VM; ++i)
-			fence_put(adev->vm_manager.active[i]);
+		amdgpu_vm_manager_fini(adev);
 		gmc_v8_0_vm_fini(adev);
 		adev->vm_manager.enabled = false;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index 6a52db6..370c6c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -40,6 +40,9 @@
 
 #define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT	0x04
 #define GRBM_GFX_INDEX__VCE_INSTANCE_MASK	0x10
+#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR0 	0x8616
+#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR1 	0x8617
+#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR2 	0x8618
 
 #define VCE_V3_0_FW_SIZE	(384 * 1024)
 #define VCE_V3_0_STACK_SIZE	(64 * 1024)
@@ -130,9 +133,11 @@
 
 		/* set BUSY flag */
 		WREG32_P(mmVCE_STATUS, 1, ~1);
-
-		WREG32_P(mmVCE_VCPU_CNTL, VCE_VCPU_CNTL__CLK_EN_MASK,
-			~VCE_VCPU_CNTL__CLK_EN_MASK);
+		if (adev->asic_type >= CHIP_STONEY)
+			WREG32_P(mmVCE_VCPU_CNTL, 1, ~0x200001);
+		else
+			WREG32_P(mmVCE_VCPU_CNTL, VCE_VCPU_CNTL__CLK_EN_MASK,
+				~VCE_VCPU_CNTL__CLK_EN_MASK);
 
 		WREG32_P(mmVCE_SOFT_RESET,
 			 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
@@ -391,8 +396,12 @@
 	WREG32(mmVCE_LMI_SWAP_CNTL, 0);
 	WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
 	WREG32(mmVCE_LMI_VM_CTRL, 0);
-
-	WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8));
+	if (adev->asic_type >= CHIP_STONEY) {
+		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR0, (adev->vce.gpu_addr >> 8));
+		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR1, (adev->vce.gpu_addr >> 8));
+		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR2, (adev->vce.gpu_addr >> 8));
+	} else
+		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR, (adev->vce.gpu_addr >> 8));
 	offset = AMDGPU_VCE_FIRMWARE_OFFSET;
 	size = VCE_V3_0_FW_SIZE;
 	WREG32(mmVCE_VCPU_CACHE_OFFSET0, offset & 0x7fffffff);
@@ -576,6 +585,11 @@
 				      struct amdgpu_iv_entry *entry)
 {
 	DRM_DEBUG("IH: VCE\n");
+
+	WREG32_P(mmVCE_SYS_INT_STATUS,
+		VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK,
+		~VCE_SYS_INT_STATUS__VCE_SYS_INT_TRAP_INTERRUPT_INT_MASK);
+
 	switch (entry->src_data) {
 	case 0:
 		amdgpu_fence_process(&adev->vce.ring[0]);
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
index 144f50a..c89dc77 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
@@ -16,6 +16,8 @@
 	    TP_ARGS(sched_job),
 	    TP_STRUCT__entry(
 			     __field(struct amd_sched_entity *, entity)
+			     __field(struct amd_sched_job *, sched_job)
+			     __field(struct fence *, fence)
 			     __field(const char *, name)
 			     __field(u32, job_count)
 			     __field(int, hw_job_count)
@@ -23,16 +25,32 @@
 
 	    TP_fast_assign(
 			   __entry->entity = sched_job->s_entity;
+			   __entry->sched_job = sched_job;
+			   __entry->fence = &sched_job->s_fence->base;
 			   __entry->name = sched_job->sched->name;
 			   __entry->job_count = kfifo_len(
 				   &sched_job->s_entity->job_queue) / sizeof(sched_job);
 			   __entry->hw_job_count = atomic_read(
 				   &sched_job->sched->hw_rq_count);
 			   ),
-	    TP_printk("entity=%p, ring=%s, job count:%u, hw job count:%d",
-		      __entry->entity, __entry->name, __entry->job_count,
-		      __entry->hw_job_count)
+	    TP_printk("entity=%p, sched job=%p, fence=%p, ring=%s, job count:%u, hw job count:%d",
+		      __entry->entity, __entry->sched_job, __entry->fence, __entry->name,
+		      __entry->job_count, __entry->hw_job_count)
 );
+
+TRACE_EVENT(amd_sched_process_job,
+	    TP_PROTO(struct amd_sched_fence *fence),
+	    TP_ARGS(fence),
+	    TP_STRUCT__entry(
+		    __field(struct fence *, fence)
+		    ),
+
+	    TP_fast_assign(
+		    __entry->fence = &fence->base;
+		    ),
+	    TP_printk("fence=%p signaled", __entry->fence)
+);
+
 #endif
 
 /* This part must be outside protection */
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index 89619a5..3a4820e 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -30,10 +30,12 @@
 #define CREATE_TRACE_POINTS
 #include "gpu_sched_trace.h"
 
-static struct amd_sched_job *
-amd_sched_entity_pop_job(struct amd_sched_entity *entity);
+static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity);
 static void amd_sched_wakeup(struct amd_gpu_scheduler *sched);
 
+struct kmem_cache *sched_fence_slab;
+atomic_t sched_fence_slab_ref = ATOMIC_INIT(0);
+
 /* Initialize a given run queue struct */
 static void amd_sched_rq_init(struct amd_sched_rq *rq)
 {
@@ -61,36 +63,36 @@
 }
 
 /**
- * Select next job from a specified run queue with round robin policy.
- * Return NULL if nothing available.
+ * Select an entity which could provide a job to run
+ *
+ * @rq		The run queue to check.
+ *
+ * Try to find a ready entity, returns NULL if none found.
  */
-static struct amd_sched_job *
-amd_sched_rq_select_job(struct amd_sched_rq *rq)
+static struct amd_sched_entity *
+amd_sched_rq_select_entity(struct amd_sched_rq *rq)
 {
 	struct amd_sched_entity *entity;
-	struct amd_sched_job *sched_job;
 
 	spin_lock(&rq->lock);
 
 	entity = rq->current_entity;
 	if (entity) {
 		list_for_each_entry_continue(entity, &rq->entities, list) {
-			sched_job = amd_sched_entity_pop_job(entity);
-			if (sched_job) {
+			if (amd_sched_entity_is_ready(entity)) {
 				rq->current_entity = entity;
 				spin_unlock(&rq->lock);
-				return sched_job;
+				return entity;
 			}
 		}
 	}
 
 	list_for_each_entry(entity, &rq->entities, list) {
 
-		sched_job = amd_sched_entity_pop_job(entity);
-		if (sched_job) {
+		if (amd_sched_entity_is_ready(entity)) {
 			rq->current_entity = entity;
 			spin_unlock(&rq->lock);
-			return sched_job;
+			return entity;
 		}
 
 		if (entity == rq->current_entity)
@@ -174,6 +176,24 @@
 }
 
 /**
+ * Check if entity is ready
+ *
+ * @entity	The pointer to a valid scheduler entity
+ *
+ * Return true if entity could provide a job.
+ */
+static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity)
+{
+	if (kfifo_is_empty(&entity->job_queue))
+		return false;
+
+	if (ACCESS_ONCE(entity->dependency))
+		return false;
+
+	return true;
+}
+
+/**
  * Destroy a context entity
  *
  * @sched       Pointer to scheduler instance
@@ -208,32 +228,53 @@
 	amd_sched_wakeup(entity->sched);
 }
 
+static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity)
+{
+	struct amd_gpu_scheduler *sched = entity->sched;
+	struct fence * fence = entity->dependency;
+	struct amd_sched_fence *s_fence;
+
+	if (fence->context == entity->fence_context) {
+		/* We can ignore fences from ourself */
+		fence_put(entity->dependency);
+		return false;
+	}
+
+	s_fence = to_amd_sched_fence(fence);
+	if (s_fence && s_fence->sched == sched) {
+		/* Fence is from the same scheduler */
+		if (test_bit(AMD_SCHED_FENCE_SCHEDULED_BIT, &fence->flags)) {
+			/* Ignore it when it is already scheduled */
+			fence_put(entity->dependency);
+			return false;
+		}
+
+		/* Wait for fence to be scheduled */
+		entity->cb.func = amd_sched_entity_wakeup;
+		list_add_tail(&entity->cb.node, &s_fence->scheduled_cb);
+		return true;
+	}
+
+	if (!fence_add_callback(entity->dependency, &entity->cb,
+				amd_sched_entity_wakeup))
+		return true;
+
+	fence_put(entity->dependency);
+	return false;
+}
+
 static struct amd_sched_job *
 amd_sched_entity_pop_job(struct amd_sched_entity *entity)
 {
 	struct amd_gpu_scheduler *sched = entity->sched;
 	struct amd_sched_job *sched_job;
 
-	if (ACCESS_ONCE(entity->dependency))
-		return NULL;
-
 	if (!kfifo_out_peek(&entity->job_queue, &sched_job, sizeof(sched_job)))
 		return NULL;
 
-	while ((entity->dependency = sched->ops->dependency(sched_job))) {
-
-		if (entity->dependency->context == entity->fence_context) {
-			/* We can ignore fences from ourself */
-			fence_put(entity->dependency);
-			continue;
-		}
-
-		if (fence_add_callback(entity->dependency, &entity->cb,
-				       amd_sched_entity_wakeup))
-			fence_put(entity->dependency);
-		else
+	while ((entity->dependency = sched->ops->dependency(sched_job)))
+		if (amd_sched_entity_add_dependency_cb(entity))
 			return NULL;
-	}
 
 	return sched_job;
 }
@@ -247,6 +288,7 @@
  */
 static bool amd_sched_entity_in(struct amd_sched_job *sched_job)
 {
+	struct amd_gpu_scheduler *sched = sched_job->sched;
 	struct amd_sched_entity *entity = sched_job->s_entity;
 	bool added, first = false;
 
@@ -261,7 +303,7 @@
 
 	/* first job wakes up scheduler */
 	if (first)
-		amd_sched_wakeup(sched_job->sched);
+		amd_sched_wakeup(sched);
 
 	return added;
 }
@@ -273,22 +315,13 @@
  *
  * Returns 0 for success, negative error code otherwise.
  */
-int amd_sched_entity_push_job(struct amd_sched_job *sched_job)
+void amd_sched_entity_push_job(struct amd_sched_job *sched_job)
 {
 	struct amd_sched_entity *entity = sched_job->s_entity;
-	struct amd_sched_fence *fence = amd_sched_fence_create(
-		entity, sched_job->owner);
 
-	if (!fence)
-		return -ENOMEM;
-
-	fence_get(&fence->base);
-	sched_job->s_fence = fence;
-
+	trace_amd_sched_job(sched_job);
 	wait_event(entity->sched->job_scheduled,
 		   amd_sched_entity_in(sched_job));
-	trace_amd_sched_job(sched_job);
-	return 0;
 }
 
 /**
@@ -310,22 +343,22 @@
 }
 
 /**
- * Select next to run
+ * Select next entity to process
 */
-static struct amd_sched_job *
-amd_sched_select_job(struct amd_gpu_scheduler *sched)
+static struct amd_sched_entity *
+amd_sched_select_entity(struct amd_gpu_scheduler *sched)
 {
-	struct amd_sched_job *sched_job;
+	struct amd_sched_entity *entity;
 
 	if (!amd_sched_ready(sched))
 		return NULL;
 
 	/* Kernel run queue has higher priority than normal run queue*/
-	sched_job = amd_sched_rq_select_job(&sched->kernel_rq);
-	if (sched_job == NULL)
-		sched_job = amd_sched_rq_select_job(&sched->sched_rq);
+	entity = amd_sched_rq_select_entity(&sched->kernel_rq);
+	if (entity == NULL)
+		entity = amd_sched_rq_select_entity(&sched->sched_rq);
 
-	return sched_job;
+	return entity;
 }
 
 static void amd_sched_process_job(struct fence *f, struct fence_cb *cb)
@@ -343,6 +376,7 @@
 		list_del_init(&s_fence->list);
 		spin_unlock_irqrestore(&sched->fence_list_lock, flags);
 	}
+	trace_amd_sched_process_job(s_fence);
 	fence_put(&s_fence->base);
 	wake_up_interruptible(&sched->wake_up_worker);
 }
@@ -386,13 +420,16 @@
 		unsigned long flags;
 
 		wait_event_interruptible(sched->wake_up_worker,
-			kthread_should_stop() ||
-			(sched_job = amd_sched_select_job(sched)));
+			(entity = amd_sched_select_entity(sched)) ||
+			kthread_should_stop());
 
+		if (!entity)
+			continue;
+
+		sched_job = amd_sched_entity_pop_job(entity);
 		if (!sched_job)
 			continue;
 
-		entity = sched_job->s_entity;
 		s_fence = sched_job->s_fence;
 
 		if (sched->timeout != MAX_SCHEDULE_TIMEOUT) {
@@ -405,6 +442,7 @@
 
 		atomic_inc(&sched->hw_rq_count);
 		fence = sched->ops->run_job(sched_job);
+		amd_sched_fence_scheduled(s_fence);
 		if (fence) {
 			r = fence_add_callback(fence, &s_fence->cb,
 					       amd_sched_process_job);
@@ -450,6 +488,13 @@
 	init_waitqueue_head(&sched->wake_up_worker);
 	init_waitqueue_head(&sched->job_scheduled);
 	atomic_set(&sched->hw_rq_count, 0);
+	if (atomic_inc_return(&sched_fence_slab_ref) == 1) {
+		sched_fence_slab = kmem_cache_create(
+			"amd_sched_fence", sizeof(struct amd_sched_fence), 0,
+			SLAB_HWCACHE_ALIGN, NULL);
+		if (!sched_fence_slab)
+			return -ENOMEM;
+	}
 
 	/* Each scheduler will run on a seperate kernel thread */
 	sched->thread = kthread_run(amd_sched_main, sched, sched->name);
@@ -470,4 +515,6 @@
 {
 	if (sched->thread)
 		kthread_stop(sched->thread);
+	if (atomic_dec_and_test(&sched_fence_slab_ref))
+		kmem_cache_destroy(sched_fence_slab);
 }
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
index 929e9ac..a0f0ae5 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
@@ -27,9 +27,14 @@
 #include <linux/kfifo.h>
 #include <linux/fence.h>
 
+#define AMD_SCHED_FENCE_SCHEDULED_BIT	FENCE_FLAG_USER_BITS
+
 struct amd_gpu_scheduler;
 struct amd_sched_rq;
 
+extern struct kmem_cache *sched_fence_slab;
+extern atomic_t sched_fence_slab_ref;
+
 /**
  * A scheduler entity is a wrapper around a job queue or a group
  * of other entities. Entities take turns emitting jobs from their 
@@ -65,6 +70,7 @@
 struct amd_sched_fence {
 	struct fence                    base;
 	struct fence_cb                 cb;
+	struct list_head		scheduled_cb;
 	struct amd_gpu_scheduler	*sched;
 	spinlock_t			lock;
 	void                            *owner;
@@ -76,7 +82,6 @@
 	struct amd_gpu_scheduler        *sched;
 	struct amd_sched_entity         *s_entity;
 	struct amd_sched_fence          *s_fence;
-	void		                *owner;
 };
 
 extern const struct fence_ops amd_sched_fence_ops;
@@ -128,11 +133,11 @@
 			  uint32_t jobs);
 void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
 			   struct amd_sched_entity *entity);
-int amd_sched_entity_push_job(struct amd_sched_job *sched_job);
+void amd_sched_entity_push_job(struct amd_sched_job *sched_job);
 
 struct amd_sched_fence *amd_sched_fence_create(
 	struct amd_sched_entity *s_entity, void *owner);
+void amd_sched_fence_scheduled(struct amd_sched_fence *fence);
 void amd_sched_fence_signal(struct amd_sched_fence *fence);
 
-
 #endif
diff --git a/drivers/gpu/drm/amd/scheduler/sched_fence.c b/drivers/gpu/drm/amd/scheduler/sched_fence.c
index d802638..87c78ee 100644
--- a/drivers/gpu/drm/amd/scheduler/sched_fence.c
+++ b/drivers/gpu/drm/amd/scheduler/sched_fence.c
@@ -32,9 +32,11 @@
 	struct amd_sched_fence *fence = NULL;
 	unsigned seq;
 
-	fence = kzalloc(sizeof(struct amd_sched_fence), GFP_KERNEL);
+	fence = kmem_cache_zalloc(sched_fence_slab, GFP_KERNEL);
 	if (fence == NULL)
 		return NULL;
+
+	INIT_LIST_HEAD(&fence->scheduled_cb);
 	fence->owner = owner;
 	fence->sched = s_entity->sched;
 	spin_lock_init(&fence->lock);
@@ -55,6 +57,17 @@
 		FENCE_TRACE(&fence->base, "was already signaled\n");
 }
 
+void amd_sched_fence_scheduled(struct amd_sched_fence *s_fence)
+{
+	struct fence_cb *cur, *tmp;
+
+	set_bit(AMD_SCHED_FENCE_SCHEDULED_BIT, &s_fence->base.flags);
+	list_for_each_entry_safe(cur, tmp, &s_fence->scheduled_cb, node) {
+		list_del_init(&cur->node);
+		cur->func(&s_fence->base, cur);
+	}
+}
+
 static const char *amd_sched_fence_get_driver_name(struct fence *fence)
 {
 	return "amd_sched";
@@ -71,11 +84,17 @@
 	return true;
 }
 
+static void amd_sched_fence_release(struct fence *f)
+{
+	struct amd_sched_fence *fence = to_amd_sched_fence(f);
+	kmem_cache_free(sched_fence_slab, fence);
+}
+
 const struct fence_ops amd_sched_fence_ops = {
 	.get_driver_name = amd_sched_fence_get_driver_name,
 	.get_timeline_name = amd_sched_fence_get_timeline_name,
 	.enable_signaling = amd_sched_fence_enable_signaling,
 	.signaled = NULL,
 	.wait = fence_default_wait,
-	.release = NULL,
+	.release = amd_sched_fence_release,
 };
diff --git a/drivers/gpu/drm/armada/armada_crtc.c b/drivers/gpu/drm/armada/armada_crtc.c
index cebcab5..9bdc28c 100644
--- a/drivers/gpu/drm/armada/armada_crtc.c
+++ b/drivers/gpu/drm/armada/armada_crtc.c
@@ -1216,14 +1216,14 @@
 				       &armada_primary_plane_funcs,
 				       armada_primary_formats,
 				       ARRAY_SIZE(armada_primary_formats),
-				       DRM_PLANE_TYPE_PRIMARY);
+				       DRM_PLANE_TYPE_PRIMARY, NULL);
 	if (ret) {
 		kfree(primary);
 		return ret;
 	}
 
 	ret = drm_crtc_init_with_planes(drm, &dcrtc->crtc, &primary->base, NULL,
-					&armada_crtc_funcs);
+					&armada_crtc_funcs, NULL);
 	if (ret)
 		goto err_crtc_init;
 
diff --git a/drivers/gpu/drm/armada/armada_fb.c b/drivers/gpu/drm/armada/armada_fb.c
index 1c90969..5fa4bf2 100644
--- a/drivers/gpu/drm/armada/armada_fb.c
+++ b/drivers/gpu/drm/armada/armada_fb.c
@@ -35,7 +35,7 @@
 };
 
 struct armada_framebuffer *armada_framebuffer_create(struct drm_device *dev,
-	struct drm_mode_fb_cmd2 *mode, struct armada_gem_object *obj)
+	const struct drm_mode_fb_cmd2 *mode, struct armada_gem_object *obj)
 {
 	struct armada_framebuffer *dfb;
 	uint8_t format, config;
@@ -101,7 +101,7 @@
 }
 
 static struct drm_framebuffer *armada_fb_create(struct drm_device *dev,
-	struct drm_file *dfile, struct drm_mode_fb_cmd2 *mode)
+	struct drm_file *dfile, const struct drm_mode_fb_cmd2 *mode)
 {
 	struct armada_gem_object *obj;
 	struct armada_framebuffer *dfb;
diff --git a/drivers/gpu/drm/armada/armada_fb.h b/drivers/gpu/drm/armada/armada_fb.h
index ce3f12e..48073c4 100644
--- a/drivers/gpu/drm/armada/armada_fb.h
+++ b/drivers/gpu/drm/armada/armada_fb.h
@@ -19,6 +19,6 @@
 #define drm_fb_obj(fb) drm_fb_to_armada_fb(fb)->obj
 
 struct armada_framebuffer *armada_framebuffer_create(struct drm_device *,
-	struct drm_mode_fb_cmd2 *, struct armada_gem_object *);
+	const struct drm_mode_fb_cmd2 *, struct armada_gem_object *);
 
 #endif
diff --git a/drivers/gpu/drm/armada/armada_overlay.c b/drivers/gpu/drm/armada/armada_overlay.c
index 5c22b38..148e8a4 100644
--- a/drivers/gpu/drm/armada/armada_overlay.c
+++ b/drivers/gpu/drm/armada/armada_overlay.c
@@ -460,7 +460,7 @@
 				       &armada_ovl_plane_funcs,
 				       armada_ovl_formats,
 				       ARRAY_SIZE(armada_ovl_formats),
-				       DRM_PLANE_TYPE_OVERLAY);
+				       DRM_PLANE_TYPE_OVERLAY, NULL);
 	if (ret) {
 		kfree(dplane);
 		return ret;
diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h
index 05f6522..eb57159 100644
--- a/drivers/gpu/drm/ast/ast_drv.h
+++ b/drivers/gpu/drm/ast/ast_drv.h
@@ -256,7 +256,6 @@
 struct ast_fbdev {
 	struct drm_fb_helper helper;
 	struct ast_framebuffer afb;
-	struct list_head fbdev_list;
 	void *sysram;
 	int size;
 	struct ttm_bo_kmap_obj mapping;
@@ -309,7 +308,7 @@
 
 int ast_framebuffer_init(struct drm_device *dev,
 			 struct ast_framebuffer *ast_fb,
-			 struct drm_mode_fb_cmd2 *mode_cmd,
+			 const struct drm_mode_fb_cmd2 *mode_cmd,
 			 struct drm_gem_object *obj);
 
 int ast_fbdev_init(struct drm_device *dev);
diff --git a/drivers/gpu/drm/ast/ast_fb.c b/drivers/gpu/drm/ast/ast_fb.c
index a37e7ea..5320f8c 100644
--- a/drivers/gpu/drm/ast/ast_fb.c
+++ b/drivers/gpu/drm/ast/ast_fb.c
@@ -163,7 +163,7 @@
 };
 
 static int astfb_create_object(struct ast_fbdev *afbdev,
-			       struct drm_mode_fb_cmd2 *mode_cmd,
+			       const struct drm_mode_fb_cmd2 *mode_cmd,
 			       struct drm_gem_object **gobj_p)
 {
 	struct drm_device *dev = afbdev->helper.dev;
diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c
index 541a610..9759009 100644
--- a/drivers/gpu/drm/ast/ast_main.c
+++ b/drivers/gpu/drm/ast/ast_main.c
@@ -309,7 +309,7 @@
 
 int ast_framebuffer_init(struct drm_device *dev,
 			 struct ast_framebuffer *ast_fb,
-			 struct drm_mode_fb_cmd2 *mode_cmd,
+			 const struct drm_mode_fb_cmd2 *mode_cmd,
 			 struct drm_gem_object *obj)
 {
 	int ret;
@@ -327,7 +327,7 @@
 static struct drm_framebuffer *
 ast_user_framebuffer_create(struct drm_device *dev,
 	       struct drm_file *filp,
-	       struct drm_mode_fb_cmd2 *mode_cmd)
+	       const struct drm_mode_fb_cmd2 *mode_cmd)
 {
 	struct drm_gem_object *obj;
 	struct ast_framebuffer *ast_fb;
diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c
index 69d19f3..0123458 100644
--- a/drivers/gpu/drm/ast/ast_mode.c
+++ b/drivers/gpu/drm/ast/ast_mode.c
@@ -751,7 +751,7 @@
 		return -ENOMEM;
 
 	drm_encoder_init(dev, &ast_encoder->base, &ast_enc_funcs,
-			 DRM_MODE_ENCODER_DAC);
+			 DRM_MODE_ENCODER_DAC, NULL);
 	drm_encoder_helper_add(&ast_encoder->base, &ast_enc_helper_funcs);
 
 	ast_encoder->base.possible_crtcs = 1;
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c
index 9f6e234..468a14f 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c
@@ -344,7 +344,7 @@
 	ret = drm_crtc_init_with_planes(dev, &crtc->base,
 				&planes->primary->base,
 				planes->cursor ? &planes->cursor->base : NULL,
-				&atmel_hlcdc_crtc_funcs);
+				&atmel_hlcdc_crtc_funcs, NULL);
 	if (ret < 0)
 		goto fail;
 
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
index 244df0a..8168954 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
@@ -402,7 +402,7 @@
 }
 
 static struct drm_framebuffer *atmel_hlcdc_fb_create(struct drm_device *dev,
-		struct drm_file *file_priv, struct drm_mode_fb_cmd2 *mode_cmd)
+		struct drm_file *file_priv, const struct drm_mode_fb_cmd2 *mode_cmd)
 {
 	return drm_fb_cma_create(dev, file_priv, mode_cmd);
 }
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c
index 067e4c1..d112900 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c
@@ -256,7 +256,7 @@
 			       &atmel_hlcdc_panel_encoder_helper_funcs);
 	ret = drm_encoder_init(dev, &panel->base.encoder,
 			       &atmel_hlcdc_panel_encoder_funcs,
-			       DRM_MODE_ENCODER_LVDS);
+			       DRM_MODE_ENCODER_LVDS, NULL);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
index d0299ae..1ffe9c3 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
@@ -941,7 +941,7 @@
 	ret = drm_universal_plane_init(dev, &plane->base, 0,
 				       &layer_plane_funcs,
 				       desc->formats->formats,
-				       desc->formats->nformats, type);
+				       desc->formats->nformats, type, NULL);
 	if (ret)
 		return ERR_PTR(ret);
 
diff --git a/drivers/gpu/drm/bochs/bochs.h b/drivers/gpu/drm/bochs/bochs.h
index 71f2687..19b5ada 100644
--- a/drivers/gpu/drm/bochs/bochs.h
+++ b/drivers/gpu/drm/bochs/bochs.h
@@ -149,7 +149,7 @@
 
 int bochs_framebuffer_init(struct drm_device *dev,
 			   struct bochs_framebuffer *gfb,
-			   struct drm_mode_fb_cmd2 *mode_cmd,
+			   const struct drm_mode_fb_cmd2 *mode_cmd,
 			   struct drm_gem_object *obj);
 int bochs_bo_pin(struct bochs_bo *bo, u32 pl_flag, u64 *gpu_addr);
 int bochs_bo_unpin(struct bochs_bo *bo);
diff --git a/drivers/gpu/drm/bochs/bochs_fbdev.c b/drivers/gpu/drm/bochs/bochs_fbdev.c
index 09a0637..7520bf8 100644
--- a/drivers/gpu/drm/bochs/bochs_fbdev.c
+++ b/drivers/gpu/drm/bochs/bochs_fbdev.c
@@ -34,7 +34,7 @@
 };
 
 static int bochsfb_create_object(struct bochs_device *bochs,
-				 struct drm_mode_fb_cmd2 *mode_cmd,
+				 const struct drm_mode_fb_cmd2 *mode_cmd,
 				 struct drm_gem_object **gobj_p)
 {
 	struct drm_device *dev = bochs->dev;
diff --git a/drivers/gpu/drm/bochs/bochs_kms.c b/drivers/gpu/drm/bochs/bochs_kms.c
index 26bcd03..a88be6d 100644
--- a/drivers/gpu/drm/bochs/bochs_kms.c
+++ b/drivers/gpu/drm/bochs/bochs_kms.c
@@ -196,7 +196,7 @@
 
 	encoder->possible_crtcs = 0x1;
 	drm_encoder_init(dev, encoder, &bochs_encoder_encoder_funcs,
-			 DRM_MODE_ENCODER_DAC);
+			 DRM_MODE_ENCODER_DAC, NULL);
 	drm_encoder_helper_add(encoder, &bochs_encoder_helper_funcs);
 }
 
diff --git a/drivers/gpu/drm/bochs/bochs_mm.c b/drivers/gpu/drm/bochs/bochs_mm.c
index f69e6bf..d812ad0 100644
--- a/drivers/gpu/drm/bochs/bochs_mm.c
+++ b/drivers/gpu/drm/bochs/bochs_mm.c
@@ -484,7 +484,7 @@
 
 int bochs_framebuffer_init(struct drm_device *dev,
 			   struct bochs_framebuffer *gfb,
-			   struct drm_mode_fb_cmd2 *mode_cmd,
+			   const struct drm_mode_fb_cmd2 *mode_cmd,
 			   struct drm_gem_object *obj)
 {
 	int ret;
@@ -502,7 +502,7 @@
 static struct drm_framebuffer *
 bochs_user_framebuffer_create(struct drm_device *dev,
 			      struct drm_file *filp,
-			      struct drm_mode_fb_cmd2 *mode_cmd)
+			      const struct drm_mode_fb_cmd2 *mode_cmd)
 {
 	struct drm_gem_object *obj;
 	struct bochs_framebuffer *bochs_fb;
diff --git a/drivers/gpu/drm/cirrus/cirrus_drv.h b/drivers/gpu/drm/cirrus/cirrus_drv.h
index 7050615..b774d63 100644
--- a/drivers/gpu/drm/cirrus/cirrus_drv.h
+++ b/drivers/gpu/drm/cirrus/cirrus_drv.h
@@ -153,7 +153,6 @@
 struct cirrus_fbdev {
 	struct drm_fb_helper helper;
 	struct cirrus_framebuffer gfb;
-	struct list_head fbdev_list;
 	void *sysram;
 	int size;
 	int x1, y1, x2, y2; /* dirty rect */
@@ -207,7 +206,7 @@
 
 int cirrus_framebuffer_init(struct drm_device *dev,
 			   struct cirrus_framebuffer *gfb,
-			    struct drm_mode_fb_cmd2 *mode_cmd,
+			    const struct drm_mode_fb_cmd2 *mode_cmd,
 			    struct drm_gem_object *obj);
 
 bool cirrus_check_framebuffer(struct cirrus_device *cdev, int width, int height,
diff --git a/drivers/gpu/drm/cirrus/cirrus_fbdev.c b/drivers/gpu/drm/cirrus/cirrus_fbdev.c
index 589103b..3b5be72 100644
--- a/drivers/gpu/drm/cirrus/cirrus_fbdev.c
+++ b/drivers/gpu/drm/cirrus/cirrus_fbdev.c
@@ -135,7 +135,7 @@
 };
 
 static int cirrusfb_create_object(struct cirrus_fbdev *afbdev,
-			       struct drm_mode_fb_cmd2 *mode_cmd,
+			       const struct drm_mode_fb_cmd2 *mode_cmd,
 			       struct drm_gem_object **gobj_p)
 {
 	struct drm_device *dev = afbdev->helper.dev;
diff --git a/drivers/gpu/drm/cirrus/cirrus_main.c b/drivers/gpu/drm/cirrus/cirrus_main.c
index 055fd86..0907715 100644
--- a/drivers/gpu/drm/cirrus/cirrus_main.c
+++ b/drivers/gpu/drm/cirrus/cirrus_main.c
@@ -29,7 +29,7 @@
 
 int cirrus_framebuffer_init(struct drm_device *dev,
 			    struct cirrus_framebuffer *gfb,
-			    struct drm_mode_fb_cmd2 *mode_cmd,
+			    const struct drm_mode_fb_cmd2 *mode_cmd,
 			    struct drm_gem_object *obj)
 {
 	int ret;
@@ -47,7 +47,7 @@
 static struct drm_framebuffer *
 cirrus_user_framebuffer_create(struct drm_device *dev,
 			       struct drm_file *filp,
-			       struct drm_mode_fb_cmd2 *mode_cmd)
+			       const struct drm_mode_fb_cmd2 *mode_cmd)
 {
 	struct cirrus_device *cdev = dev->dev_private;
 	struct drm_gem_object *obj;
diff --git a/drivers/gpu/drm/cirrus/cirrus_mode.c b/drivers/gpu/drm/cirrus/cirrus_mode.c
index 61385f2..276719e 100644
--- a/drivers/gpu/drm/cirrus/cirrus_mode.c
+++ b/drivers/gpu/drm/cirrus/cirrus_mode.c
@@ -489,7 +489,7 @@
 	encoder->possible_crtcs = 0x1;
 
 	drm_encoder_init(dev, encoder, &cirrus_encoder_encoder_funcs,
-			 DRM_MODE_ENCODER_DAC);
+			 DRM_MODE_ENCODER_DAC, NULL);
 	drm_encoder_helper_add(encoder, &cirrus_encoder_helper_funcs);
 
 	return encoder;
diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index 7bb3845..6a21e5c 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -288,8 +288,8 @@
 	state->crtcs[index] = crtc;
 	crtc_state->state = state;
 
-	DRM_DEBUG_ATOMIC("Added [CRTC:%d] %p state to %p\n",
-			 crtc->base.id, crtc_state, state);
+	DRM_DEBUG_ATOMIC("Added [CRTC:%d:%s] %p state to %p\n",
+			 crtc->base.id, crtc->name, crtc_state, state);
 
 	return crtc_state;
 }
@@ -316,8 +316,7 @@
 	if (mode && memcmp(&state->mode, mode, sizeof(*mode)) == 0)
 		return 0;
 
-	if (state->mode_blob)
-		drm_property_unreference_blob(state->mode_blob);
+	drm_property_unreference_blob(state->mode_blob);
 	state->mode_blob = NULL;
 
 	if (mode) {
@@ -363,8 +362,7 @@
 	if (blob == state->mode_blob)
 		return 0;
 
-	if (state->mode_blob)
-		drm_property_unreference_blob(state->mode_blob);
+	drm_property_unreference_blob(state->mode_blob);
 	state->mode_blob = NULL;
 
 	if (blob) {
@@ -419,8 +417,7 @@
 		struct drm_property_blob *mode =
 			drm_property_lookup_blob(dev, val);
 		ret = drm_atomic_set_mode_prop_for_crtc(state, mode);
-		if (mode)
-			drm_property_unreference_blob(mode);
+		drm_property_unreference_blob(mode);
 		return ret;
 	}
 	else if (crtc->funcs->atomic_set_property)
@@ -432,11 +429,20 @@
 }
 EXPORT_SYMBOL(drm_atomic_crtc_set_property);
 
-/*
+/**
+ * drm_atomic_crtc_get_property - get property value from CRTC state
+ * @crtc: the drm CRTC to set a property on
+ * @state: the state object to get the property value from
+ * @property: the property to set
+ * @val: return location for the property value
+ *
  * This function handles generic/core properties and calls out to
  * driver's ->atomic_get_property() for driver properties.  To ensure
  * consistent behavior you must call this function rather than the
  * driver hook directly.
+ *
+ * RETURNS:
+ * Zero on success, error code on failure
  */
 static int
 drm_atomic_crtc_get_property(struct drm_crtc *crtc,
@@ -480,8 +486,8 @@
 	 */
 
 	if (state->active && !state->enable) {
-		DRM_DEBUG_ATOMIC("[CRTC:%d] active without enabled\n",
-				 crtc->base.id);
+		DRM_DEBUG_ATOMIC("[CRTC:%d:%s] active without enabled\n",
+				 crtc->base.id, crtc->name);
 		return -EINVAL;
 	}
 
@@ -490,15 +496,15 @@
 	 * be able to trigger. */
 	if (drm_core_check_feature(crtc->dev, DRIVER_ATOMIC) &&
 	    WARN_ON(state->enable && !state->mode_blob)) {
-		DRM_DEBUG_ATOMIC("[CRTC:%d] enabled without mode blob\n",
-				 crtc->base.id);
+		DRM_DEBUG_ATOMIC("[CRTC:%d:%s] enabled without mode blob\n",
+				 crtc->base.id, crtc->name);
 		return -EINVAL;
 	}
 
 	if (drm_core_check_feature(crtc->dev, DRIVER_ATOMIC) &&
 	    WARN_ON(!state->enable && state->mode_blob)) {
-		DRM_DEBUG_ATOMIC("[CRTC:%d] disabled with mode blob\n",
-				 crtc->base.id);
+		DRM_DEBUG_ATOMIC("[CRTC:%d:%s] disabled with mode blob\n",
+				 crtc->base.id, crtc->name);
 		return -EINVAL;
 	}
 
@@ -543,8 +549,8 @@
 	state->planes[index] = plane;
 	plane_state->state = state;
 
-	DRM_DEBUG_ATOMIC("Added [PLANE:%d] %p state to %p\n",
-			 plane->base.id, plane_state, state);
+	DRM_DEBUG_ATOMIC("Added [PLANE:%d:%s] %p state to %p\n",
+			 plane->base.id, plane->name, plane_state, state);
 
 	if (plane_state->crtc) {
 		struct drm_crtc_state *crtc_state;
@@ -619,11 +625,20 @@
 }
 EXPORT_SYMBOL(drm_atomic_plane_set_property);
 
-/*
+/**
+ * drm_atomic_plane_get_property - get property value from plane state
+ * @plane: the drm plane to set a property on
+ * @state: the state object to get the property value from
+ * @property: the property to set
+ * @val: return location for the property value
+ *
  * This function handles generic/core properties and calls out to
  * driver's ->atomic_get_property() for driver properties.  To ensure
  * consistent behavior you must call this function rather than the
  * driver hook directly.
+ *
+ * RETURNS:
+ * Zero on success, error code on failure
  */
 static int
 drm_atomic_plane_get_property(struct drm_plane *plane,
@@ -755,8 +770,8 @@
 	}
 
 	if (plane_switching_crtc(state->state, plane, state)) {
-		DRM_DEBUG_ATOMIC("[PLANE:%d] switching CRTC directly\n",
-				 plane->base.id);
+		DRM_DEBUG_ATOMIC("[PLANE:%d:%s] switching CRTC directly\n",
+				 plane->base.id, plane->name);
 		return -EINVAL;
 	}
 
@@ -875,11 +890,20 @@
 }
 EXPORT_SYMBOL(drm_atomic_connector_set_property);
 
-/*
+/**
+ * drm_atomic_connector_get_property - get property value from connector state
+ * @connector: the drm connector to set a property on
+ * @state: the state object to get the property value from
+ * @property: the property to set
+ * @val: return location for the property value
+ *
  * This function handles generic/core properties and calls out to
  * driver's ->atomic_get_property() for driver properties.  To ensure
  * consistent behavior you must call this function rather than the
  * driver hook directly.
+ *
+ * RETURNS:
+ * Zero on success, error code on failure
  */
 static int
 drm_atomic_connector_get_property(struct drm_connector *connector,
@@ -980,8 +1004,8 @@
 	}
 
 	if (crtc)
-		DRM_DEBUG_ATOMIC("Link plane state %p to [CRTC:%d]\n",
-				 plane_state, crtc->base.id);
+		DRM_DEBUG_ATOMIC("Link plane state %p to [CRTC:%d:%s]\n",
+				 plane_state, crtc->base.id, crtc->name);
 	else
 		DRM_DEBUG_ATOMIC("Link plane state %p to [NOCRTC]\n",
 				 plane_state);
@@ -1048,8 +1072,8 @@
 	conn_state->crtc = crtc;
 
 	if (crtc)
-		DRM_DEBUG_ATOMIC("Link connector state %p to [CRTC:%d]\n",
-				 conn_state, crtc->base.id);
+		DRM_DEBUG_ATOMIC("Link connector state %p to [CRTC:%d:%s]\n",
+				 conn_state, crtc->base.id, crtc->name);
 	else
 		DRM_DEBUG_ATOMIC("Link connector state %p to [NOCRTC]\n",
 				 conn_state);
@@ -1088,8 +1112,8 @@
 	if (ret)
 		return ret;
 
-	DRM_DEBUG_ATOMIC("Adding all current connectors for [CRTC:%d] to %p\n",
-			 crtc->base.id, state);
+	DRM_DEBUG_ATOMIC("Adding all current connectors for [CRTC:%d:%s] to %p\n",
+			 crtc->base.id, crtc->name, state);
 
 	/*
 	 * Changed connectors are already in @state, so only need to look at the
@@ -1169,8 +1193,9 @@
 			num_connected_connectors++;
 	}
 
-	DRM_DEBUG_ATOMIC("State %p has %i connectors for [CRTC:%d]\n",
-			 state, num_connected_connectors, crtc->base.id);
+	DRM_DEBUG_ATOMIC("State %p has %i connectors for [CRTC:%d:%s]\n",
+			 state, num_connected_connectors,
+			 crtc->base.id, crtc->name);
 
 	return num_connected_connectors;
 }
@@ -1191,12 +1216,7 @@
 retry:
 	drm_modeset_backoff(state->acquire_ctx);
 
-	ret = drm_modeset_lock(&state->dev->mode_config.connection_mutex,
-			       state->acquire_ctx);
-	if (ret)
-		goto retry;
-	ret = drm_modeset_lock_all_crtcs(state->dev,
-					 state->acquire_ctx);
+	ret = drm_modeset_lock_all_ctx(state->dev, state->acquire_ctx);
 	if (ret)
 		goto retry;
 }
@@ -1228,8 +1248,8 @@
 	for_each_plane_in_state(state, plane, plane_state, i) {
 		ret = drm_atomic_plane_check(plane, plane_state);
 		if (ret) {
-			DRM_DEBUG_ATOMIC("[PLANE:%d] atomic core check failed\n",
-					 plane->base.id);
+			DRM_DEBUG_ATOMIC("[PLANE:%d:%s] atomic core check failed\n",
+					 plane->base.id, plane->name);
 			return ret;
 		}
 	}
@@ -1237,8 +1257,8 @@
 	for_each_crtc_in_state(state, crtc, crtc_state, i) {
 		ret = drm_atomic_crtc_check(crtc, crtc_state);
 		if (ret) {
-			DRM_DEBUG_ATOMIC("[CRTC:%d] atomic core check failed\n",
-					 crtc->base.id);
+			DRM_DEBUG_ATOMIC("[CRTC:%d:%s] atomic core check failed\n",
+					 crtc->base.id, crtc->name);
 			return ret;
 		}
 	}
@@ -1249,8 +1269,8 @@
 	if (!state->allow_modeset) {
 		for_each_crtc_in_state(state, crtc, crtc_state, i) {
 			if (drm_atomic_crtc_needs_modeset(crtc_state)) {
-				DRM_DEBUG_ATOMIC("[CRTC:%d] requires full modeset\n",
-						 crtc->base.id);
+				DRM_DEBUG_ATOMIC("[CRTC:%d:%s] requires full modeset\n",
+						 crtc->base.id, crtc->name);
 				return -EINVAL;
 			}
 		}
@@ -1432,6 +1452,45 @@
 	return ret;
 }
 
+/**
+ * drm_atomic_clean_old_fb -- Unset old_fb pointers and set plane->fb pointers.
+ *
+ * @dev: drm device to check.
+ * @plane_mask: plane mask for planes that were updated.
+ * @ret: return value, can be -EDEADLK for a retry.
+ *
+ * Before doing an update plane->old_fb is set to plane->fb,
+ * but before dropping the locks old_fb needs to be set to NULL
+ * and plane->fb updated. This is a common operation for each
+ * atomic update, so this call is split off as a helper.
+ */
+void drm_atomic_clean_old_fb(struct drm_device *dev,
+			     unsigned plane_mask,
+			     int ret)
+{
+	struct drm_plane *plane;
+
+	/* if succeeded, fixup legacy plane crtc/fb ptrs before dropping
+	 * locks (ie. while it is still safe to deref plane->state).  We
+	 * need to do this here because the driver entry points cannot
+	 * distinguish between legacy and atomic ioctls.
+	 */
+	drm_for_each_plane_mask(plane, dev, plane_mask) {
+		if (ret == 0) {
+			struct drm_framebuffer *new_fb = plane->state->fb;
+			if (new_fb)
+				drm_framebuffer_reference(new_fb);
+			plane->fb = new_fb;
+			plane->crtc = plane->state->crtc;
+
+			if (plane->old_fb)
+				drm_framebuffer_unreference(plane->old_fb);
+		}
+		plane->old_fb = NULL;
+	}
+}
+EXPORT_SYMBOL(drm_atomic_clean_old_fb);
+
 int drm_mode_atomic_ioctl(struct drm_device *dev,
 			  void *data, struct drm_file *file_priv)
 {
@@ -1446,7 +1505,7 @@
 	struct drm_plane *plane;
 	struct drm_crtc *crtc;
 	struct drm_crtc_state *crtc_state;
-	unsigned plane_mask = 0;
+	unsigned plane_mask;
 	int ret = 0;
 	unsigned int i, j;
 
@@ -1486,6 +1545,7 @@
 	state->allow_modeset = !!(arg->flags & DRM_MODE_ATOMIC_ALLOW_MODESET);
 
 retry:
+	plane_mask = 0;
 	copied_objs = 0;
 	copied_props = 0;
 
@@ -1576,24 +1636,7 @@
 	}
 
 out:
-	/* if succeeded, fixup legacy plane crtc/fb ptrs before dropping
-	 * locks (ie. while it is still safe to deref plane->state).  We
-	 * need to do this here because the driver entry points cannot
-	 * distinguish between legacy and atomic ioctls.
-	 */
-	drm_for_each_plane_mask(plane, dev, plane_mask) {
-		if (ret == 0) {
-			struct drm_framebuffer *new_fb = plane->state->fb;
-			if (new_fb)
-				drm_framebuffer_reference(new_fb);
-			plane->fb = new_fb;
-			plane->crtc = plane->state->crtc;
-
-			if (plane->old_fb)
-				drm_framebuffer_unreference(plane->old_fb);
-		}
-		plane->old_fb = NULL;
-	}
+	drm_atomic_clean_old_fb(dev, plane_mask, ret);
 
 	if (ret && arg->flags & DRM_MODE_PAGE_FLIP_EVENT) {
 		/*
diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index 0c6f621..63f925b 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -52,6 +52,12 @@
  * drm_atomic_helper_disable_plane(), drm_atomic_helper_disable_plane() and the
  * various functions to implement set_property callbacks. New drivers must not
  * implement these functions themselves but must use the provided helpers.
+ *
+ * The atomic helper uses the same function table structures as all other
+ * modesetting helpers. See the documentation for struct &drm_crtc_helper_funcs,
+ * struct &drm_encoder_helper_funcs and struct &drm_connector_helper_funcs. It
+ * also shares the struct &drm_plane_helper_funcs function table with the plane
+ * helpers.
  */
 static void
 drm_atomic_helper_plane_changed(struct drm_atomic_state *state,
@@ -80,6 +86,27 @@
 	}
 }
 
+static bool
+check_pending_encoder_assignment(struct drm_atomic_state *state,
+				 struct drm_encoder *new_encoder,
+				 struct drm_connector *new_connector)
+{
+	struct drm_connector *connector;
+	struct drm_connector_state *conn_state;
+	int i;
+
+	for_each_connector_in_state(state, connector, conn_state, i) {
+		if (conn_state->best_encoder != new_encoder)
+			continue;
+
+		/* encoder already assigned and we're trying to re-steal it! */
+		if (connector->state->best_encoder != conn_state->best_encoder)
+			return false;
+	}
+
+	return true;
+}
+
 static struct drm_crtc *
 get_current_crtc_for_encoder(struct drm_device *dev,
 			     struct drm_encoder *encoder)
@@ -116,9 +143,9 @@
 	 */
 	WARN_ON(!drm_modeset_is_locked(&config->connection_mutex));
 
-	DRM_DEBUG_ATOMIC("[ENCODER:%d:%s] in use on [CRTC:%d], stealing it\n",
+	DRM_DEBUG_ATOMIC("[ENCODER:%d:%s] in use on [CRTC:%d:%s], stealing it\n",
 			 encoder->base.id, encoder->name,
-			 encoder_crtc->base.id);
+			 encoder_crtc->base.id, encoder_crtc->name);
 
 	crtc_state = drm_atomic_get_crtc_state(state, encoder_crtc);
 	if (IS_ERR(crtc_state))
@@ -210,17 +237,33 @@
 		return -EINVAL;
 	}
 
+	if (!drm_encoder_crtc_ok(new_encoder, connector_state->crtc)) {
+		DRM_DEBUG_ATOMIC("[ENCODER:%d:%s] incompatible with [CRTC:%d]\n",
+				 new_encoder->base.id,
+				 new_encoder->name,
+				 connector_state->crtc->base.id);
+		return -EINVAL;
+	}
+
 	if (new_encoder == connector_state->best_encoder) {
-		DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] keeps [ENCODER:%d:%s], now on [CRTC:%d]\n",
+		DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] keeps [ENCODER:%d:%s], now on [CRTC:%d:%s]\n",
 				 connector->base.id,
 				 connector->name,
 				 new_encoder->base.id,
 				 new_encoder->name,
-				 connector_state->crtc->base.id);
+				 connector_state->crtc->base.id,
+				 connector_state->crtc->name);
 
 		return 0;
 	}
 
+	if (!check_pending_encoder_assignment(state, new_encoder, connector)) {
+		DRM_DEBUG_ATOMIC("Encoder for [CONNECTOR:%d:%s] already assigned\n",
+				 connector->base.id,
+				 connector->name);
+		return -EINVAL;
+	}
+
 	encoder_crtc = get_current_crtc_for_encoder(state->dev,
 						    new_encoder);
 
@@ -243,12 +286,13 @@
 	crtc_state = state->crtc_states[idx];
 	crtc_state->connectors_changed = true;
 
-	DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] using [ENCODER:%d:%s] on [CRTC:%d]\n",
+	DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] using [ENCODER:%d:%s] on [CRTC:%d:%s]\n",
 			 connector->base.id,
 			 connector->name,
 			 new_encoder->base.id,
 			 new_encoder->name,
-			 connector_state->crtc->base.id);
+			 connector_state->crtc->base.id,
+			 connector_state->crtc->name);
 
 	return 0;
 }
@@ -332,8 +376,8 @@
 		ret = funcs->mode_fixup(crtc, &crtc_state->mode,
 					&crtc_state->adjusted_mode);
 		if (!ret) {
-			DRM_DEBUG_ATOMIC("[CRTC:%d] fixup failed\n",
-					 crtc->base.id);
+			DRM_DEBUG_ATOMIC("[CRTC:%d:%s] fixup failed\n",
+					 crtc->base.id, crtc->name);
 			return -EINVAL;
 		}
 	}
@@ -380,14 +424,14 @@
 
 	for_each_crtc_in_state(state, crtc, crtc_state, i) {
 		if (!drm_mode_equal(&crtc->state->mode, &crtc_state->mode)) {
-			DRM_DEBUG_ATOMIC("[CRTC:%d] mode changed\n",
-					 crtc->base.id);
+			DRM_DEBUG_ATOMIC("[CRTC:%d:%s] mode changed\n",
+					 crtc->base.id, crtc->name);
 			crtc_state->mode_changed = true;
 		}
 
 		if (crtc->state->enable != crtc_state->enable) {
-			DRM_DEBUG_ATOMIC("[CRTC:%d] enable changed\n",
-					 crtc->base.id);
+			DRM_DEBUG_ATOMIC("[CRTC:%d:%s] enable changed\n",
+					 crtc->base.id, crtc->name);
 
 			/*
 			 * For clarity this assignment is done here, but
@@ -428,18 +472,18 @@
 		 * a full modeset because update_connector_routing force that.
 		 */
 		if (crtc->state->active != crtc_state->active) {
-			DRM_DEBUG_ATOMIC("[CRTC:%d] active changed\n",
-					 crtc->base.id);
+			DRM_DEBUG_ATOMIC("[CRTC:%d:%s] active changed\n",
+					 crtc->base.id, crtc->name);
 			crtc_state->active_changed = true;
 		}
 
 		if (!drm_atomic_crtc_needs_modeset(crtc_state))
 			continue;
 
-		DRM_DEBUG_ATOMIC("[CRTC:%d] needs all connectors, enable: %c, active: %c\n",
-				 crtc->base.id,
+		DRM_DEBUG_ATOMIC("[CRTC:%d:%s] needs all connectors, enable: %c, active: %c\n",
+				 crtc->base.id, crtc->name,
 				 crtc_state->enable ? 'y' : 'n',
-			      crtc_state->active ? 'y' : 'n');
+				 crtc_state->active ? 'y' : 'n');
 
 		ret = drm_atomic_add_affected_connectors(state, crtc);
 		if (ret != 0)
@@ -453,8 +497,8 @@
 								crtc);
 
 		if (crtc_state->enable != !!num_connectors) {
-			DRM_DEBUG_ATOMIC("[CRTC:%d] enabled/connectors mismatch\n",
-					 crtc->base.id);
+			DRM_DEBUG_ATOMIC("[CRTC:%d:%s] enabled/connectors mismatch\n",
+					 crtc->base.id, crtc->name);
 
 			return -EINVAL;
 		}
@@ -501,8 +545,8 @@
 
 		ret = funcs->atomic_check(plane, plane_state);
 		if (ret) {
-			DRM_DEBUG_ATOMIC("[PLANE:%d] atomic driver check failed\n",
-					 plane->base.id);
+			DRM_DEBUG_ATOMIC("[PLANE:%d:%s] atomic driver check failed\n",
+					 plane->base.id, plane->name);
 			return ret;
 		}
 	}
@@ -517,8 +561,8 @@
 
 		ret = funcs->atomic_check(crtc, state->crtc_states[i]);
 		if (ret) {
-			DRM_DEBUG_ATOMIC("[CRTC:%d] atomic driver check failed\n",
-					 crtc->base.id);
+			DRM_DEBUG_ATOMIC("[CRTC:%d:%s] atomic driver check failed\n",
+					 crtc->base.id, crtc->name);
 			return ret;
 		}
 	}
@@ -631,8 +675,8 @@
 
 		funcs = crtc->helper_private;
 
-		DRM_DEBUG_ATOMIC("disabling [CRTC:%d]\n",
-				 crtc->base.id);
+		DRM_DEBUG_ATOMIC("disabling [CRTC:%d:%s]\n",
+				 crtc->base.id, crtc->name);
 
 
 		/* Right function depends upon target state. */
@@ -743,8 +787,8 @@
 		funcs = crtc->helper_private;
 
 		if (crtc->state->enable && funcs->mode_set_nofb) {
-			DRM_DEBUG_ATOMIC("modeset on [CRTC:%d]\n",
-					 crtc->base.id);
+			DRM_DEBUG_ATOMIC("modeset on [CRTC:%d:%s]\n",
+					 crtc->base.id, crtc->name);
 
 			funcs->mode_set_nofb(crtc);
 		}
@@ -843,8 +887,8 @@
 		funcs = crtc->helper_private;
 
 		if (crtc->state->enable) {
-			DRM_DEBUG_ATOMIC("enabling [CRTC:%d]\n",
-					 crtc->base.id);
+			DRM_DEBUG_ATOMIC("enabling [CRTC:%d:%s]\n",
+					 crtc->base.id, crtc->name);
 
 			if (funcs->enable)
 				funcs->enable(crtc);
@@ -1334,6 +1378,49 @@
 EXPORT_SYMBOL(drm_atomic_helper_commit_planes_on_crtc);
 
 /**
+ * drm_atomic_helper_disable_planes_on_crtc - helper to disable CRTC's planes
+ * @crtc: CRTC
+ * @atomic: if set, synchronize with CRTC's atomic_begin/flush hooks
+ *
+ * Disables all planes associated with the given CRTC. This can be
+ * used for instance in the CRTC helper disable callback to disable
+ * all planes before shutting down the display pipeline.
+ *
+ * If the atomic-parameter is set the function calls the CRTC's
+ * atomic_begin hook before and atomic_flush hook after disabling the
+ * planes.
+ *
+ * It is a bug to call this function without having implemented the
+ * ->atomic_disable() plane hook.
+ */
+void drm_atomic_helper_disable_planes_on_crtc(struct drm_crtc *crtc,
+					      bool atomic)
+{
+	const struct drm_crtc_helper_funcs *crtc_funcs =
+		crtc->helper_private;
+	struct drm_plane *plane;
+
+	if (atomic && crtc_funcs && crtc_funcs->atomic_begin)
+		crtc_funcs->atomic_begin(crtc, NULL);
+
+	drm_for_each_plane(plane, crtc->dev) {
+		const struct drm_plane_helper_funcs *plane_funcs =
+			plane->helper_private;
+
+		if (plane->state->crtc != crtc || !plane_funcs)
+			continue;
+
+		WARN_ON(!plane_funcs->atomic_disable);
+		if (plane_funcs->atomic_disable)
+			plane_funcs->atomic_disable(plane, NULL);
+	}
+
+	if (atomic && crtc_funcs && crtc_funcs->atomic_flush)
+		crtc_funcs->atomic_flush(crtc, NULL);
+}
+EXPORT_SYMBOL(drm_atomic_helper_disable_planes_on_crtc);
+
+/**
  * drm_atomic_helper_cleanup_planes - cleanup plane resources after commit
  * @dev: DRM device
  * @old_state: atomic state object with old state structures
@@ -1477,12 +1564,12 @@
 	drm_atomic_set_fb_for_plane(plane_state, fb);
 	plane_state->crtc_x = crtc_x;
 	plane_state->crtc_y = crtc_y;
-	plane_state->crtc_h = crtc_h;
 	plane_state->crtc_w = crtc_w;
+	plane_state->crtc_h = crtc_h;
 	plane_state->src_x = src_x;
 	plane_state->src_y = src_y;
-	plane_state->src_h = src_h;
 	plane_state->src_w = src_w;
+	plane_state->src_h = src_h;
 
 	if (plane == crtc->cursor)
 		state->legacy_cursor_update = true;
@@ -1553,6 +1640,9 @@
 		goto fail;
 	}
 
+	if (plane_state->crtc && (plane == plane->crtc->cursor))
+		plane_state->state->legacy_cursor_update = true;
+
 	ret = __drm_atomic_helper_disable_plane(plane, plane_state);
 	if (ret != 0)
 		goto fail;
@@ -1598,15 +1688,12 @@
 	drm_atomic_set_fb_for_plane(plane_state, NULL);
 	plane_state->crtc_x = 0;
 	plane_state->crtc_y = 0;
-	plane_state->crtc_h = 0;
 	plane_state->crtc_w = 0;
+	plane_state->crtc_h = 0;
 	plane_state->src_x = 0;
 	plane_state->src_y = 0;
-	plane_state->src_h = 0;
 	plane_state->src_w = 0;
-
-	if (plane->crtc && (plane == plane->crtc->cursor))
-		plane_state->state->legacy_cursor_update = true;
+	plane_state->src_h = 0;
 
 	return 0;
 }
@@ -1741,6 +1828,7 @@
 	struct drm_crtc_state *crtc_state;
 	struct drm_plane_state *primary_state;
 	struct drm_crtc *crtc = set->crtc;
+	int hdisplay, vdisplay;
 	int ret;
 
 	crtc_state = drm_atomic_get_crtc_state(state, crtc);
@@ -1783,19 +1871,21 @@
 	if (ret != 0)
 		return ret;
 
+	drm_crtc_get_hv_timing(set->mode, &hdisplay, &vdisplay);
+
 	drm_atomic_set_fb_for_plane(primary_state, set->fb);
 	primary_state->crtc_x = 0;
 	primary_state->crtc_y = 0;
-	primary_state->crtc_h = set->mode->vdisplay;
-	primary_state->crtc_w = set->mode->hdisplay;
+	primary_state->crtc_w = hdisplay;
+	primary_state->crtc_h = vdisplay;
 	primary_state->src_x = set->x << 16;
 	primary_state->src_y = set->y << 16;
 	if (primary_state->rotation & (BIT(DRM_ROTATE_90) | BIT(DRM_ROTATE_270))) {
-		primary_state->src_h = set->mode->hdisplay << 16;
-		primary_state->src_w = set->mode->vdisplay << 16;
+		primary_state->src_w = vdisplay << 16;
+		primary_state->src_h = hdisplay << 16;
 	} else {
-		primary_state->src_h = set->mode->vdisplay << 16;
-		primary_state->src_w = set->mode->hdisplay << 16;
+		primary_state->src_w = hdisplay << 16;
+		primary_state->src_h = vdisplay << 16;
 	}
 
 commit:
@@ -1807,6 +1897,161 @@
 }
 
 /**
+ * drm_atomic_helper_disable_all - disable all currently active outputs
+ * @dev: DRM device
+ * @ctx: lock acquisition context
+ *
+ * Loops through all connectors, finding those that aren't turned off and then
+ * turns them off by setting their DPMS mode to OFF and deactivating the CRTC
+ * that they are connected to.
+ *
+ * This is used for example in suspend/resume to disable all currently active
+ * functions when suspending.
+ *
+ * Note that if callers haven't already acquired all modeset locks this might
+ * return -EDEADLK, which must be handled by calling drm_modeset_backoff().
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure.
+ *
+ * See also:
+ * drm_atomic_helper_suspend(), drm_atomic_helper_resume()
+ */
+int drm_atomic_helper_disable_all(struct drm_device *dev,
+				  struct drm_modeset_acquire_ctx *ctx)
+{
+	struct drm_atomic_state *state;
+	struct drm_connector *conn;
+	int err;
+
+	state = drm_atomic_state_alloc(dev);
+	if (!state)
+		return -ENOMEM;
+
+	state->acquire_ctx = ctx;
+
+	drm_for_each_connector(conn, dev) {
+		struct drm_crtc *crtc = conn->state->crtc;
+		struct drm_crtc_state *crtc_state;
+
+		if (!crtc || conn->dpms != DRM_MODE_DPMS_ON)
+			continue;
+
+		crtc_state = drm_atomic_get_crtc_state(state, crtc);
+		if (IS_ERR(crtc_state)) {
+			err = PTR_ERR(crtc_state);
+			goto free;
+		}
+
+		crtc_state->active = false;
+	}
+
+	err = drm_atomic_commit(state);
+
+free:
+	if (err < 0)
+		drm_atomic_state_free(state);
+
+	return err;
+}
+EXPORT_SYMBOL(drm_atomic_helper_disable_all);
+
+/**
+ * drm_atomic_helper_suspend - subsystem-level suspend helper
+ * @dev: DRM device
+ *
+ * Duplicates the current atomic state, disables all active outputs and then
+ * returns a pointer to the original atomic state to the caller. Drivers can
+ * pass this pointer to the drm_atomic_helper_resume() helper upon resume to
+ * restore the output configuration that was active at the time the system
+ * entered suspend.
+ *
+ * Note that it is potentially unsafe to use this. The atomic state object
+ * returned by this function is assumed to be persistent. Drivers must ensure
+ * that this holds true. Before calling this function, drivers must make sure
+ * to suspend fbdev emulation so that nothing can be using the device.
+ *
+ * Returns:
+ * A pointer to a copy of the state before suspend on success or an ERR_PTR()-
+ * encoded error code on failure. Drivers should store the returned atomic
+ * state object and pass it to the drm_atomic_helper_resume() helper upon
+ * resume.
+ *
+ * See also:
+ * drm_atomic_helper_duplicate_state(), drm_atomic_helper_disable_all(),
+ * drm_atomic_helper_resume()
+ */
+struct drm_atomic_state *drm_atomic_helper_suspend(struct drm_device *dev)
+{
+	struct drm_modeset_acquire_ctx ctx;
+	struct drm_atomic_state *state;
+	int err;
+
+	drm_modeset_acquire_init(&ctx, 0);
+
+retry:
+	err = drm_modeset_lock_all_ctx(dev, &ctx);
+	if (err < 0) {
+		state = ERR_PTR(err);
+		goto unlock;
+	}
+
+	state = drm_atomic_helper_duplicate_state(dev, &ctx);
+	if (IS_ERR(state))
+		goto unlock;
+
+	err = drm_atomic_helper_disable_all(dev, &ctx);
+	if (err < 0) {
+		drm_atomic_state_free(state);
+		state = ERR_PTR(err);
+		goto unlock;
+	}
+
+unlock:
+	if (PTR_ERR(state) == -EDEADLK) {
+		drm_modeset_backoff(&ctx);
+		goto retry;
+	}
+
+	drm_modeset_drop_locks(&ctx);
+	drm_modeset_acquire_fini(&ctx);
+	return state;
+}
+EXPORT_SYMBOL(drm_atomic_helper_suspend);
+
+/**
+ * drm_atomic_helper_resume - subsystem-level resume helper
+ * @dev: DRM device
+ * @state: atomic state to resume to
+ *
+ * Calls drm_mode_config_reset() to synchronize hardware and software states,
+ * grabs all modeset locks and commits the atomic state object. This can be
+ * used in conjunction with the drm_atomic_helper_suspend() helper to
+ * implement suspend/resume for drivers that support atomic mode-setting.
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure.
+ *
+ * See also:
+ * drm_atomic_helper_suspend()
+ */
+int drm_atomic_helper_resume(struct drm_device *dev,
+			     struct drm_atomic_state *state)
+{
+	struct drm_mode_config *config = &dev->mode_config;
+	int err;
+
+	drm_mode_config_reset(dev);
+	drm_modeset_lock_all(dev);
+	state->acquire_ctx = config->acquire_ctx;
+	err = drm_atomic_commit(state);
+	drm_modeset_unlock_all(dev);
+
+	return err;
+}
+EXPORT_SYMBOL(drm_atomic_helper_resume);
+
+/**
  * drm_atomic_helper_crtc_set_property - helper for crtc properties
  * @crtc: DRM crtc
  * @property: DRM property
@@ -2162,6 +2407,12 @@
  * The simpler solution is to just reset the software state to everything off,
  * which is easiest to do by calling drm_mode_config_reset(). To facilitate this
  * the atomic helpers provide default reset implementations for all hooks.
+ *
+ * On the upside the precise state tracking of atomic simplifies system suspend
+ * and resume a lot. For drivers using drm_mode_config_reset() a complete recipe
+ * is implemented in drm_atomic_helper_suspend() and drm_atomic_helper_resume().
+ * For other drivers the building blocks are split out, see the documentation
+ * for these functions.
  */
 
 /**
@@ -2173,7 +2424,7 @@
  */
 void drm_atomic_helper_crtc_reset(struct drm_crtc *crtc)
 {
-	if (crtc->state && crtc->state->mode_blob)
+	if (crtc->state)
 		drm_property_unreference_blob(crtc->state->mode_blob);
 	kfree(crtc->state);
 	crtc->state = kzalloc(sizeof(*crtc->state), GFP_KERNEL);
@@ -2241,8 +2492,7 @@
 void __drm_atomic_helper_crtc_destroy_state(struct drm_crtc *crtc,
 					    struct drm_crtc_state *state)
 {
-	if (state->mode_blob)
-		drm_property_unreference_blob(state->mode_blob);
+	drm_property_unreference_blob(state->mode_blob);
 }
 EXPORT_SYMBOL(__drm_atomic_helper_crtc_destroy_state);
 
@@ -2419,7 +2669,9 @@
  * @ctx: lock acquisition context
  *
  * Makes a copy of the current atomic state by looping over all objects and
- * duplicating their respective states.
+ * duplicating their respective states. This is used for example by suspend/
+ * resume support code to save the state prior to suspend such that it can
+ * be restored upon resume.
  *
  * Note that this treats atomic state as persistent between save and restore.
  * Drivers must make sure that this is possible and won't result in confusion
@@ -2431,6 +2683,9 @@
  * Returns:
  * A pointer to the copy of the atomic state object on success or an
  * ERR_PTR()-encoded error code on failure.
+ *
+ * See also:
+ * drm_atomic_helper_suspend(), drm_atomic_helper_resume()
  */
 struct drm_atomic_state *
 drm_atomic_helper_duplicate_state(struct drm_device *dev,
diff --git a/drivers/gpu/drm/drm_bridge.c b/drivers/gpu/drm/drm_bridge.c
index 6b8f721..bd93453 100644
--- a/drivers/gpu/drm/drm_bridge.c
+++ b/drivers/gpu/drm/drm_bridge.c
@@ -31,14 +31,14 @@
 /**
  * DOC: overview
  *
- * drm_bridge represents a device that hangs on to an encoder. These are handy
- * when a regular drm_encoder entity isn't enough to represent the entire
+ * struct &drm_bridge represents a device that hangs on to an encoder. These are
+ * handy when a regular &drm_encoder entity isn't enough to represent the entire
  * encoder chain.
  *
- * A bridge is always associated to a single drm_encoder at a time, but can be
+ * A bridge is always attached to a single &drm_encoder at a time, but can be
  * either connected to it directly, or through an intermediate bridge:
  *
- * encoder ---> bridge B ---> bridge A
+ *     encoder ---> bridge B ---> bridge A
  *
  * Here, the output of the encoder feeds to bridge B, and that furthers feeds to
  * bridge A.
@@ -46,11 +46,16 @@
  * The driver using the bridge is responsible to make the associations between
  * the encoder and bridges. Once these links are made, the bridges will
  * participate along with encoder functions to perform mode_set/enable/disable
- * through the ops provided in drm_bridge_funcs.
+ * through the ops provided in &drm_bridge_funcs.
  *
  * drm_bridge, like drm_panel, aren't drm_mode_object entities like planes,
- * crtcs, encoders or connectors. They just provide additional hooks to get the
- * desired output at the end of the encoder chain.
+ * CRTCs, encoders or connectors and hence are not visible to userspace. They
+ * just provide additional hooks to get the desired output at the end of the
+ * encoder chain.
+ *
+ * Bridges can also be chained up using the next pointer in struct &drm_bridge.
+ *
+ * Both legacy CRTC helpers and the new atomic modeset helpers support bridges.
  */
 
 static DEFINE_MUTEX(bridge_lock);
@@ -122,34 +127,12 @@
 /**
  * DOC: bridge callbacks
  *
- * The drm_bridge_funcs ops are populated by the bridge driver. The drm
- * internals(atomic and crtc helpers) use the helpers defined in drm_bridge.c
- * These helpers call a specific drm_bridge_funcs op for all the bridges
+ * The &drm_bridge_funcs ops are populated by the bridge driver. The DRM
+ * internals (atomic and CRTC helpers) use the helpers defined in drm_bridge.c
+ * These helpers call a specific &drm_bridge_funcs op for all the bridges
  * during encoder configuration.
  *
- * When creating a bridge driver, one can implement drm_bridge_funcs op with
- * the help of these rough rules:
- *
- * pre_enable: this contains things needed to be done for the bridge before
- * its clock and timings are enabled by its source. For a bridge, its source
- * is generally the encoder or bridge just before it in the encoder chain.
- *
- * enable: this contains things needed to be done for the bridge once its
- * source is enabled. In other words, enable is called once the source is
- * ready with clock and timing needed by the bridge.
- *
- * disable: this contains things needed to be done for the bridge assuming
- * that its source is still enabled, i.e. clock and timings are still on.
- *
- * post_disable: this contains things needed to be done for the bridge once
- * its source is disabled, i.e. once clocks and timings are off.
- *
- * mode_fixup: this should fixup the given mode for the bridge. It is called
- * after the encoder's mode fixup. mode_fixup can also reject a mode completely
- * if it's unsuitable for the hardware.
- *
- * mode_set: this sets up the mode for the bridge. It assumes that its source
- * (an encoder or a bridge) has set the mode too.
+ * For detailed specification of the bridge callbacks see &drm_bridge_funcs.
  */
 
 /**
@@ -159,7 +142,7 @@
  * @mode: desired mode to be set for the bridge
  * @adjusted_mode: updated mode that works for this bridge
  *
- * Calls 'mode_fixup' drm_bridge_funcs op for all the bridges in the
+ * Calls ->mode_fixup() &drm_bridge_funcs op for all the bridges in the
  * encoder chain, starting from the first bridge to the last.
  *
  * Note: the bridge passed should be the one closest to the encoder
@@ -186,11 +169,11 @@
 EXPORT_SYMBOL(drm_bridge_mode_fixup);
 
 /**
- * drm_bridge_disable - calls 'disable' drm_bridge_funcs op for all
+ * drm_bridge_disable - calls ->disable() &drm_bridge_funcs op for all
  *			bridges in the encoder chain.
  * @bridge: bridge control structure
  *
- * Calls 'disable' drm_bridge_funcs op for all the bridges in the encoder
+ * Calls ->disable() &drm_bridge_funcs op for all the bridges in the encoder
  * chain, starting from the last bridge to the first. These are called before
  * calling the encoder's prepare op.
  *
@@ -208,11 +191,11 @@
 EXPORT_SYMBOL(drm_bridge_disable);
 
 /**
- * drm_bridge_post_disable - calls 'post_disable' drm_bridge_funcs op for
+ * drm_bridge_post_disable - calls ->post_disable() &drm_bridge_funcs op for
  *			     all bridges in the encoder chain.
  * @bridge: bridge control structure
  *
- * Calls 'post_disable' drm_bridge_funcs op for all the bridges in the
+ * Calls ->post_disable() &drm_bridge_funcs op for all the bridges in the
  * encoder chain, starting from the first bridge to the last. These are called
  * after completing the encoder's prepare op.
  *
@@ -236,7 +219,7 @@
  * @mode: desired mode to be set for the bridge
  * @adjusted_mode: updated mode that works for this bridge
  *
- * Calls 'mode_set' drm_bridge_funcs op for all the bridges in the
+ * Calls ->mode_set() &drm_bridge_funcs op for all the bridges in the
  * encoder chain, starting from the first bridge to the last.
  *
  * Note: the bridge passed should be the one closest to the encoder
@@ -256,11 +239,11 @@
 EXPORT_SYMBOL(drm_bridge_mode_set);
 
 /**
- * drm_bridge_pre_enable - calls 'pre_enable' drm_bridge_funcs op for all
+ * drm_bridge_pre_enable - calls ->pre_enable() &drm_bridge_funcs op for all
  *			   bridges in the encoder chain.
  * @bridge: bridge control structure
  *
- * Calls 'pre_enable' drm_bridge_funcs op for all the bridges in the encoder
+ * Calls ->pre_enable() &drm_bridge_funcs op for all the bridges in the encoder
  * chain, starting from the last bridge to the first. These are called
  * before calling the encoder's commit op.
  *
@@ -278,11 +261,11 @@
 EXPORT_SYMBOL(drm_bridge_pre_enable);
 
 /**
- * drm_bridge_enable - calls 'enable' drm_bridge_funcs op for all bridges
+ * drm_bridge_enable - calls ->enable() &drm_bridge_funcs op for all bridges
  *		       in the encoder chain.
  * @bridge: bridge control structure
  *
- * Calls 'enable' drm_bridge_funcs op for all the bridges in the encoder
+ * Calls ->enable() &drm_bridge_funcs op for all the bridges in the encoder
  * chain, starting from the first bridge to the last. These are called
  * after completing the encoder's commit op.
  *
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index 24c5434..62fa95f 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -45,7 +45,7 @@
 
 static struct drm_framebuffer *
 internal_framebuffer_create(struct drm_device *dev,
-			    struct drm_mode_fb_cmd2 *r,
+			    const struct drm_mode_fb_cmd2 *r,
 			    struct drm_file *file_priv);
 
 /* Avoid boilerplate.  I'm tired of typing. */
@@ -649,6 +649,18 @@
 
 DEFINE_WW_CLASS(crtc_ww_class);
 
+static unsigned int drm_num_crtcs(struct drm_device *dev)
+{
+	unsigned int num = 0;
+	struct drm_crtc *tmp;
+
+	drm_for_each_crtc(tmp, dev) {
+		num++;
+	}
+
+	return num;
+}
+
 /**
  * drm_crtc_init_with_planes - Initialise a new CRTC object with
  *    specified primary and cursor planes.
@@ -657,6 +669,7 @@
  * @primary: Primary plane for CRTC
  * @cursor: Cursor plane for CRTC
  * @funcs: callbacks for the new CRTC
+ * @name: printf style format string for the CRTC name, or NULL for default name
  *
  * Inits a new object created as base part of a driver crtc object.
  *
@@ -666,7 +679,8 @@
 int drm_crtc_init_with_planes(struct drm_device *dev, struct drm_crtc *crtc,
 			      struct drm_plane *primary,
 			      struct drm_plane *cursor,
-			      const struct drm_crtc_funcs *funcs)
+			      const struct drm_crtc_funcs *funcs,
+			      const char *name, ...)
 {
 	struct drm_mode_config *config = &dev->mode_config;
 	int ret;
@@ -682,6 +696,21 @@
 	if (ret)
 		return ret;
 
+	if (name) {
+		va_list ap;
+
+		va_start(ap, name);
+		crtc->name = kvasprintf(GFP_KERNEL, name, ap);
+		va_end(ap);
+	} else {
+		crtc->name = kasprintf(GFP_KERNEL, "crtc-%d",
+				       drm_num_crtcs(dev));
+	}
+	if (!crtc->name) {
+		drm_mode_object_put(dev, &crtc->base);
+		return -ENOMEM;
+	}
+
 	crtc->base.properties = &crtc->properties;
 
 	list_add_tail(&crtc->head, &config->crtc_list);
@@ -728,6 +757,8 @@
 	if (crtc->state && crtc->funcs->atomic_destroy_state)
 		crtc->funcs->atomic_destroy_state(crtc, crtc->state);
 
+	kfree(crtc->name);
+
 	memset(crtc, 0, sizeof(*crtc));
 }
 EXPORT_SYMBOL(drm_crtc_cleanup);
@@ -1075,6 +1106,7 @@
  * @encoder: the encoder to init
  * @funcs: callbacks for this encoder
  * @encoder_type: user visible type of the encoder
+ * @name: printf style format string for the encoder name, or NULL for default name
  *
  * Initialises a preallocated encoder. Encoder should be
  * subclassed as part of driver encoder objects.
@@ -1085,7 +1117,7 @@
 int drm_encoder_init(struct drm_device *dev,
 		      struct drm_encoder *encoder,
 		      const struct drm_encoder_funcs *funcs,
-		      int encoder_type)
+		      int encoder_type, const char *name, ...)
 {
 	int ret;
 
@@ -1098,9 +1130,17 @@
 	encoder->dev = dev;
 	encoder->encoder_type = encoder_type;
 	encoder->funcs = funcs;
-	encoder->name = kasprintf(GFP_KERNEL, "%s-%d",
-				  drm_encoder_enum_list[encoder_type].name,
-				  encoder->base.id);
+	if (name) {
+		va_list ap;
+
+		va_start(ap, name);
+		encoder->name = kvasprintf(GFP_KERNEL, name, ap);
+		va_end(ap);
+	} else {
+		encoder->name = kasprintf(GFP_KERNEL, "%s-%d",
+					  drm_encoder_enum_list[encoder_type].name,
+					  encoder->base.id);
+	}
 	if (!encoder->name) {
 		ret = -ENOMEM;
 		goto out_put;
@@ -1141,6 +1181,18 @@
 }
 EXPORT_SYMBOL(drm_encoder_cleanup);
 
+static unsigned int drm_num_planes(struct drm_device *dev)
+{
+	unsigned int num = 0;
+	struct drm_plane *tmp;
+
+	drm_for_each_plane(tmp, dev) {
+		num++;
+	}
+
+	return num;
+}
+
 /**
  * drm_universal_plane_init - Initialize a new universal plane object
  * @dev: DRM device
@@ -1150,6 +1202,7 @@
  * @formats: array of supported formats (%DRM_FORMAT_*)
  * @format_count: number of elements in @formats
  * @type: type of plane (overlay, primary, cursor)
+ * @name: printf style format string for the plane name, or NULL for default name
  *
  * Initializes a plane object of type @type.
  *
@@ -1160,7 +1213,8 @@
 			     unsigned long possible_crtcs,
 			     const struct drm_plane_funcs *funcs,
 			     const uint32_t *formats, unsigned int format_count,
-			     enum drm_plane_type type)
+			     enum drm_plane_type type,
+			     const char *name, ...)
 {
 	struct drm_mode_config *config = &dev->mode_config;
 	int ret;
@@ -1182,6 +1236,22 @@
 		return -ENOMEM;
 	}
 
+	if (name) {
+		va_list ap;
+
+		va_start(ap, name);
+		plane->name = kvasprintf(GFP_KERNEL, name, ap);
+		va_end(ap);
+	} else {
+		plane->name = kasprintf(GFP_KERNEL, "plane-%d",
+					drm_num_planes(dev));
+	}
+	if (!plane->name) {
+		kfree(plane->format_types);
+		drm_mode_object_put(dev, &plane->base);
+		return -ENOMEM;
+	}
+
 	memcpy(plane->format_types, formats, format_count * sizeof(uint32_t));
 	plane->format_count = format_count;
 	plane->possible_crtcs = possible_crtcs;
@@ -1240,7 +1310,7 @@
 
 	type = is_primary ? DRM_PLANE_TYPE_PRIMARY : DRM_PLANE_TYPE_OVERLAY;
 	return drm_universal_plane_init(dev, plane, possible_crtcs, funcs,
-					formats, format_count, type);
+					formats, format_count, type, NULL);
 }
 EXPORT_SYMBOL(drm_plane_init);
 
@@ -1272,6 +1342,8 @@
 	if (plane->state && plane->funcs->atomic_destroy_state)
 		plane->funcs->atomic_destroy_state(plane, plane->state);
 
+	kfree(plane->name);
+
 	memset(plane, 0, sizeof(*plane));
 }
 EXPORT_SYMBOL(drm_plane_cleanup);
@@ -1801,7 +1873,8 @@
 		copied = 0;
 		crtc_id = (uint32_t __user *)(unsigned long)card_res->crtc_id_ptr;
 		drm_for_each_crtc(crtc, dev) {
-			DRM_DEBUG_KMS("[CRTC:%d]\n", crtc->base.id);
+			DRM_DEBUG_KMS("[CRTC:%d:%s]\n",
+				      crtc->base.id, crtc->name);
 			if (put_user(crtc->base.id, crtc_id + copied)) {
 				ret = -EFAULT;
 				goto out;
@@ -2646,7 +2719,7 @@
 		ret = -ENOENT;
 		goto out;
 	}
-	DRM_DEBUG_KMS("[CRTC:%d]\n", crtc->base.id);
+	DRM_DEBUG_KMS("[CRTC:%d:%s]\n", crtc->base.id, crtc->name);
 
 	if (crtc_req->mode_valid) {
 		/* If we have a mode we need a framebuffer. */
@@ -3235,7 +3308,7 @@
 
 static struct drm_framebuffer *
 internal_framebuffer_create(struct drm_device *dev,
-			    struct drm_mode_fb_cmd2 *r,
+			    const struct drm_mode_fb_cmd2 *r,
 			    struct drm_file *file_priv)
 {
 	struct drm_mode_config *config = &dev->mode_config;
@@ -4785,9 +4858,7 @@
 
 	/* Do DPMS ourselves */
 	if (property == connector->dev->mode_config.dpms_property) {
-		ret = 0;
-		if (connector->funcs->dpms)
-			ret = (*connector->funcs->dpms)(connector, (int)value);
+		ret = (*connector->funcs->dpms)(connector, (int)value);
 	} else if (connector->funcs->set_property)
 		ret = connector->funcs->set_property(connector, property, value);
 
diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c
index ef53475..a02a7f9 100644
--- a/drivers/gpu/drm/drm_crtc_helper.c
+++ b/drivers/gpu/drm/drm_crtc_helper.c
@@ -51,6 +51,11 @@
  * the same callbacks which drivers can use to e.g. restore the modeset
  * configuration on resume with drm_helper_resume_force_mode().
  *
+ * Note that this helper library doesn't track the current power state of CRTCs
+ * and encoders. It can call callbacks like ->dpms() even though the hardware is
+ * already in the desired state. This deficiency has been fixed in the atomic
+ * helpers.
+ *
  * The driver callbacks are mostly compatible with the atomic modeset helpers,
  * except for the handling of the primary plane: Atomic helpers require that the
  * primary plane is implemented as a real standalone plane and not directly tied
@@ -62,6 +67,11 @@
  * converting to the plane helpers). New drivers must not use these functions
  * but need to implement the atomic interface instead, potentially using the
  * atomic helpers for that.
+ *
+ * These legacy modeset helpers use the same function table structures as
+ * all other modesetting helpers. See the documentation for struct
+ * &drm_crtc_helper_funcs, struct &drm_encoder_helper_funcs and struct
+ * &drm_connector_helper_funcs.
  */
 MODULE_AUTHOR("David Airlie, Jesse Barnes");
 MODULE_DESCRIPTION("DRM KMS helper");
@@ -206,8 +216,8 @@
  * @dev: DRM device
  *
  * This function walks through the entire mode setting configuration of @dev. It
- * will remove any crtc links of unused encoders and encoder links of
- * disconnected connectors. Then it will disable all unused encoders and crtcs
+ * will remove any CRTC links of unused encoders and encoder links of
+ * disconnected connectors. Then it will disable all unused encoders and CRTCs
  * either by calling their disable callback if available or by calling their
  * dpms callback with DRM_MODE_DPMS_OFF.
  */
@@ -329,7 +339,7 @@
 		DRM_DEBUG_KMS("CRTC fixup failed\n");
 		goto done;
 	}
-	DRM_DEBUG_KMS("[CRTC:%d]\n", crtc->base.id);
+	DRM_DEBUG_KMS("[CRTC:%d:%s]\n", crtc->base.id, crtc->name);
 
 	crtc->hwmode = *adjusted_mode;
 
@@ -445,11 +455,36 @@
  * drm_crtc_helper_set_config - set a new config from userspace
  * @set: mode set configuration
  *
- * Setup a new configuration, provided by the upper layers (either an ioctl call
- * from userspace or internally e.g. from the fbdev support code) in @set, and
- * enable it. This is the main helper functions for drivers that implement
- * kernel mode setting with the crtc helper functions and the assorted
- * ->prepare(), ->modeset() and ->commit() helper callbacks.
+ * The drm_crtc_helper_set_config() helper function implements the set_config
+ * callback of struct &drm_crtc_funcs for drivers using the legacy CRTC helpers.
+ *
+ * It first tries to locate the best encoder for each connector by calling the
+ * connector ->best_encoder() (struct &drm_connector_helper_funcs) helper
+ * operation.
+ *
+ * After locating the appropriate encoders, the helper function will call the
+ * mode_fixup encoder and CRTC helper operations to adjust the requested mode,
+ * or reject it completely in which case an error will be returned to the
+ * application. If the new configuration after mode adjustment is identical to
+ * the current configuration the helper function will return without performing
+ * any other operation.
+ *
+ * If the adjusted mode is identical to the current mode but changes to the
+ * frame buffer need to be applied, the drm_crtc_helper_set_config() function
+ * will call the CRTC ->mode_set_base() (struct &drm_crtc_helper_funcs) helper
+ * operation.
+ *
+ * If the adjusted mode differs from the current mode, or if the
+ * ->mode_set_base() helper operation is not provided, the helper function
+ * performs a full mode set sequence by calling the ->prepare(), ->mode_set()
+ * and ->commit() CRTC and encoder helper operations, in that order.
+ * Alternatively it can also use the dpms and disable helper operations. For
+ * details see struct &drm_crtc_helper_funcs and struct
+ * &drm_encoder_helper_funcs.
+ *
+ * This function is deprecated.  New drivers must implement atomic modeset
+ * support, for which this function is unsuitable. Instead drivers should use
+ * drm_atomic_helper_set_config().
  *
  * Returns:
  * Returns 0 on success, negative errno numbers on failure.
@@ -484,11 +519,13 @@
 		set->fb = NULL;
 
 	if (set->fb) {
-		DRM_DEBUG_KMS("[CRTC:%d] [FB:%d] #connectors=%d (x y) (%i %i)\n",
-				set->crtc->base.id, set->fb->base.id,
-				(int)set->num_connectors, set->x, set->y);
+		DRM_DEBUG_KMS("[CRTC:%d:%s] [FB:%d] #connectors=%d (x y) (%i %i)\n",
+			      set->crtc->base.id, set->crtc->name,
+			      set->fb->base.id,
+			      (int)set->num_connectors, set->x, set->y);
 	} else {
-		DRM_DEBUG_KMS("[CRTC:%d] [NOFB]\n", set->crtc->base.id);
+		DRM_DEBUG_KMS("[CRTC:%d:%s] [NOFB]\n",
+			      set->crtc->base.id, set->crtc->name);
 		drm_crtc_helper_disable(set->crtc);
 		return 0;
 	}
@@ -628,12 +665,12 @@
 			connector->encoder->crtc = new_crtc;
 		}
 		if (new_crtc) {
-			DRM_DEBUG_KMS("[CONNECTOR:%d:%s] to [CRTC:%d]\n",
-				connector->base.id, connector->name,
-				new_crtc->base.id);
+			DRM_DEBUG_KMS("[CONNECTOR:%d:%s] to [CRTC:%d:%s]\n",
+				      connector->base.id, connector->name,
+				      new_crtc->base.id, new_crtc->name);
 		} else {
 			DRM_DEBUG_KMS("[CONNECTOR:%d:%s] to [NOCRTC]\n",
-				connector->base.id, connector->name);
+				      connector->base.id, connector->name);
 		}
 	}
 
@@ -650,8 +687,8 @@
 			if (!drm_crtc_helper_set_mode(set->crtc, set->mode,
 						      set->x, set->y,
 						      save_set.fb)) {
-				DRM_ERROR("failed to set mode on [CRTC:%d]\n",
-					  set->crtc->base.id);
+				DRM_ERROR("failed to set mode on [CRTC:%d:%s]\n",
+					  set->crtc->base.id, set->crtc->name);
 				set->crtc->primary->fb = save_set.fb;
 				ret = -EINVAL;
 				goto fail;
@@ -758,10 +795,18 @@
  * @connector: affected connector
  * @mode: DPMS mode
  *
- * This is the main helper function provided by the crtc helper framework for
+ * The drm_helper_connector_dpms() helper function implements the ->dpms()
+ * callback of struct &drm_connector_funcs for drivers using the legacy CRTC helpers.
+ *
+ * This is the main helper function provided by the CRTC helper framework for
  * implementing the DPMS connector attribute. It computes the new desired DPMS
- * state for all encoders and crtcs in the output mesh and calls the ->dpms()
- * callback provided by the driver appropriately.
+ * state for all encoders and CRTCs in the output mesh and calls the ->dpms()
+ * callbacks provided by the driver in struct &drm_crtc_helper_funcs and struct
+ * &drm_encoder_helper_funcs appropriately.
+ *
+ * This function is deprecated.  New drivers must implement atomic modeset
+ * support, for which this function is unsuitable. Instead drivers should use
+ * drm_atomic_helper_connector_dpms().
  *
  * Returns:
  * Always returns 0.
@@ -818,7 +863,7 @@
  * metadata fields.
  */
 void drm_helper_mode_fill_fb_struct(struct drm_framebuffer *fb,
-				    struct drm_mode_fb_cmd2 *mode_cmd)
+				    const struct drm_mode_fb_cmd2 *mode_cmd)
 {
 	int i;
 
@@ -855,6 +900,12 @@
  * due to slight differences in allocating shared resources when the
  * configuration is restored in a different order than when userspace set it up)
  * need to use their own restore logic.
+ *
+ * This function is deprecated. New drivers should implement atomic mode-
+ * setting and use the atomic suspend/resume helpers.
+ *
+ * See also:
+ * drm_atomic_helper_suspend(), drm_atomic_helper_resume()
  */
 void drm_helper_resume_force_mode(struct drm_device *dev)
 {
@@ -913,9 +964,9 @@
  * @old_fb: previous framebuffer
  *
  * This function implements a callback useable as the ->mode_set callback
- * required by the crtc helpers. Besides the atomic plane helper functions for
+ * required by the CRTC helpers. Besides the atomic plane helper functions for
  * the primary plane the driver must also provide the ->mode_set_nofb callback
- * to set up the crtc.
+ * to set up the CRTC.
  *
  * This is a transitional helper useful for converting drivers to the atomic
  * interfaces.
@@ -979,7 +1030,7 @@
  * @old_fb: previous framebuffer
  *
  * This function implements a callback useable as the ->mode_set_base used
- * required by the crtc helpers. The driver must provide the atomic plane helper
+ * required by the CRTC helpers. The driver must provide the atomic plane helper
  * functions for the primary plane.
  *
  * This is a transitional helper useful for converting drivers to the atomic
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 9362609..7dd6728 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -160,6 +160,11 @@
 		goto out_unlock;
 	}
 
+	if (!file_priv->allowed_master) {
+		ret = drm_new_set_master(dev, file_priv);
+		goto out_unlock;
+	}
+
 	file_priv->minor->master = drm_master_get(file_priv->master);
 	file_priv->is_master = 1;
 	if (dev->driver->master_set) {
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index d5d2c03..c214f12 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -2545,6 +2545,33 @@
 	return clock;
 }
 
+static u8 drm_match_cea_mode_clock_tolerance(const struct drm_display_mode *to_match,
+					     unsigned int clock_tolerance)
+{
+	u8 mode;
+
+	if (!to_match->clock)
+		return 0;
+
+	for (mode = 0; mode < ARRAY_SIZE(edid_cea_modes); mode++) {
+		const struct drm_display_mode *cea_mode = &edid_cea_modes[mode];
+		unsigned int clock1, clock2;
+
+		/* Check both 60Hz and 59.94Hz */
+		clock1 = cea_mode->clock;
+		clock2 = cea_mode_alternate_clock(cea_mode);
+
+		if (abs(to_match->clock - clock1) > clock_tolerance &&
+		    abs(to_match->clock - clock2) > clock_tolerance)
+			continue;
+
+		if (drm_mode_equal_no_clocks(to_match, cea_mode))
+			return mode + 1;
+	}
+
+	return 0;
+}
+
 /**
  * drm_match_cea_mode - look for a CEA mode matching given mode
  * @to_match: display mode
@@ -2609,6 +2636,33 @@
 	return cea_mode_alternate_clock(hdmi_mode);
 }
 
+static u8 drm_match_hdmi_mode_clock_tolerance(const struct drm_display_mode *to_match,
+					      unsigned int clock_tolerance)
+{
+	u8 mode;
+
+	if (!to_match->clock)
+		return 0;
+
+	for (mode = 0; mode < ARRAY_SIZE(edid_4k_modes); mode++) {
+		const struct drm_display_mode *hdmi_mode = &edid_4k_modes[mode];
+		unsigned int clock1, clock2;
+
+		/* Make sure to also match alternate clocks */
+		clock1 = hdmi_mode->clock;
+		clock2 = hdmi_mode_alternate_clock(hdmi_mode);
+
+		if (abs(to_match->clock - clock1) > clock_tolerance &&
+		    abs(to_match->clock - clock2) > clock_tolerance)
+			continue;
+
+		if (drm_mode_equal_no_clocks(to_match, hdmi_mode))
+			return mode + 1;
+	}
+
+	return 0;
+}
+
 /*
  * drm_match_hdmi_mode - look for a HDMI mode matching given mode
  * @to_match: display mode
@@ -3119,14 +3173,18 @@
 	u8 mode_idx;
 	const char *type;
 
-	mode_idx = drm_match_cea_mode(mode) - 1;
+	/*
+	 * allow 5kHz clock difference either way to account for
+	 * the 10kHz clock resolution limit of detailed timings.
+	 */
+	mode_idx = drm_match_cea_mode_clock_tolerance(mode, 5) - 1;
 	if (mode_idx < ARRAY_SIZE(edid_cea_modes)) {
 		type = "CEA";
 		cea_mode = &edid_cea_modes[mode_idx];
 		clock1 = cea_mode->clock;
 		clock2 = cea_mode_alternate_clock(cea_mode);
 	} else {
-		mode_idx = drm_match_hdmi_mode(mode) - 1;
+		mode_idx = drm_match_hdmi_mode_clock_tolerance(mode, 5) - 1;
 		if (mode_idx < ARRAY_SIZE(edid_4k_modes)) {
 			type = "HDMI";
 			cea_mode = &edid_4k_modes[mode_idx];
diff --git a/drivers/gpu/drm/drm_fb_cma_helper.c b/drivers/gpu/drm/drm_fb_cma_helper.c
index c19a625..b7d5b84 100644
--- a/drivers/gpu/drm/drm_fb_cma_helper.c
+++ b/drivers/gpu/drm/drm_fb_cma_helper.c
@@ -74,7 +74,7 @@
 };
 
 static struct drm_fb_cma *drm_fb_cma_alloc(struct drm_device *dev,
-	struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_cma_object **obj,
+	const const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_cma_object **obj,
 	unsigned int num_planes)
 {
 	struct drm_fb_cma *fb_cma;
@@ -107,7 +107,7 @@
  * checked before calling this function.
  */
 struct drm_framebuffer *drm_fb_cma_create(struct drm_device *dev,
-	struct drm_file *file_priv, struct drm_mode_fb_cmd2 *mode_cmd)
+	struct drm_file *file_priv, const struct drm_mode_fb_cmd2 *mode_cmd)
 {
 	struct drm_fb_cma *fb_cma;
 	struct drm_gem_cma_object *objs[4];
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index e673c13..69cbab5 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -342,6 +342,7 @@
 	struct drm_plane *plane;
 	struct drm_atomic_state *state;
 	int i, ret;
+	unsigned plane_mask;
 
 	state = drm_atomic_state_alloc(dev);
 	if (!state)
@@ -349,11 +350,10 @@
 
 	state->acquire_ctx = dev->mode_config.acquire_ctx;
 retry:
+	plane_mask = 0;
 	drm_for_each_plane(plane, dev) {
 		struct drm_plane_state *plane_state;
 
-		plane->old_fb = plane->fb;
-
 		plane_state = drm_atomic_get_plane_state(state, plane);
 		if (IS_ERR(plane_state)) {
 			ret = PTR_ERR(plane_state);
@@ -362,6 +362,9 @@
 
 		plane_state->rotation = BIT(DRM_ROTATE_0);
 
+		plane->old_fb = plane->fb;
+		plane_mask |= 1 << drm_plane_index(plane);
+
 		/* disable non-primary: */
 		if (plane->type == DRM_PLANE_TYPE_PRIMARY)
 			continue;
@@ -382,19 +385,7 @@
 	ret = drm_atomic_commit(state);
 
 fail:
-	drm_for_each_plane(plane, dev) {
-		if (ret == 0) {
-			struct drm_framebuffer *new_fb = plane->state->fb;
-			if (new_fb)
-				drm_framebuffer_reference(new_fb);
-			plane->fb = new_fb;
-			plane->crtc = plane->state->crtc;
-
-			if (plane->old_fb)
-				drm_framebuffer_unreference(plane->old_fb);
-		}
-		plane->old_fb = NULL;
-	}
+	drm_atomic_clean_old_fb(dev, plane_mask, ret);
 
 	if (ret == -EDEADLK)
 		goto backoff;
@@ -1236,7 +1227,9 @@
 	struct drm_fb_helper *fb_helper = info->par;
 	struct drm_device *dev = fb_helper->dev;
 	struct drm_atomic_state *state;
+	struct drm_plane *plane;
 	int i, ret;
+	unsigned plane_mask;
 
 	state = drm_atomic_state_alloc(dev);
 	if (!state)
@@ -1244,19 +1237,22 @@
 
 	state->acquire_ctx = dev->mode_config.acquire_ctx;
 retry:
+	plane_mask = 0;
 	for(i = 0; i < fb_helper->crtc_count; i++) {
 		struct drm_mode_set *mode_set;
 
 		mode_set = &fb_helper->crtc_info[i].mode_set;
 
-		mode_set->crtc->primary->old_fb = mode_set->crtc->primary->fb;
-
 		mode_set->x = var->xoffset;
 		mode_set->y = var->yoffset;
 
 		ret = __drm_atomic_helper_set_config(mode_set, state);
 		if (ret != 0)
 			goto fail;
+
+		plane = mode_set->crtc->primary;
+		plane_mask |= drm_plane_index(plane);
+		plane->old_fb = plane->fb;
 	}
 
 	ret = drm_atomic_commit(state);
@@ -1268,26 +1264,7 @@
 
 
 fail:
-	for(i = 0; i < fb_helper->crtc_count; i++) {
-		struct drm_mode_set *mode_set;
-		struct drm_plane *plane;
-
-		mode_set = &fb_helper->crtc_info[i].mode_set;
-		plane = mode_set->crtc->primary;
-
-		if (ret == 0) {
-			struct drm_framebuffer *new_fb = plane->state->fb;
-
-			if (new_fb)
-				drm_framebuffer_reference(new_fb);
-			plane->fb = new_fb;
-			plane->crtc = plane->state->crtc;
-
-			if (plane->old_fb)
-				drm_framebuffer_unreference(plane->old_fb);
-		}
-		plane->old_fb = NULL;
-	}
+	drm_atomic_clean_old_fb(dev, plane_mask, ret);
 
 	if (ret == -EDEADLK)
 		goto backoff;
diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c
index c59ce4d..1ea8790 100644
--- a/drivers/gpu/drm/drm_fops.c
+++ b/drivers/gpu/drm/drm_fops.c
@@ -126,6 +126,60 @@
 }
 
 /**
+ * drm_new_set_master - Allocate a new master object and become master for the
+ * associated master realm.
+ *
+ * @dev: The associated device.
+ * @fpriv: File private identifying the client.
+ *
+ * This function must be called with dev::struct_mutex held.
+ * Returns negative error code on failure. Zero on success.
+ */
+int drm_new_set_master(struct drm_device *dev, struct drm_file *fpriv)
+{
+	struct drm_master *old_master;
+	int ret;
+
+	lockdep_assert_held_once(&dev->master_mutex);
+
+	/* create a new master */
+	fpriv->minor->master = drm_master_create(fpriv->minor);
+	if (!fpriv->minor->master)
+		return -ENOMEM;
+
+	/* take another reference for the copy in the local file priv */
+	old_master = fpriv->master;
+	fpriv->master = drm_master_get(fpriv->minor->master);
+
+	if (dev->driver->master_create) {
+		ret = dev->driver->master_create(dev, fpriv->master);
+		if (ret)
+			goto out_err;
+	}
+	if (dev->driver->master_set) {
+		ret = dev->driver->master_set(dev, fpriv, true);
+		if (ret)
+			goto out_err;
+	}
+
+	fpriv->is_master = 1;
+	fpriv->allowed_master = 1;
+	fpriv->authenticated = 1;
+	if (old_master)
+		drm_master_put(&old_master);
+
+	return 0;
+
+out_err:
+	/* drop both references and restore old master on failure */
+	drm_master_put(&fpriv->minor->master);
+	drm_master_put(&fpriv->master);
+	fpriv->master = old_master;
+
+	return ret;
+}
+
+/**
  * Called whenever a process opens /dev/drm.
  *
  * \param filp file pointer.
@@ -172,6 +226,8 @@
 	init_waitqueue_head(&priv->event_wait);
 	priv->event_space = 4096; /* set aside 4k for event buffer */
 
+	mutex_init(&priv->event_read_lock);
+
 	if (drm_core_check_feature(dev, DRIVER_GEM))
 		drm_gem_open(dev, priv);
 
@@ -189,35 +245,9 @@
 	mutex_lock(&dev->master_mutex);
 	if (drm_is_primary_client(priv) && !priv->minor->master) {
 		/* create a new master */
-		priv->minor->master = drm_master_create(priv->minor);
-		if (!priv->minor->master) {
-			ret = -ENOMEM;
+		ret = drm_new_set_master(dev, priv);
+		if (ret)
 			goto out_close;
-		}
-
-		priv->is_master = 1;
-		/* take another reference for the copy in the local file priv */
-		priv->master = drm_master_get(priv->minor->master);
-		priv->authenticated = 1;
-
-		if (dev->driver->master_create) {
-			ret = dev->driver->master_create(dev, priv->master);
-			if (ret) {
-				/* drop both references if this fails */
-				drm_master_put(&priv->minor->master);
-				drm_master_put(&priv->master);
-				goto out_close;
-			}
-		}
-		if (dev->driver->master_set) {
-			ret = dev->driver->master_set(dev, priv, true);
-			if (ret) {
-				/* drop both references if this fails */
-				drm_master_put(&priv->minor->master);
-				drm_master_put(&priv->master);
-				goto out_close;
-			}
-		}
 	} else if (drm_is_primary_client(priv)) {
 		/* get a reference to the master */
 		priv->master = drm_master_get(priv->minor->master);
@@ -483,14 +513,28 @@
 {
 	struct drm_file *file_priv = filp->private_data;
 	struct drm_device *dev = file_priv->minor->dev;
-	ssize_t ret = 0;
+	ssize_t ret;
 
 	if (!access_ok(VERIFY_WRITE, buffer, count))
 		return -EFAULT;
 
-	spin_lock_irq(&dev->event_lock);
+	ret = mutex_lock_interruptible(&file_priv->event_read_lock);
+	if (ret)
+		return ret;
+
 	for (;;) {
-		if (list_empty(&file_priv->event_list)) {
+		struct drm_pending_event *e = NULL;
+
+		spin_lock_irq(&dev->event_lock);
+		if (!list_empty(&file_priv->event_list)) {
+			e = list_first_entry(&file_priv->event_list,
+					struct drm_pending_event, link);
+			file_priv->event_space += e->event->length;
+			list_del(&e->link);
+		}
+		spin_unlock_irq(&dev->event_lock);
+
+		if (e == NULL) {
 			if (ret)
 				break;
 
@@ -499,36 +543,36 @@
 				break;
 			}
 
-			spin_unlock_irq(&dev->event_lock);
+			mutex_unlock(&file_priv->event_read_lock);
 			ret = wait_event_interruptible(file_priv->event_wait,
 						       !list_empty(&file_priv->event_list));
-			spin_lock_irq(&dev->event_lock);
-			if (ret < 0)
-				break;
-
-			ret = 0;
+			if (ret >= 0)
+				ret = mutex_lock_interruptible(&file_priv->event_read_lock);
+			if (ret)
+				return ret;
 		} else {
-			struct drm_pending_event *e;
+			unsigned length = e->event->length;
 
-			e = list_first_entry(&file_priv->event_list,
-					     struct drm_pending_event, link);
-			if (e->event->length + ret > count)
-				break;
-
-			if (__copy_to_user_inatomic(buffer + ret,
-						    e->event, e->event->length)) {
-				if (ret == 0)
-					ret = -EFAULT;
+			if (length > count - ret) {
+put_back_event:
+				spin_lock_irq(&dev->event_lock);
+				file_priv->event_space -= length;
+				list_add(&e->link, &file_priv->event_list);
+				spin_unlock_irq(&dev->event_lock);
 				break;
 			}
 
-			file_priv->event_space += e->event->length;
-			ret += e->event->length;
-			list_del(&e->link);
+			if (copy_to_user(buffer + ret, e->event, length)) {
+				if (ret == 0)
+					ret = -EFAULT;
+				goto put_back_event;
+			}
+
+			ret += length;
 			e->destroy(e);
 		}
 	}
-	spin_unlock_irq(&dev->event_lock);
+	mutex_unlock(&file_priv->event_read_lock);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index c7de454..2e10bba 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -244,8 +244,9 @@
  * @filp: drm file-private structure to use for the handle look up
  * @handle: userspace handle to delete
  *
- * Removes the GEM handle from the @filp lookup table and if this is the last
- * handle also cleans up linked resources like GEM names.
+ * Removes the GEM handle from the @filp lookup table which has been added with
+ * drm_gem_handle_create(). If this is the last handle also cleans up linked
+ * resources like GEM names.
  */
 int
 drm_gem_handle_delete(struct drm_file *filp, u32 handle)
@@ -314,6 +315,10 @@
  * This expects the dev->object_name_lock to be held already and will drop it
  * before returning. Used to avoid races in establishing new handles when
  * importing an object from either an flink name or a dma-buf.
+ *
+ * Handles must be release again through drm_gem_handle_delete(). This is done
+ * when userspace closes @file_priv for all attached handles, or through the
+ * GEM_CLOSE ioctl for individual handles.
  */
 int
 drm_gem_handle_create_tail(struct drm_file *file_priv,
@@ -541,7 +546,17 @@
 }
 EXPORT_SYMBOL(drm_gem_put_pages);
 
-/** Returns a reference to the object named by the handle. */
+/**
+ * drm_gem_object_lookup - look up a GEM object from it's handle
+ * @dev: DRM device
+ * @filp: DRM file private date
+ * @handle: userspace handle
+ *
+ * Returns:
+ *
+ * A reference to the object named by the handle if such exists on @filp, NULL
+ * otherwise.
+ */
 struct drm_gem_object *
 drm_gem_object_lookup(struct drm_device *dev, struct drm_file *filp,
 		      u32 handle)
@@ -774,6 +789,13 @@
 }
 EXPORT_SYMBOL(drm_gem_object_free);
 
+/**
+ * drm_gem_vm_open - vma->ops->open implementation for GEM
+ * @vma: VM area structure
+ *
+ * This function implements the #vm_operations_struct open() callback for GEM
+ * drivers. This must be used together with drm_gem_vm_close().
+ */
 void drm_gem_vm_open(struct vm_area_struct *vma)
 {
 	struct drm_gem_object *obj = vma->vm_private_data;
@@ -782,6 +804,13 @@
 }
 EXPORT_SYMBOL(drm_gem_vm_open);
 
+/**
+ * drm_gem_vm_close - vma->ops->close implementation for GEM
+ * @vma: VM area structure
+ *
+ * This function implements the #vm_operations_struct close() callback for GEM
+ * drivers. This must be used together with drm_gem_vm_open().
+ */
 void drm_gem_vm_close(struct vm_area_struct *vma)
 {
 	struct drm_gem_object *obj = vma->vm_private_data;
diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c
index e109b49..0f7b00b 100644
--- a/drivers/gpu/drm/drm_gem_cma_helper.c
+++ b/drivers/gpu/drm/drm_gem_cma_helper.c
@@ -59,11 +59,13 @@
 	struct drm_gem_object *gem_obj;
 	int ret;
 
-	cma_obj = kzalloc(sizeof(*cma_obj), GFP_KERNEL);
-	if (!cma_obj)
+	if (drm->driver->gem_create_object)
+		gem_obj = drm->driver->gem_create_object(drm, size);
+	else
+		gem_obj = kzalloc(sizeof(*cma_obj), GFP_KERNEL);
+	if (!gem_obj)
 		return ERR_PTR(-ENOMEM);
-
-	gem_obj = &cma_obj->base;
+	cma_obj = container_of(gem_obj, struct drm_gem_cma_object, base);
 
 	ret = drm_gem_object_init(drm, gem_obj, size);
 	if (ret)
diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index 2151ea5..607f493 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -980,7 +980,8 @@
 		struct drm_pending_vblank_event *e,
 		unsigned long seq, struct timeval *now)
 {
-	WARN_ON_SMP(!spin_is_locked(&dev->event_lock));
+	assert_spin_locked(&dev->event_lock);
+
 	e->event.sequence = seq;
 	e->event.tv_sec = now->tv_sec;
 	e->event.tv_usec = now->tv_usec;
@@ -993,6 +994,57 @@
 }
 
 /**
+ * drm_arm_vblank_event - arm vblank event after pageflip
+ * @dev: DRM device
+ * @pipe: CRTC index
+ * @e: the event to prepare to send
+ *
+ * A lot of drivers need to generate vblank events for the very next vblank
+ * interrupt. For example when the page flip interrupt happens when the page
+ * flip gets armed, but not when it actually executes within the next vblank
+ * period. This helper function implements exactly the required vblank arming
+ * behaviour.
+ *
+ * Caller must hold event lock. Caller must also hold a vblank reference for
+ * the event @e, which will be dropped when the next vblank arrives.
+ *
+ * This is the legacy version of drm_crtc_arm_vblank_event().
+ */
+void drm_arm_vblank_event(struct drm_device *dev, unsigned int pipe,
+			  struct drm_pending_vblank_event *e)
+{
+	assert_spin_locked(&dev->event_lock);
+
+	e->pipe = pipe;
+	e->event.sequence = drm_vblank_count(dev, pipe);
+	list_add_tail(&e->base.link, &dev->vblank_event_list);
+}
+EXPORT_SYMBOL(drm_arm_vblank_event);
+
+/**
+ * drm_crtc_arm_vblank_event - arm vblank event after pageflip
+ * @crtc: the source CRTC of the vblank event
+ * @e: the event to send
+ *
+ * A lot of drivers need to generate vblank events for the very next vblank
+ * interrupt. For example when the page flip interrupt happens when the page
+ * flip gets armed, but not when it actually executes within the next vblank
+ * period. This helper function implements exactly the required vblank arming
+ * behaviour.
+ *
+ * Caller must hold event lock. Caller must also hold a vblank reference for
+ * the event @e, which will be dropped when the next vblank arrives.
+ *
+ * This is the native KMS version of drm_arm_vblank_event().
+ */
+void drm_crtc_arm_vblank_event(struct drm_crtc *crtc,
+			       struct drm_pending_vblank_event *e)
+{
+	drm_arm_vblank_event(crtc->dev, drm_crtc_index(crtc), e);
+}
+EXPORT_SYMBOL(drm_crtc_arm_vblank_event);
+
+/**
  * drm_send_vblank_event - helper to send vblank event after pageflip
  * @dev: DRM device
  * @pipe: CRTC index
diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c
index cd74a09..5a8a78d 100644
--- a/drivers/gpu/drm/drm_modes.c
+++ b/drivers/gpu/drm/drm_modes.c
@@ -708,7 +708,8 @@
 }
 EXPORT_SYMBOL(drm_mode_set_name);
 
-/** drm_mode_hsync - get the hsync of a mode
+/**
+ * drm_mode_hsync - get the hsync of a mode
  * @mode: mode
  *
  * Returns:
@@ -917,13 +918,30 @@
 	} else if (mode1->clock != mode2->clock)
 		return false;
 
+	return drm_mode_equal_no_clocks(mode1, mode2);
+}
+EXPORT_SYMBOL(drm_mode_equal);
+
+/**
+ * drm_mode_equal_no_clocks - test modes for equality
+ * @mode1: first mode
+ * @mode2: second mode
+ *
+ * Check to see if @mode1 and @mode2 are equivalent, but
+ * don't check the pixel clocks.
+ *
+ * Returns:
+ * True if the modes are equal, false otherwise.
+ */
+bool drm_mode_equal_no_clocks(const struct drm_display_mode *mode1, const struct drm_display_mode *mode2)
+{
 	if ((mode1->flags & DRM_MODE_FLAG_3D_MASK) !=
 	    (mode2->flags & DRM_MODE_FLAG_3D_MASK))
 		return false;
 
 	return drm_mode_equal_no_clocks_no_stereo(mode1, mode2);
 }
-EXPORT_SYMBOL(drm_mode_equal);
+EXPORT_SYMBOL(drm_mode_equal_no_clocks);
 
 /**
  * drm_mode_equal_no_clocks_no_stereo - test modes for equality
@@ -1056,7 +1074,7 @@
 	MODE_STATUS(ONE_SIZE),
 	MODE_STATUS(NO_REDUCED),
 	MODE_STATUS(NO_STEREO),
-	MODE_STATUS(UNVERIFIED),
+	MODE_STATUS(STALE),
 	MODE_STATUS(BAD),
 	MODE_STATUS(ERROR),
 };
@@ -1154,7 +1172,6 @@
 /**
  * drm_mode_connector_list_update - update the mode list for the connector
  * @connector: the connector to update
- * @merge_type_bits: whether to merge or overwrite type bits
  *
  * This moves the modes from the @connector probed_modes list
  * to the actual mode list. It compares the probed mode against the current
@@ -1163,33 +1180,48 @@
  * This is just a helper functions doesn't validate any modes itself and also
  * doesn't prune any invalid modes. Callers need to do that themselves.
  */
-void drm_mode_connector_list_update(struct drm_connector *connector,
-				    bool merge_type_bits)
+void drm_mode_connector_list_update(struct drm_connector *connector)
 {
-	struct drm_display_mode *mode;
 	struct drm_display_mode *pmode, *pt;
-	int found_it;
 
 	WARN_ON(!mutex_is_locked(&connector->dev->mode_config.mutex));
 
-	list_for_each_entry_safe(pmode, pt, &connector->probed_modes,
-				 head) {
-		found_it = 0;
+	list_for_each_entry_safe(pmode, pt, &connector->probed_modes, head) {
+		struct drm_display_mode *mode;
+		bool found_it = false;
+
 		/* go through current modes checking for the new probed mode */
 		list_for_each_entry(mode, &connector->modes, head) {
-			if (drm_mode_equal(pmode, mode)) {
-				found_it = 1;
-				/* if equal delete the probed mode */
-				mode->status = pmode->status;
-				/* Merge type bits together */
-				if (merge_type_bits)
-					mode->type |= pmode->type;
-				else
-					mode->type = pmode->type;
-				list_del(&pmode->head);
-				drm_mode_destroy(connector->dev, pmode);
-				break;
+			if (!drm_mode_equal(pmode, mode))
+				continue;
+
+			found_it = true;
+
+			/*
+			 * If the old matching mode is stale (ie. left over
+			 * from a previous probe) just replace it outright.
+			 * Otherwise just merge the type bits between all
+			 * equal probed modes.
+			 *
+			 * If two probed modes are considered equal, pick the
+			 * actual timings from the one that's marked as
+			 * preferred (in case the match isn't 100%). If
+			 * multiple or zero preferred modes are present, favor
+			 * the mode added to the probed_modes list first.
+			 */
+			if (mode->status == MODE_STALE) {
+				drm_mode_copy(mode, pmode);
+			} else if ((mode->type & DRM_MODE_TYPE_PREFERRED) == 0 &&
+				   (pmode->type & DRM_MODE_TYPE_PREFERRED) != 0) {
+				pmode->type |= mode->type;
+				drm_mode_copy(mode, pmode);
+			} else {
+				mode->type |= pmode->type;
 			}
+
+			list_del(&pmode->head);
+			drm_mode_destroy(connector->dev, pmode);
+			break;
 		}
 
 		if (!found_it) {
diff --git a/drivers/gpu/drm/drm_modeset_lock.c b/drivers/gpu/drm/drm_modeset_lock.c
index 6675b14..c2f5971 100644
--- a/drivers/gpu/drm/drm_modeset_lock.c
+++ b/drivers/gpu/drm/drm_modeset_lock.c
@@ -57,11 +57,18 @@
 
 /**
  * drm_modeset_lock_all - take all modeset locks
- * @dev: drm device
+ * @dev: DRM device
  *
  * This function takes all modeset locks, suitable where a more fine-grained
- * scheme isn't (yet) implemented. Locks must be dropped with
- * drm_modeset_unlock_all.
+ * scheme isn't (yet) implemented. Locks must be dropped by calling the
+ * drm_modeset_unlock_all() function.
+ *
+ * This function is deprecated. It allocates a lock acquisition context and
+ * stores it in the DRM device's ->mode_config. This facilitate conversion of
+ * existing code because it removes the need to manually deal with the
+ * acquisition context, but it is also brittle because the context is global
+ * and care must be taken not to nest calls. New code should use the
+ * drm_modeset_lock_all_ctx() function and pass in the context explicitly.
  */
 void drm_modeset_lock_all(struct drm_device *dev)
 {
@@ -78,39 +85,43 @@
 	drm_modeset_acquire_init(ctx, 0);
 
 retry:
-	ret = drm_modeset_lock(&config->connection_mutex, ctx);
-	if (ret)
-		goto fail;
-	ret = drm_modeset_lock_all_crtcs(dev, ctx);
-	if (ret)
-		goto fail;
+	ret = drm_modeset_lock_all_ctx(dev, ctx);
+	if (ret < 0) {
+		if (ret == -EDEADLK) {
+			drm_modeset_backoff(ctx);
+			goto retry;
+		}
+
+		drm_modeset_acquire_fini(ctx);
+		kfree(ctx);
+		return;
+	}
 
 	WARN_ON(config->acquire_ctx);
 
-	/* now we hold the locks, so now that it is safe, stash the
-	 * ctx for drm_modeset_unlock_all():
+	/*
+	 * We hold the locks now, so it is safe to stash the acquisition
+	 * context for drm_modeset_unlock_all().
 	 */
 	config->acquire_ctx = ctx;
 
 	drm_warn_on_modeset_not_all_locked(dev);
-
-	return;
-
-fail:
-	if (ret == -EDEADLK) {
-		drm_modeset_backoff(ctx);
-		goto retry;
-	}
-
-	kfree(ctx);
 }
 EXPORT_SYMBOL(drm_modeset_lock_all);
 
 /**
  * drm_modeset_unlock_all - drop all modeset locks
- * @dev: device
+ * @dev: DRM device
  *
- * This function drop all modeset locks taken by drm_modeset_lock_all.
+ * This function drops all modeset locks taken by a previous call to the
+ * drm_modeset_lock_all() function.
+ *
+ * This function is deprecated. It uses the lock acquisition context stored
+ * in the DRM device's ->mode_config. This facilitates conversion of existing
+ * code because it removes the need to manually deal with the acquisition
+ * context, but it is also brittle because the context is global and care must
+ * be taken not to nest calls. New code should pass the acquisition context
+ * directly to the drm_modeset_drop_locks() function.
  */
 void drm_modeset_unlock_all(struct drm_device *dev)
 {
@@ -431,14 +442,34 @@
 }
 EXPORT_SYMBOL(drm_modeset_unlock);
 
-/* In some legacy codepaths it's convenient to just grab all the crtc and plane
- * related locks. */
-int drm_modeset_lock_all_crtcs(struct drm_device *dev,
-		struct drm_modeset_acquire_ctx *ctx)
+/**
+ * drm_modeset_lock_all_ctx - take all modeset locks
+ * @dev: DRM device
+ * @ctx: lock acquisition context
+ *
+ * This function takes all modeset locks, suitable where a more fine-grained
+ * scheme isn't (yet) implemented.
+ *
+ * Unlike drm_modeset_lock_all(), it doesn't take the dev->mode_config.mutex
+ * since that lock isn't required for modeset state changes. Callers which
+ * need to grab that lock too need to do so outside of the acquire context
+ * @ctx.
+ *
+ * Locks acquired with this function should be released by calling the
+ * drm_modeset_drop_locks() function on @ctx.
+ *
+ * Returns: 0 on success or a negative error-code on failure.
+ */
+int drm_modeset_lock_all_ctx(struct drm_device *dev,
+			     struct drm_modeset_acquire_ctx *ctx)
 {
 	struct drm_crtc *crtc;
 	struct drm_plane *plane;
-	int ret = 0;
+	int ret;
+
+	ret = drm_modeset_lock(&dev->mode_config.connection_mutex, ctx);
+	if (ret)
+		return ret;
 
 	drm_for_each_crtc(crtc, dev) {
 		ret = drm_modeset_lock(&crtc->mutex, ctx);
@@ -454,4 +485,4 @@
 
 	return 0;
 }
-EXPORT_SYMBOL(drm_modeset_lock_all_crtcs);
+EXPORT_SYMBOL(drm_modeset_lock_all_ctx);
diff --git a/drivers/gpu/drm/drm_plane_helper.c b/drivers/gpu/drm/drm_plane_helper.c
index d384ebc..369d2898 100644
--- a/drivers/gpu/drm/drm_plane_helper.c
+++ b/drivers/gpu/drm/drm_plane_helper.c
@@ -57,6 +57,10 @@
  * by the atomic helpers.
  *
  * Again drivers are strongly urged to switch to the new interfaces.
+ *
+ * The plane helpers share the function table structures with other helpers,
+ * specifically also the atomic helpers. See struct &drm_plane_helper_funcs for
+ * the details.
  */
 
 /*
@@ -164,6 +168,8 @@
 	vscale = drm_rect_calc_vscale(src, dest, min_scale, max_scale);
 	if (hscale < 0 || vscale < 0) {
 		DRM_DEBUG_KMS("Invalid scaling of plane\n");
+		drm_rect_debug_print("src: ", src, true);
+		drm_rect_debug_print("dst: ", dest, false);
 		return -ERANGE;
 	}
 
@@ -180,6 +186,8 @@
 
 	if (!can_position && !drm_rect_equals(dest, clip)) {
 		DRM_DEBUG_KMS("Plane must cover entire CRTC\n");
+		drm_rect_debug_print("dst: ", dest, false);
+		drm_rect_debug_print("clip: ", clip, false);
 		return -EINVAL;
 	}
 
@@ -367,7 +375,7 @@
 				       &drm_primary_helper_funcs,
 				       safe_modeset_formats,
 				       ARRAY_SIZE(safe_modeset_formats),
-				       DRM_PLANE_TYPE_PRIMARY);
+				       DRM_PLANE_TYPE_PRIMARY, NULL);
 	if (ret) {
 		kfree(primary);
 		primary = NULL;
@@ -394,7 +402,8 @@
 	struct drm_plane *primary;
 
 	primary = create_primary_plane(dev);
-	return drm_crtc_init_with_planes(dev, crtc, primary, NULL, funcs);
+	return drm_crtc_init_with_planes(dev, crtc, primary, NULL, funcs,
+					 NULL);
 }
 EXPORT_SYMBOL(drm_crtc_init);
 
diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c
index a18164f..e714b5a 100644
--- a/drivers/gpu/drm/drm_probe_helper.c
+++ b/drivers/gpu/drm/drm_probe_helper.c
@@ -53,6 +53,9 @@
  * This helper library can be used independently of the modeset helper library.
  * Drivers can also overwrite different parts e.g. use their own hotplug
  * handling code to avoid probing unrelated outputs.
+ *
+ * The probe helpers share the function table structures with other display
+ * helper libraries. See struct &drm_connector_helper_funcs for the details.
  */
 
 static bool drm_kms_helper_poll = true;
@@ -126,9 +129,64 @@
 }
 EXPORT_SYMBOL(drm_kms_helper_poll_enable_locked);
 
-
-static int drm_helper_probe_single_connector_modes_merge_bits(struct drm_connector *connector,
-							      uint32_t maxX, uint32_t maxY, bool merge_type_bits)
+/**
+ * drm_helper_probe_single_connector_modes - get complete set of display modes
+ * @connector: connector to probe
+ * @maxX: max width for modes
+ * @maxY: max height for modes
+ *
+ * Based on the helper callbacks implemented by @connector in struct
+ * &drm_connector_helper_funcs try to detect all valid modes.  Modes will first
+ * be added to the connector's probed_modes list, then culled (based on validity
+ * and the @maxX, @maxY parameters) and put into the normal modes list.
+ *
+ * Intended to be used as a generic implementation of the ->fill_modes()
+ * @connector vfunc for drivers that use the CRTC helpers for output mode
+ * filtering and detection.
+ *
+ * The basic procedure is as follows
+ *
+ * 1. All modes currently on the connector's modes list are marked as stale
+ *
+ * 2. New modes are added to the connector's probed_modes list with
+ *    drm_mode_probed_add(). New modes start their life with status as OK.
+ *    Modes are added from a single source using the following priority order.
+ *
+ *    - debugfs 'override_edid' (used for testing only)
+ *    - firmware EDID (drm_load_edid_firmware())
+ *    - connector helper ->get_modes() vfunc
+ *    - if the connector status is connector_status_connected, standard
+ *      VESA DMT modes up to 1024x768 are automatically added
+ *      (drm_add_modes_noedid())
+ *
+ *    Finally modes specified via the kernel command line (video=...) are
+ *    added in addition to what the earlier probes produced
+ *    (drm_helper_probe_add_cmdline_mode()). These modes are generated
+ *    using the VESA GTF/CVT formulas.
+ *
+ * 3. Modes are moved from the probed_modes list to the modes list. Potential
+ *    duplicates are merged together (see drm_mode_connector_list_update()).
+ *    After this step the probed_modes list will be empty again.
+ *
+ * 4. Any non-stale mode on the modes list then undergoes validation
+ *
+ *    - drm_mode_validate_basic() performs basic sanity checks
+ *    - drm_mode_validate_size() filters out modes larger than @maxX and @maxY
+ *      (if specified)
+ *    - drm_mode_validate_flag() checks the modes againt basic connector
+ *      capabilites (interlace_allowed,doublescan_allowed,stereo_allowed)
+ *    - the optional connector ->mode_valid() helper can perform driver and/or
+ *      hardware specific checks
+ *
+ * 5. Any mode whose status is not OK is pruned from the connector's modes list,
+ *    accompanied by a debug message indicating the reason for the mode's
+ *    rejection (see drm_mode_prune_invalid()).
+ *
+ * Returns:
+ * The number of modes found on @connector.
+ */
+int drm_helper_probe_single_connector_modes(struct drm_connector *connector,
+					    uint32_t maxX, uint32_t maxY)
 {
 	struct drm_device *dev = connector->dev;
 	struct drm_display_mode *mode;
@@ -143,9 +201,11 @@
 
 	DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id,
 			connector->name);
-	/* set all modes to the unverified state */
+	/* set all old modes to the stale state */
 	list_for_each_entry(mode, &connector->modes, head)
-		mode->status = MODE_UNVERIFIED;
+		mode->status = MODE_STALE;
+
+	old_status = connector->status;
 
 	if (connector->force) {
 		if (connector->force == DRM_FORCE_ON ||
@@ -156,33 +216,32 @@
 		if (connector->funcs->force)
 			connector->funcs->force(connector);
 	} else {
-		old_status = connector->status;
-
 		connector->status = connector->funcs->detect(connector, true);
+	}
+
+	/*
+	 * Normally either the driver's hpd code or the poll loop should
+	 * pick up any changes and fire the hotplug event. But if
+	 * userspace sneaks in a probe, we might miss a change. Hence
+	 * check here, and if anything changed start the hotplug code.
+	 */
+	if (old_status != connector->status) {
+		DRM_DEBUG_KMS("[CONNECTOR:%d:%s] status updated from %s to %s\n",
+			      connector->base.id,
+			      connector->name,
+			      drm_get_connector_status_name(old_status),
+			      drm_get_connector_status_name(connector->status));
 
 		/*
-		 * Normally either the driver's hpd code or the poll loop should
-		 * pick up any changes and fire the hotplug event. But if
-		 * userspace sneaks in a probe, we might miss a change. Hence
-		 * check here, and if anything changed start the hotplug code.
+		 * The hotplug event code might call into the fb
+		 * helpers, and so expects that we do not hold any
+		 * locks. Fire up the poll struct instead, it will
+		 * disable itself again.
 		 */
-		if (old_status != connector->status) {
-			DRM_DEBUG_KMS("[CONNECTOR:%d:%s] status updated from %d to %d\n",
-				      connector->base.id,
-				      connector->name,
-				      old_status, connector->status);
-
-			/*
-			 * The hotplug event code might call into the fb
-			 * helpers, and so expects that we do not hold any
-			 * locks. Fire up the poll struct instead, it will
-			 * disable itself again.
-			 */
-			dev->mode_config.delayed_event = true;
-			if (dev->mode_config.poll_enabled)
-				schedule_delayed_work(&dev->mode_config.output_poll_work,
-						      0);
-		}
+		dev->mode_config.delayed_event = true;
+		if (dev->mode_config.poll_enabled)
+			schedule_delayed_work(&dev->mode_config.output_poll_work,
+					      0);
 	}
 
 	/* Re-enable polling in case the global poll config changed. */
@@ -199,17 +258,16 @@
 		goto prune;
 	}
 
-#ifdef CONFIG_DRM_LOAD_EDID_FIRMWARE
-	count = drm_load_edid_firmware(connector);
-	if (count == 0)
-#endif
-	{
-		if (connector->override_edid) {
-			struct edid *edid = (struct edid *) connector->edid_blob_ptr->data;
+	if (connector->override_edid) {
+		struct edid *edid = (struct edid *) connector->edid_blob_ptr->data;
 
-			count = drm_add_edid_modes(connector, edid);
-			drm_edid_to_eld(connector, edid);
-		} else
+		count = drm_add_edid_modes(connector, edid);
+		drm_edid_to_eld(connector, edid);
+	} else {
+#ifdef CONFIG_DRM_LOAD_EDID_FIRMWARE
+		count = drm_load_edid_firmware(connector);
+		if (count == 0)
+#endif
 			count = (*connector_funcs->get_modes)(connector);
 	}
 
@@ -219,7 +277,7 @@
 	if (count == 0)
 		goto prune;
 
-	drm_mode_connector_list_update(connector, merge_type_bits);
+	drm_mode_connector_list_update(connector);
 
 	if (connector->interlace_allowed)
 		mode_flags |= DRM_MODE_FLAG_INTERLACE;
@@ -229,7 +287,8 @@
 		mode_flags |= DRM_MODE_FLAG_3D_MASK;
 
 	list_for_each_entry(mode, &connector->modes, head) {
-		mode->status = drm_mode_validate_basic(mode);
+		if (mode->status == MODE_OK)
+			mode->status = drm_mode_validate_basic(mode);
 
 		if (mode->status == MODE_OK)
 			mode->status = drm_mode_validate_size(mode, maxX, maxY);
@@ -262,49 +321,9 @@
 
 	return count;
 }
-
-/**
- * drm_helper_probe_single_connector_modes - get complete set of display modes
- * @connector: connector to probe
- * @maxX: max width for modes
- * @maxY: max height for modes
- *
- * Based on the helper callbacks implemented by @connector try to detect all
- * valid modes.  Modes will first be added to the connector's probed_modes list,
- * then culled (based on validity and the @maxX, @maxY parameters) and put into
- * the normal modes list.
- *
- * Intended to be use as a generic implementation of the ->fill_modes()
- * @connector vfunc for drivers that use the crtc helpers for output mode
- * filtering and detection.
- *
- * Returns:
- * The number of modes found on @connector.
- */
-int drm_helper_probe_single_connector_modes(struct drm_connector *connector,
-					    uint32_t maxX, uint32_t maxY)
-{
-	return drm_helper_probe_single_connector_modes_merge_bits(connector, maxX, maxY, true);
-}
 EXPORT_SYMBOL(drm_helper_probe_single_connector_modes);
 
 /**
- * drm_helper_probe_single_connector_modes_nomerge - get complete set of display modes
- * @connector: connector to probe
- * @maxX: max width for modes
- * @maxY: max height for modes
- *
- * This operates like drm_hehlper_probe_single_connector_modes except it
- * replaces the mode bits instead of merging them for preferred modes.
- */
-int drm_helper_probe_single_connector_modes_nomerge(struct drm_connector *connector,
-					    uint32_t maxX, uint32_t maxY)
-{
-	return drm_helper_probe_single_connector_modes_merge_bits(connector, maxX, maxY, false);
-}
-EXPORT_SYMBOL(drm_helper_probe_single_connector_modes_nomerge);
-
-/**
  * drm_kms_helper_hotplug_event - fire off KMS hotplug events
  * @dev: drm_device whose connector state changed
  *
diff --git a/drivers/gpu/drm/drm_rect.c b/drivers/gpu/drm/drm_rect.c
index 531ac4c..a8e2c86 100644
--- a/drivers/gpu/drm/drm_rect.c
+++ b/drivers/gpu/drm/drm_rect.c
@@ -275,22 +275,23 @@
 
 /**
  * drm_rect_debug_print - print the rectangle information
+ * @prefix: prefix string
  * @r: rectangle to print
  * @fixed_point: rectangle is in 16.16 fixed point format
  */
-void drm_rect_debug_print(const struct drm_rect *r, bool fixed_point)
+void drm_rect_debug_print(const char *prefix, const struct drm_rect *r, bool fixed_point)
 {
 	int w = drm_rect_width(r);
 	int h = drm_rect_height(r);
 
 	if (fixed_point)
-		DRM_DEBUG_KMS("%d.%06ux%d.%06u%+d.%06u%+d.%06u\n",
+		DRM_DEBUG_KMS("%s%d.%06ux%d.%06u%+d.%06u%+d.%06u\n", prefix,
 			      w >> 16, ((w & 0xffff) * 15625) >> 10,
 			      h >> 16, ((h & 0xffff) * 15625) >> 10,
 			      r->x1 >> 16, ((r->x1 & 0xffff) * 15625) >> 10,
 			      r->y1 >> 16, ((r->y1 & 0xffff) * 15625) >> 10);
 	else
-		DRM_DEBUG_KMS("%dx%d%+d%+d\n", w, h, r->x1, r->y1);
+		DRM_DEBUG_KMS("%s%dx%d%+d%+d\n", prefix, w, h, r->x1, r->y1);
 }
 EXPORT_SYMBOL(drm_rect_debug_print);
 
diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c
index 615b7e6..0ca6410 100644
--- a/drivers/gpu/drm/drm_sysfs.c
+++ b/drivers/gpu/drm/drm_sysfs.c
@@ -167,47 +167,35 @@
 {
 	struct drm_connector *connector = to_drm_connector(device);
 	struct drm_device *dev = connector->dev;
-	enum drm_connector_status old_status;
+	enum drm_connector_force old_force;
 	int ret;
 
 	ret = mutex_lock_interruptible(&dev->mode_config.mutex);
 	if (ret)
 		return ret;
 
-	old_status = connector->status;
+	old_force = connector->force;
 
-	if (sysfs_streq(buf, "detect")) {
+	if (sysfs_streq(buf, "detect"))
 		connector->force = 0;
-		connector->status = connector->funcs->detect(connector, true);
-	} else if (sysfs_streq(buf, "on")) {
+	else if (sysfs_streq(buf, "on"))
 		connector->force = DRM_FORCE_ON;
-	} else if (sysfs_streq(buf, "on-digital")) {
+	else if (sysfs_streq(buf, "on-digital"))
 		connector->force = DRM_FORCE_ON_DIGITAL;
-	} else if (sysfs_streq(buf, "off")) {
+	else if (sysfs_streq(buf, "off"))
 		connector->force = DRM_FORCE_OFF;
-	} else
+	else
 		ret = -EINVAL;
 
-	if (ret == 0 && connector->force) {
-		if (connector->force == DRM_FORCE_ON ||
-		    connector->force == DRM_FORCE_ON_DIGITAL)
-			connector->status = connector_status_connected;
-		else
-			connector->status = connector_status_disconnected;
-		if (connector->funcs->force)
-			connector->funcs->force(connector);
-	}
-
-	if (old_status != connector->status) {
-		DRM_DEBUG_KMS("[CONNECTOR:%d:%s] status updated from %d to %d\n",
+	if (old_force != connector->force || !connector->force) {
+		DRM_DEBUG_KMS("[CONNECTOR:%d:%s] force updated from %d to %d or reprobing\n",
 			      connector->base.id,
 			      connector->name,
-			      old_status, connector->status);
+			      old_force, connector->force);
 
-		dev->mode_config.delayed_event = true;
-		if (dev->mode_config.poll_enabled)
-			schedule_delayed_work(&dev->mode_config.output_poll_work,
-					      0);
+		connector->funcs->fill_modes(connector,
+					     dev->mode_config.max_width,
+					     dev->mode_config.max_height);
 	}
 
 	mutex_unlock(&dev->mode_config.mutex);
@@ -256,23 +244,29 @@
 	struct drm_connector *connector = to_drm_connector(connector_dev);
 	unsigned char *edid;
 	size_t size;
+	ssize_t ret = 0;
 
+	mutex_lock(&connector->dev->mode_config.mutex);
 	if (!connector->edid_blob_ptr)
-		return 0;
+		goto unlock;
 
 	edid = connector->edid_blob_ptr->data;
 	size = connector->edid_blob_ptr->length;
 	if (!edid)
-		return 0;
+		goto unlock;
 
 	if (off >= size)
-		return 0;
+		goto unlock;
 
 	if (off + count > size)
 		count = size - off;
 	memcpy(buf, edid + off, count);
 
-	return count;
+	ret = count;
+unlock:
+	mutex_unlock(&connector->dev->mode_config.mutex);
+
+	return ret;
 }
 
 static ssize_t modes_show(struct device *device,
@@ -283,10 +277,12 @@
 	struct drm_display_mode *mode;
 	int written = 0;
 
+	mutex_lock(&connector->dev->mode_config.mutex);
 	list_for_each_entry(mode, &connector->modes, head) {
 		written += snprintf(buf + written, PAGE_SIZE - written, "%s\n",
 				    mode->name);
 	}
+	mutex_unlock(&connector->dev->mode_config.mutex);
 
 	return written;
 }
diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig
index 96e86cf..83efca9 100644
--- a/drivers/gpu/drm/exynos/Kconfig
+++ b/drivers/gpu/drm/exynos/Kconfig
@@ -118,7 +118,7 @@
 
 config DRM_EXYNOS_GSC
 	bool "GScaler"
-	depends on DRM_EXYNOS_IPP && ARCH_EXYNOS5 && !ARCH_MULTIPLATFORM
+	depends on DRM_EXYNOS_IPP && ARCH_EXYNOS5 && !VIDEO_SAMSUNG_EXYNOS_GSC
 	help
 	  Choose this option if you want to use Exynos GSC for DRM.
 
diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
index fbe1b31..c7362b9 100644
--- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
@@ -21,11 +21,11 @@
 
 #include "exynos_drm_drv.h"
 #include "exynos_drm_crtc.h"
+#include "exynos_drm_fb.h"
 #include "exynos_drm_plane.h"
 #include "exynos_drm_iommu.h"
 
 #define WINDOWS_NR	3
-#define CURSOR_WIN	2
 #define MIN_FB_WIDTH_FOR_16WORD_BURST	128
 
 static const char * const decon_clks_name[] = {
@@ -56,6 +56,7 @@
 	struct drm_device		*drm_dev;
 	struct exynos_drm_crtc		*crtc;
 	struct exynos_drm_plane		planes[WINDOWS_NR];
+	struct exynos_drm_plane_config	configs[WINDOWS_NR];
 	void __iomem			*addr;
 	struct clk			*clks[ARRAY_SIZE(decon_clks_name)];
 	int				pipe;
@@ -71,6 +72,12 @@
 	DRM_FORMAT_ARGB8888,
 };
 
+static const enum drm_plane_type decon_win_types[WINDOWS_NR] = {
+	DRM_PLANE_TYPE_PRIMARY,
+	DRM_PLANE_TYPE_OVERLAY,
+	DRM_PLANE_TYPE_CURSOR,
+};
+
 static inline void decon_set_bits(struct decon_context *ctx, u32 reg, u32 mask,
 				  u32 val)
 {
@@ -259,21 +266,24 @@
 static void decon_update_plane(struct exynos_drm_crtc *crtc,
 			       struct exynos_drm_plane *plane)
 {
+	struct exynos_drm_plane_state *state =
+				to_exynos_plane_state(plane->base.state);
 	struct decon_context *ctx = crtc->ctx;
-	struct drm_plane_state *state = plane->base.state;
+	struct drm_framebuffer *fb = state->base.fb;
 	unsigned int win = plane->zpos;
-	unsigned int bpp = state->fb->bits_per_pixel >> 3;
-	unsigned int pitch = state->fb->pitches[0];
+	unsigned int bpp = fb->bits_per_pixel >> 3;
+	unsigned int pitch = fb->pitches[0];
+	dma_addr_t dma_addr = exynos_drm_fb_dma_addr(fb, 0);
 	u32 val;
 
 	if (test_bit(BIT_SUSPENDED, &ctx->flags))
 		return;
 
-	val = COORDINATE_X(plane->crtc_x) | COORDINATE_Y(plane->crtc_y);
+	val = COORDINATE_X(state->crtc.x) | COORDINATE_Y(state->crtc.y);
 	writel(val, ctx->addr + DECON_VIDOSDxA(win));
 
-	val = COORDINATE_X(plane->crtc_x + plane->crtc_w - 1) |
-		COORDINATE_Y(plane->crtc_y + plane->crtc_h - 1);
+	val = COORDINATE_X(state->crtc.x + state->crtc.w - 1) |
+		COORDINATE_Y(state->crtc.y + state->crtc.h - 1);
 	writel(val, ctx->addr + DECON_VIDOSDxB(win));
 
 	val = VIDOSD_Wx_ALPHA_R_F(0x0) | VIDOSD_Wx_ALPHA_G_F(0x0) |
@@ -284,20 +294,20 @@
 		VIDOSD_Wx_ALPHA_B_F(0x0);
 	writel(val, ctx->addr + DECON_VIDOSDxD(win));
 
-	writel(plane->dma_addr[0], ctx->addr + DECON_VIDW0xADD0B0(win));
+	writel(dma_addr, ctx->addr + DECON_VIDW0xADD0B0(win));
 
-	val = plane->dma_addr[0] + pitch * plane->crtc_h;
+	val = dma_addr + pitch * state->src.h;
 	writel(val, ctx->addr + DECON_VIDW0xADD1B0(win));
 
 	if (ctx->out_type != IFTYPE_HDMI)
-		val = BIT_VAL(pitch - plane->crtc_w * bpp, 27, 14)
-			| BIT_VAL(plane->crtc_w * bpp, 13, 0);
+		val = BIT_VAL(pitch - state->crtc.w * bpp, 27, 14)
+			| BIT_VAL(state->crtc.w * bpp, 13, 0);
 	else
-		val = BIT_VAL(pitch - plane->crtc_w * bpp, 29, 15)
-			| BIT_VAL(plane->crtc_w * bpp, 14, 0);
+		val = BIT_VAL(pitch - state->crtc.w * bpp, 29, 15)
+			| BIT_VAL(state->crtc.w * bpp, 14, 0);
 	writel(val, ctx->addr + DECON_VIDW0xADD2(win));
 
-	decon_win_set_pixfmt(ctx, win, state->fb);
+	decon_win_set_pixfmt(ctx, win, fb);
 
 	/* window enable */
 	decon_set_bits(ctx, DECON_WINCONx(win), WINCONx_ENWIN_F, ~0);
@@ -377,20 +387,12 @@
 static void decon_enable(struct exynos_drm_crtc *crtc)
 {
 	struct decon_context *ctx = crtc->ctx;
-	int ret;
-	int i;
 
 	if (!test_and_clear_bit(BIT_SUSPENDED, &ctx->flags))
 		return;
 
 	pm_runtime_get_sync(ctx->dev);
 
-	for (i = 0; i < ARRAY_SIZE(decon_clks_name); i++) {
-		ret = clk_prepare_enable(ctx->clks[i]);
-		if (ret < 0)
-			goto err;
-	}
-
 	set_bit(BIT_CLKS_ENABLED, &ctx->flags);
 
 	/* if vblank was enabled status, enable it again. */
@@ -399,11 +401,6 @@
 
 	decon_commit(ctx->crtc);
 
-	return;
-err:
-	while (--i >= 0)
-		clk_disable_unprepare(ctx->clks[i]);
-
 	set_bit(BIT_SUSPENDED, &ctx->flags);
 }
 
@@ -425,9 +422,6 @@
 
 	decon_swreset(ctx);
 
-	for (i = 0; i < ARRAY_SIZE(decon_clks_name); i++)
-		clk_disable_unprepare(ctx->clks[i]);
-
 	clear_bit(BIT_CLKS_ENABLED, &ctx->flags);
 
 	pm_runtime_put_sync(ctx->dev);
@@ -478,7 +472,6 @@
 static struct exynos_drm_crtc_ops decon_crtc_ops = {
 	.enable			= decon_enable,
 	.disable		= decon_disable,
-	.commit			= decon_commit,
 	.enable_vblank		= decon_enable_vblank,
 	.disable_vblank		= decon_disable_vblank,
 	.atomic_begin		= decon_atomic_begin,
@@ -495,7 +488,6 @@
 	struct exynos_drm_private *priv = drm_dev->dev_private;
 	struct exynos_drm_plane *exynos_plane;
 	enum exynos_drm_output_type out_type;
-	enum drm_plane_type type;
 	unsigned int win;
 	int ret;
 
@@ -505,10 +497,13 @@
 	for (win = ctx->first_win; win < WINDOWS_NR; win++) {
 		int tmp = (win == ctx->first_win) ? 0 : win;
 
-		type = exynos_plane_get_type(tmp, CURSOR_WIN);
+		ctx->configs[win].pixel_formats = decon_formats;
+		ctx->configs[win].num_pixel_formats = ARRAY_SIZE(decon_formats);
+		ctx->configs[win].zpos = win;
+		ctx->configs[win].type = decon_win_types[tmp];
+
 		ret = exynos_plane_init(drm_dev, &ctx->planes[win],
-				1 << ctx->pipe, type, decon_formats,
-				ARRAY_SIZE(decon_formats), win);
+					1 << ctx->pipe, &ctx->configs[win]);
 		if (ret)
 			return ret;
 	}
@@ -581,6 +576,44 @@
 	return IRQ_HANDLED;
 }
 
+#ifdef CONFIG_PM
+static int exynos5433_decon_suspend(struct device *dev)
+{
+	struct decon_context *ctx = dev_get_drvdata(dev);
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(decon_clks_name); i++)
+		clk_disable_unprepare(ctx->clks[i]);
+
+	return 0;
+}
+
+static int exynos5433_decon_resume(struct device *dev)
+{
+	struct decon_context *ctx = dev_get_drvdata(dev);
+	int i, ret;
+
+	for (i = 0; i < ARRAY_SIZE(decon_clks_name); i++) {
+		ret = clk_prepare_enable(ctx->clks[i]);
+		if (ret < 0)
+			goto err;
+	}
+
+	return 0;
+
+err:
+	while (--i >= 0)
+		clk_disable_unprepare(ctx->clks[i]);
+
+	return ret;
+}
+#endif
+
+static const struct dev_pm_ops exynos5433_decon_pm_ops = {
+	SET_RUNTIME_PM_OPS(exynos5433_decon_suspend, exynos5433_decon_resume,
+			   NULL)
+};
+
 static const struct of_device_id exynos5433_decon_driver_dt_match[] = {
 	{
 		.compatible = "samsung,exynos5433-decon",
@@ -684,6 +717,7 @@
 	.remove		= exynos5433_decon_remove,
 	.driver		= {
 		.name	= "exynos5433-decon",
+		.pm	= &exynos5433_decon_pm_ops,
 		.of_match_table = exynos5433_decon_driver_dt_match,
 	},
 };
diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
index ead2b16..c47f9af 100644
--- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
@@ -30,6 +30,7 @@
 #include "exynos_drm_crtc.h"
 #include "exynos_drm_plane.h"
 #include "exynos_drm_drv.h"
+#include "exynos_drm_fb.h"
 #include "exynos_drm_fbdev.h"
 #include "exynos_drm_iommu.h"
 
@@ -40,13 +41,13 @@
 #define MIN_FB_WIDTH_FOR_16WORD_BURST 128
 
 #define WINDOWS_NR	2
-#define CURSOR_WIN	1
 
 struct decon_context {
 	struct device			*dev;
 	struct drm_device		*drm_dev;
 	struct exynos_drm_crtc		*crtc;
 	struct exynos_drm_plane		planes[WINDOWS_NR];
+	struct exynos_drm_plane_config	configs[WINDOWS_NR];
 	struct clk			*pclk;
 	struct clk			*aclk;
 	struct clk			*eclk;
@@ -81,6 +82,11 @@
 	DRM_FORMAT_BGRA8888,
 };
 
+static const enum drm_plane_type decon_win_types[WINDOWS_NR] = {
+	DRM_PLANE_TYPE_PRIMARY,
+	DRM_PLANE_TYPE_CURSOR,
+};
+
 static void decon_wait_for_vblank(struct exynos_drm_crtc *crtc)
 {
 	struct decon_context *ctx = crtc->ctx;
@@ -119,13 +125,8 @@
 	}
 
 	/* Wait for vsync, as disable channel takes effect at next vsync */
-	if (ch_enabled) {
-		unsigned int state = ctx->suspended;
-
-		ctx->suspended = 0;
+	if (ch_enabled)
 		decon_wait_for_vblank(ctx->crtc);
-		ctx->suspended = state;
-	}
 }
 
 static int decon_ctx_initialize(struct decon_context *ctx,
@@ -398,16 +399,17 @@
 static void decon_update_plane(struct exynos_drm_crtc *crtc,
 			       struct exynos_drm_plane *plane)
 {
+	struct exynos_drm_plane_state *state =
+				to_exynos_plane_state(plane->base.state);
 	struct decon_context *ctx = crtc->ctx;
-	struct drm_display_mode *mode = &crtc->base.state->adjusted_mode;
-	struct drm_plane_state *state = plane->base.state;
+	struct drm_framebuffer *fb = state->base.fb;
 	int padding;
 	unsigned long val, alpha;
 	unsigned int last_x;
 	unsigned int last_y;
 	unsigned int win = plane->zpos;
-	unsigned int bpp = state->fb->bits_per_pixel >> 3;
-	unsigned int pitch = state->fb->pitches[0];
+	unsigned int bpp = fb->bits_per_pixel >> 3;
+	unsigned int pitch = fb->pitches[0];
 
 	if (ctx->suspended)
 		return;
@@ -423,41 +425,32 @@
 	 */
 
 	/* buffer start address */
-	val = (unsigned long)plane->dma_addr[0];
+	val = (unsigned long)exynos_drm_fb_dma_addr(fb, 0);
 	writel(val, ctx->regs + VIDW_BUF_START(win));
 
-	padding = (pitch / bpp) - state->fb->width;
+	padding = (pitch / bpp) - fb->width;
 
 	/* buffer size */
-	writel(state->fb->width + padding, ctx->regs + VIDW_WHOLE_X(win));
-	writel(state->fb->height, ctx->regs + VIDW_WHOLE_Y(win));
+	writel(fb->width + padding, ctx->regs + VIDW_WHOLE_X(win));
+	writel(fb->height, ctx->regs + VIDW_WHOLE_Y(win));
 
 	/* offset from the start of the buffer to read */
-	writel(plane->src_x, ctx->regs + VIDW_OFFSET_X(win));
-	writel(plane->src_y, ctx->regs + VIDW_OFFSET_Y(win));
+	writel(state->src.x, ctx->regs + VIDW_OFFSET_X(win));
+	writel(state->src.y, ctx->regs + VIDW_OFFSET_Y(win));
 
 	DRM_DEBUG_KMS("start addr = 0x%lx\n",
 			(unsigned long)val);
 	DRM_DEBUG_KMS("ovl_width = %d, ovl_height = %d\n",
-			plane->crtc_w, plane->crtc_h);
+			state->crtc.w, state->crtc.h);
 
-	/*
-	 * OSD position.
-	 * In case the window layout goes of LCD layout, DECON fails.
-	 */
-	if ((plane->crtc_x + plane->crtc_w) > mode->hdisplay)
-		plane->crtc_x = mode->hdisplay - plane->crtc_w;
-	if ((plane->crtc_y + plane->crtc_h) > mode->vdisplay)
-		plane->crtc_y = mode->vdisplay - plane->crtc_h;
-
-	val = VIDOSDxA_TOPLEFT_X(plane->crtc_x) |
-		VIDOSDxA_TOPLEFT_Y(plane->crtc_y);
+	val = VIDOSDxA_TOPLEFT_X(state->crtc.x) |
+		VIDOSDxA_TOPLEFT_Y(state->crtc.y);
 	writel(val, ctx->regs + VIDOSD_A(win));
 
-	last_x = plane->crtc_x + plane->crtc_w;
+	last_x = state->crtc.x + state->crtc.w;
 	if (last_x)
 		last_x--;
-	last_y = plane->crtc_y + plane->crtc_h;
+	last_y = state->crtc.y + state->crtc.h;
 	if (last_y)
 		last_y--;
 
@@ -466,7 +459,7 @@
 	writel(val, ctx->regs + VIDOSD_B(win));
 
 	DRM_DEBUG_KMS("osd pos: tx = %d, ty = %d, bx = %d, by = %d\n",
-			plane->crtc_x, plane->crtc_y, last_x, last_y);
+			state->crtc.x, state->crtc.y, last_x, last_y);
 
 	/* OSD alpha */
 	alpha = VIDOSDxC_ALPHA0_R_F(0x0) |
@@ -481,7 +474,7 @@
 
 	writel(alpha, ctx->regs + VIDOSD_D(win));
 
-	decon_win_set_pixfmt(ctx, win, state->fb);
+	decon_win_set_pixfmt(ctx, win, fb);
 
 	/* hardware window 0 doesn't support color key. */
 	if (win != 0)
@@ -555,39 +548,12 @@
 static void decon_enable(struct exynos_drm_crtc *crtc)
 {
 	struct decon_context *ctx = crtc->ctx;
-	int ret;
 
 	if (!ctx->suspended)
 		return;
 
-	ctx->suspended = false;
-
 	pm_runtime_get_sync(ctx->dev);
 
-	ret = clk_prepare_enable(ctx->pclk);
-	if (ret < 0) {
-		DRM_ERROR("Failed to prepare_enable the pclk [%d]\n", ret);
-		return;
-	}
-
-	ret = clk_prepare_enable(ctx->aclk);
-	if (ret < 0) {
-		DRM_ERROR("Failed to prepare_enable the aclk [%d]\n", ret);
-		return;
-	}
-
-	ret = clk_prepare_enable(ctx->eclk);
-	if  (ret < 0) {
-		DRM_ERROR("Failed to prepare_enable the eclk [%d]\n", ret);
-		return;
-	}
-
-	ret = clk_prepare_enable(ctx->vclk);
-	if  (ret < 0) {
-		DRM_ERROR("Failed to prepare_enable the vclk [%d]\n", ret);
-		return;
-	}
-
 	decon_init(ctx);
 
 	/* if vblank was enabled status, enable it again. */
@@ -595,6 +561,8 @@
 		decon_enable_vblank(ctx->crtc);
 
 	decon_commit(ctx->crtc);
+
+	ctx->suspended = false;
 }
 
 static void decon_disable(struct exynos_drm_crtc *crtc)
@@ -613,11 +581,6 @@
 	for (i = 0; i < WINDOWS_NR; i++)
 		decon_disable_plane(crtc, &ctx->planes[i]);
 
-	clk_disable_unprepare(ctx->vclk);
-	clk_disable_unprepare(ctx->eclk);
-	clk_disable_unprepare(ctx->aclk);
-	clk_disable_unprepare(ctx->pclk);
-
 	pm_runtime_put_sync(ctx->dev);
 
 	ctx->suspended = true;
@@ -679,8 +642,7 @@
 	struct decon_context *ctx = dev_get_drvdata(dev);
 	struct drm_device *drm_dev = data;
 	struct exynos_drm_plane *exynos_plane;
-	enum drm_plane_type type;
-	unsigned int zpos;
+	unsigned int i;
 	int ret;
 
 	ret = decon_ctx_initialize(ctx, drm_dev);
@@ -689,11 +651,14 @@
 		return ret;
 	}
 
-	for (zpos = 0; zpos < WINDOWS_NR; zpos++) {
-		type = exynos_plane_get_type(zpos, CURSOR_WIN);
-		ret = exynos_plane_init(drm_dev, &ctx->planes[zpos],
-					1 << ctx->pipe, type, decon_formats,
-					ARRAY_SIZE(decon_formats), zpos);
+	for (i = 0; i < WINDOWS_NR; i++) {
+		ctx->configs[i].pixel_formats = decon_formats;
+		ctx->configs[i].num_pixel_formats = ARRAY_SIZE(decon_formats);
+		ctx->configs[i].zpos = i;
+		ctx->configs[i].type = decon_win_types[i];
+
+		ret = exynos_plane_init(drm_dev, &ctx->planes[i],
+					1 << ctx->pipe, &ctx->configs[i]);
 		if (ret)
 			return ret;
 	}
@@ -843,11 +808,63 @@
 	return 0;
 }
 
+#ifdef CONFIG_PM
+static int exynos7_decon_suspend(struct device *dev)
+{
+	struct decon_context *ctx = dev_get_drvdata(dev);
+
+	clk_disable_unprepare(ctx->vclk);
+	clk_disable_unprepare(ctx->eclk);
+	clk_disable_unprepare(ctx->aclk);
+	clk_disable_unprepare(ctx->pclk);
+
+	return 0;
+}
+
+static int exynos7_decon_resume(struct device *dev)
+{
+	struct decon_context *ctx = dev_get_drvdata(dev);
+	int ret;
+
+	ret = clk_prepare_enable(ctx->pclk);
+	if (ret < 0) {
+		DRM_ERROR("Failed to prepare_enable the pclk [%d]\n", ret);
+		return ret;
+	}
+
+	ret = clk_prepare_enable(ctx->aclk);
+	if (ret < 0) {
+		DRM_ERROR("Failed to prepare_enable the aclk [%d]\n", ret);
+		return ret;
+	}
+
+	ret = clk_prepare_enable(ctx->eclk);
+	if  (ret < 0) {
+		DRM_ERROR("Failed to prepare_enable the eclk [%d]\n", ret);
+		return ret;
+	}
+
+	ret = clk_prepare_enable(ctx->vclk);
+	if  (ret < 0) {
+		DRM_ERROR("Failed to prepare_enable the vclk [%d]\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops exynos7_decon_pm_ops = {
+	SET_RUNTIME_PM_OPS(exynos7_decon_suspend, exynos7_decon_resume,
+			   NULL)
+};
+
 struct platform_driver decon_driver = {
 	.probe		= decon_probe,
 	.remove		= decon_remove,
 	.driver		= {
 		.name	= "exynos-decon",
+		.pm	= &exynos7_decon_pm_ops,
 		.of_match_table = decon_driver_dt_match,
 	},
 };
diff --git a/drivers/gpu/drm/exynos/exynos_dp_core.c b/drivers/gpu/drm/exynos/exynos_dp_core.c
index 124fb9a..793e497 100644
--- a/drivers/gpu/drm/exynos/exynos_dp_core.c
+++ b/drivers/gpu/drm/exynos/exynos_dp_core.c
@@ -1009,9 +1009,9 @@
 {
 	int ret;
 
-	encoder->bridge = dp->bridge;
-	dp->bridge->encoder = encoder;
-	ret = drm_bridge_attach(encoder->dev, dp->bridge);
+	encoder->bridge->next = dp->ptn_bridge;
+	dp->ptn_bridge->encoder = encoder;
+	ret = drm_bridge_attach(encoder->dev, dp->ptn_bridge);
 	if (ret) {
 		DRM_ERROR("Failed to attach bridge to drm\n");
 		return ret;
@@ -1020,14 +1020,15 @@
 	return 0;
 }
 
-static int exynos_dp_create_connector(struct drm_encoder *encoder)
+static int exynos_dp_bridge_attach(struct drm_bridge *bridge)
 {
-	struct exynos_dp_device *dp = encoder_to_dp(encoder);
+	struct exynos_dp_device *dp = bridge->driver_private;
+	struct drm_encoder *encoder = &dp->encoder;
 	struct drm_connector *connector = &dp->connector;
 	int ret;
 
 	/* Pre-empt DP connector creation if there's a bridge */
-	if (dp->bridge) {
+	if (dp->ptn_bridge) {
 		ret = exynos_drm_attach_lcd_bridge(dp, encoder);
 		if (!ret)
 			return 0;
@@ -1052,27 +1053,16 @@
 	return ret;
 }
 
-static bool exynos_dp_mode_fixup(struct drm_encoder *encoder,
-				 const struct drm_display_mode *mode,
-				 struct drm_display_mode *adjusted_mode)
+static void exynos_dp_bridge_enable(struct drm_bridge *bridge)
 {
-	return true;
-}
-
-static void exynos_dp_mode_set(struct drm_encoder *encoder,
-			       struct drm_display_mode *mode,
-			       struct drm_display_mode *adjusted_mode)
-{
-}
-
-static void exynos_dp_enable(struct drm_encoder *encoder)
-{
-	struct exynos_dp_device *dp = encoder_to_dp(encoder);
+	struct exynos_dp_device *dp = bridge->driver_private;
 	struct exynos_drm_crtc *crtc = dp_to_crtc(dp);
 
 	if (dp->dpms_mode == DRM_MODE_DPMS_ON)
 		return;
 
+	pm_runtime_get_sync(dp->dev);
+
 	if (dp->panel) {
 		if (drm_panel_prepare(dp->panel)) {
 			DRM_ERROR("failed to setup the panel\n");
@@ -1083,7 +1073,6 @@
 	if (crtc->ops->clock_enable)
 		crtc->ops->clock_enable(dp_to_crtc(dp), true);
 
-	clk_prepare_enable(dp->clock);
 	phy_power_on(dp->phy);
 	exynos_dp_init_dp(dp);
 	enable_irq(dp->irq);
@@ -1092,9 +1081,9 @@
 	dp->dpms_mode = DRM_MODE_DPMS_ON;
 }
 
-static void exynos_dp_disable(struct drm_encoder *encoder)
+static void exynos_dp_bridge_disable(struct drm_bridge *bridge)
 {
-	struct exynos_dp_device *dp = encoder_to_dp(encoder);
+	struct exynos_dp_device *dp = bridge->driver_private;
 	struct exynos_drm_crtc *crtc = dp_to_crtc(dp);
 
 	if (dp->dpms_mode != DRM_MODE_DPMS_ON)
@@ -1110,7 +1099,6 @@
 	disable_irq(dp->irq);
 	flush_work(&dp->hotplug_work);
 	phy_power_off(dp->phy);
-	clk_disable_unprepare(dp->clock);
 
 	if (crtc->ops->clock_enable)
 		crtc->ops->clock_enable(dp_to_crtc(dp), false);
@@ -1120,9 +1108,74 @@
 			DRM_ERROR("failed to turnoff the panel\n");
 	}
 
+	pm_runtime_put_sync(dp->dev);
+
 	dp->dpms_mode = DRM_MODE_DPMS_OFF;
 }
 
+static void exynos_dp_bridge_nop(struct drm_bridge *bridge)
+{
+	/* do nothing */
+}
+
+static const struct drm_bridge_funcs exynos_dp_bridge_funcs = {
+	.enable = exynos_dp_bridge_enable,
+	.disable = exynos_dp_bridge_disable,
+	.pre_enable = exynos_dp_bridge_nop,
+	.post_disable = exynos_dp_bridge_nop,
+	.attach = exynos_dp_bridge_attach,
+};
+
+static int exynos_dp_create_connector(struct drm_encoder *encoder)
+{
+	struct exynos_dp_device *dp = encoder_to_dp(encoder);
+	struct drm_device *drm_dev = dp->drm_dev;
+	struct drm_bridge *bridge;
+	int ret;
+
+	bridge = devm_kzalloc(drm_dev->dev, sizeof(*bridge), GFP_KERNEL);
+	if (!bridge) {
+		DRM_ERROR("failed to allocate for drm bridge\n");
+		return -ENOMEM;
+	}
+
+	dp->bridge = bridge;
+
+	encoder->bridge = bridge;
+	bridge->driver_private = dp;
+	bridge->encoder = encoder;
+	bridge->funcs = &exynos_dp_bridge_funcs;
+
+	ret = drm_bridge_attach(drm_dev, bridge);
+	if (ret) {
+		DRM_ERROR("failed to attach drm bridge\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static bool exynos_dp_mode_fixup(struct drm_encoder *encoder,
+				 const struct drm_display_mode *mode,
+				 struct drm_display_mode *adjusted_mode)
+{
+	return true;
+}
+
+static void exynos_dp_mode_set(struct drm_encoder *encoder,
+			       struct drm_display_mode *mode,
+			       struct drm_display_mode *adjusted_mode)
+{
+}
+
+static void exynos_dp_enable(struct drm_encoder *encoder)
+{
+}
+
+static void exynos_dp_disable(struct drm_encoder *encoder)
+{
+}
+
 static struct drm_encoder_helper_funcs exynos_dp_encoder_helper_funcs = {
 	.mode_fixup = exynos_dp_mode_fixup,
 	.mode_set = exynos_dp_mode_set,
@@ -1238,7 +1291,7 @@
 		}
 	}
 
-	if (!dp->panel && !dp->bridge) {
+	if (!dp->panel && !dp->ptn_bridge) {
 		ret = exynos_dp_dt_parse_panel(dp);
 		if (ret)
 			return ret;
@@ -1289,10 +1342,6 @@
 
 	INIT_WORK(&dp->hotplug_work, exynos_dp_hotplug);
 
-	phy_power_on(dp->phy);
-
-	exynos_dp_init_dp(dp);
-
 	ret = devm_request_irq(&pdev->dev, dp->irq, exynos_dp_irq_handler,
 			irq_flags, "exynos-dp", dp);
 	if (ret) {
@@ -1313,7 +1362,7 @@
 	DRM_DEBUG_KMS("possible_crtcs = 0x%x\n", encoder->possible_crtcs);
 
 	drm_encoder_init(drm_dev, encoder, &exynos_dp_encoder_funcs,
-			 DRM_MODE_ENCODER_TMDS);
+			 DRM_MODE_ENCODER_TMDS, NULL);
 
 	drm_encoder_helper_add(encoder, &exynos_dp_encoder_helper_funcs);
 
@@ -1343,8 +1392,9 @@
 static int exynos_dp_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct device_node *panel_node, *bridge_node, *endpoint;
+	struct device_node *panel_node = NULL, *bridge_node, *endpoint = NULL;
 	struct exynos_dp_device *dp;
+	int ret;
 
 	dp = devm_kzalloc(&pdev->dev, sizeof(struct exynos_dp_device),
 				GFP_KERNEL);
@@ -1353,36 +1403,96 @@
 
 	platform_set_drvdata(pdev, dp);
 
+	/* This is for the backward compatibility. */
 	panel_node = of_parse_phandle(dev->of_node, "panel", 0);
 	if (panel_node) {
 		dp->panel = of_drm_find_panel(panel_node);
 		of_node_put(panel_node);
 		if (!dp->panel)
 			return -EPROBE_DEFER;
+	} else {
+		endpoint = of_graph_get_next_endpoint(dev->of_node, NULL);
+		if (endpoint) {
+			panel_node = of_graph_get_remote_port_parent(endpoint);
+			if (panel_node) {
+				dp->panel = of_drm_find_panel(panel_node);
+				of_node_put(panel_node);
+				if (!dp->panel)
+					return -EPROBE_DEFER;
+			} else {
+				DRM_ERROR("no port node for panel device.\n");
+				return -EINVAL;
+			}
+		}
 	}
 
+	if (endpoint)
+		goto out;
+
 	endpoint = of_graph_get_next_endpoint(dev->of_node, NULL);
 	if (endpoint) {
 		bridge_node = of_graph_get_remote_port_parent(endpoint);
 		if (bridge_node) {
-			dp->bridge = of_drm_find_bridge(bridge_node);
+			dp->ptn_bridge = of_drm_find_bridge(bridge_node);
 			of_node_put(bridge_node);
-			if (!dp->bridge)
+			if (!dp->ptn_bridge)
 				return -EPROBE_DEFER;
 		} else
 			return -EPROBE_DEFER;
 	}
 
-	return component_add(&pdev->dev, &exynos_dp_ops);
+out:
+	pm_runtime_enable(dev);
+
+	ret = component_add(&pdev->dev, &exynos_dp_ops);
+	if (ret)
+		goto err_disable_pm_runtime;
+
+	return ret;
+
+err_disable_pm_runtime:
+	pm_runtime_disable(dev);
+
+	return ret;
 }
 
 static int exynos_dp_remove(struct platform_device *pdev)
 {
+	pm_runtime_disable(&pdev->dev);
 	component_del(&pdev->dev, &exynos_dp_ops);
 
 	return 0;
 }
 
+#ifdef CONFIG_PM
+static int exynos_dp_suspend(struct device *dev)
+{
+	struct exynos_dp_device *dp = dev_get_drvdata(dev);
+
+	clk_disable_unprepare(dp->clock);
+
+	return 0;
+}
+
+static int exynos_dp_resume(struct device *dev)
+{
+	struct exynos_dp_device *dp = dev_get_drvdata(dev);
+	int ret;
+
+	ret = clk_prepare_enable(dp->clock);
+	if (ret < 0) {
+		DRM_ERROR("Failed to prepare_enable the clock clk [%d]\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops exynos_dp_pm_ops = {
+	SET_RUNTIME_PM_OPS(exynos_dp_suspend, exynos_dp_resume, NULL)
+};
+
 static const struct of_device_id exynos_dp_match[] = {
 	{ .compatible = "samsung,exynos5-dp" },
 	{},
@@ -1395,6 +1505,7 @@
 	.driver		= {
 		.name	= "exynos-dp",
 		.owner	= THIS_MODULE,
+		.pm	= &exynos_dp_pm_ops,
 		.of_match_table = exynos_dp_match,
 	},
 };
diff --git a/drivers/gpu/drm/exynos/exynos_dp_core.h b/drivers/gpu/drm/exynos/exynos_dp_core.h
index e413b6f..66eec4b 100644
--- a/drivers/gpu/drm/exynos/exynos_dp_core.h
+++ b/drivers/gpu/drm/exynos/exynos_dp_core.h
@@ -153,6 +153,7 @@
 	struct drm_connector	connector;
 	struct drm_panel	*panel;
 	struct drm_bridge	*bridge;
+	struct drm_bridge	*ptn_bridge;
 	struct clk		*clock;
 	unsigned int		irq;
 	void __iomem		*reg_base;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.c b/drivers/gpu/drm/exynos/exynos_drm_crtc.c
index b3ba27f..9d30a0f 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_crtc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.c
@@ -150,7 +150,7 @@
 	private->crtc[pipe] = crtc;
 
 	ret = drm_crtc_init_with_planes(drm_dev, crtc, plane, NULL,
-					&exynos_crtc_funcs);
+					&exynos_crtc_funcs, NULL);
 	if (ret < 0)
 		goto err_crtc;
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_dpi.c b/drivers/gpu/drm/exynos/exynos_drm_dpi.c
index c748b87..1dbf8dc 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dpi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dpi.c
@@ -309,7 +309,7 @@
 	DRM_DEBUG_KMS("possible_crtcs = 0x%x\n", encoder->possible_crtcs);
 
 	drm_encoder_init(dev, encoder, &exynos_dpi_encoder_funcs,
-			 DRM_MODE_ENCODER_TMDS);
+			 DRM_MODE_ENCODER_TMDS, NULL);
 
 	drm_encoder_helper_add(encoder, &exynos_dpi_encoder_helper_funcs);
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
index 2c6019d..9756797a 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -304,45 +304,6 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int exynos_drm_suspend(struct drm_device *dev, pm_message_t state)
-{
-	struct drm_connector *connector;
-
-	drm_modeset_lock_all(dev);
-	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
-		int old_dpms = connector->dpms;
-
-		if (connector->funcs->dpms)
-			connector->funcs->dpms(connector, DRM_MODE_DPMS_OFF);
-
-		/* Set the old mode back to the connector for resume */
-		connector->dpms = old_dpms;
-	}
-	drm_modeset_unlock_all(dev);
-
-	return 0;
-}
-
-static int exynos_drm_resume(struct drm_device *dev)
-{
-	struct drm_connector *connector;
-
-	drm_modeset_lock_all(dev);
-	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
-		if (connector->funcs->dpms) {
-			int dpms = connector->dpms;
-
-			connector->dpms = DRM_MODE_DPMS_OFF;
-			connector->funcs->dpms(connector, dpms);
-		}
-	}
-	drm_modeset_unlock_all(dev);
-
-	return 0;
-}
-#endif
-
 static int exynos_drm_open(struct drm_device *dev, struct drm_file *file)
 {
 	struct drm_exynos_file_private *file_priv;
@@ -476,31 +437,54 @@
 };
 
 #ifdef CONFIG_PM_SLEEP
-static int exynos_drm_sys_suspend(struct device *dev)
+static int exynos_drm_suspend(struct device *dev)
 {
 	struct drm_device *drm_dev = dev_get_drvdata(dev);
-	pm_message_t message;
+	struct drm_connector *connector;
 
 	if (pm_runtime_suspended(dev) || !drm_dev)
 		return 0;
 
-	message.event = PM_EVENT_SUSPEND;
-	return exynos_drm_suspend(drm_dev, message);
+	drm_modeset_lock_all(drm_dev);
+	drm_for_each_connector(connector, drm_dev) {
+		int old_dpms = connector->dpms;
+
+		if (connector->funcs->dpms)
+			connector->funcs->dpms(connector, DRM_MODE_DPMS_OFF);
+
+		/* Set the old mode back to the connector for resume */
+		connector->dpms = old_dpms;
+	}
+	drm_modeset_unlock_all(drm_dev);
+
+	return 0;
 }
 
-static int exynos_drm_sys_resume(struct device *dev)
+static int exynos_drm_resume(struct device *dev)
 {
 	struct drm_device *drm_dev = dev_get_drvdata(dev);
+	struct drm_connector *connector;
 
 	if (pm_runtime_suspended(dev) || !drm_dev)
 		return 0;
 
-	return exynos_drm_resume(drm_dev);
+	drm_modeset_lock_all(drm_dev);
+	drm_for_each_connector(connector, drm_dev) {
+		if (connector->funcs->dpms) {
+			int dpms = connector->dpms;
+
+			connector->dpms = DRM_MODE_DPMS_OFF;
+			connector->funcs->dpms(connector, dpms);
+		}
+	}
+	drm_modeset_unlock_all(drm_dev);
+
+	return 0;
 }
 #endif
 
 static const struct dev_pm_ops exynos_drm_pm_ops = {
-	SET_SYSTEM_SLEEP_PM_OPS(exynos_drm_sys_suspend, exynos_drm_sys_resume)
+	SET_SYSTEM_SLEEP_PM_OPS(exynos_drm_suspend, exynos_drm_resume)
 };
 
 /* forward declaration */
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h
index f1eda7f..82bbd7f 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h
@@ -38,24 +38,44 @@
 	EXYNOS_DISPLAY_TYPE_VIDI,
 };
 
+struct exynos_drm_rect {
+	unsigned int x, y;
+	unsigned int w, h;
+};
+
+/*
+ * Exynos drm plane state structure.
+ *
+ * @base: plane_state object (contains drm_framebuffer pointer)
+ * @src: rectangle of the source image data to be displayed (clipped to
+ *       visible part).
+ * @crtc: rectangle of the target image position on hardware screen
+ *       (clipped to visible part).
+ * @h_ratio: horizontal scaling ratio, 16.16 fixed point
+ * @v_ratio: vertical scaling ratio, 16.16 fixed point
+ *
+ * this structure consists plane state data that will be applied to hardware
+ * specific overlay info.
+ */
+
+struct exynos_drm_plane_state {
+	struct drm_plane_state base;
+	struct exynos_drm_rect crtc;
+	struct exynos_drm_rect src;
+	unsigned int h_ratio;
+	unsigned int v_ratio;
+};
+
+static inline struct exynos_drm_plane_state *
+to_exynos_plane_state(struct drm_plane_state *state)
+{
+	return container_of(state, struct exynos_drm_plane_state, base);
+}
+
 /*
  * Exynos drm common overlay structure.
  *
  * @base: plane object
- * @src_x: offset x on a framebuffer to be displayed.
- *	- the unit is screen coordinates.
- * @src_y: offset y on a framebuffer to be displayed.
- *	- the unit is screen coordinates.
- * @src_w: width of a partial image to be displayed from framebuffer.
- * @src_h: height of a partial image to be displayed from framebuffer.
- * @crtc_x: offset x on hardware screen.
- * @crtc_y: offset y on hardware screen.
- * @crtc_w: window width to be displayed (hardware screen).
- * @crtc_h: window height to be displayed (hardware screen).
- * @h_ratio: horizontal scaling ratio, 16.16 fixed point
- * @v_ratio: vertical scaling ratio, 16.16 fixed point
- * @dma_addr: array of bus(accessed by dma) address to the memory region
- *	      allocated for a overlay.
  * @zpos: order of overlay layer(z position).
  *
  * this structure is common to exynos SoC and its contents would be copied
@@ -64,21 +84,32 @@
 
 struct exynos_drm_plane {
 	struct drm_plane base;
-	unsigned int src_x;
-	unsigned int src_y;
-	unsigned int src_w;
-	unsigned int src_h;
-	unsigned int crtc_x;
-	unsigned int crtc_y;
-	unsigned int crtc_w;
-	unsigned int crtc_h;
-	unsigned int h_ratio;
-	unsigned int v_ratio;
-	dma_addr_t dma_addr[MAX_FB_BUFFER];
+	const struct exynos_drm_plane_config *config;
 	unsigned int zpos;
 	struct drm_framebuffer *pending_fb;
 };
 
+#define EXYNOS_DRM_PLANE_CAP_DOUBLE	(1 << 0)
+#define EXYNOS_DRM_PLANE_CAP_SCALE	(1 << 1)
+
+/*
+ * Exynos DRM plane configuration structure.
+ *
+ * @zpos: z-position of the plane.
+ * @type: type of the plane (primary, cursor or overlay).
+ * @pixel_formats: supported pixel formats.
+ * @num_pixel_formats: number of elements in 'pixel_formats'.
+ * @capabilities: supported features (see EXYNOS_DRM_PLANE_CAP_*)
+ */
+
+struct exynos_drm_plane_config {
+	unsigned int zpos;
+	enum drm_plane_type type;
+	const uint32_t *pixel_formats;
+	unsigned int num_pixel_formats;
+	unsigned int capabilities;
+};
+
 /*
  * Exynos drm crtc ops
  *
diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
index 12b03b3..bc09bba 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
@@ -1458,66 +1458,6 @@
 	.transfer = exynos_dsi_host_transfer,
 };
 
-static int exynos_dsi_poweron(struct exynos_dsi *dsi)
-{
-	struct exynos_dsi_driver_data *driver_data = dsi->driver_data;
-	int ret, i;
-
-	ret = regulator_bulk_enable(ARRAY_SIZE(dsi->supplies), dsi->supplies);
-	if (ret < 0) {
-		dev_err(dsi->dev, "cannot enable regulators %d\n", ret);
-		return ret;
-	}
-
-	for (i = 0; i < driver_data->num_clks; i++) {
-		ret = clk_prepare_enable(dsi->clks[i]);
-		if (ret < 0)
-			goto err_clk;
-	}
-
-	ret = phy_power_on(dsi->phy);
-	if (ret < 0) {
-		dev_err(dsi->dev, "cannot enable phy %d\n", ret);
-		goto err_clk;
-	}
-
-	return 0;
-
-err_clk:
-	while (--i > -1)
-		clk_disable_unprepare(dsi->clks[i]);
-	regulator_bulk_disable(ARRAY_SIZE(dsi->supplies), dsi->supplies);
-
-	return ret;
-}
-
-static void exynos_dsi_poweroff(struct exynos_dsi *dsi)
-{
-	struct exynos_dsi_driver_data *driver_data = dsi->driver_data;
-	int ret, i;
-
-	usleep_range(10000, 20000);
-
-	if (dsi->state & DSIM_STATE_INITIALIZED) {
-		dsi->state &= ~DSIM_STATE_INITIALIZED;
-
-		exynos_dsi_disable_clock(dsi);
-
-		exynos_dsi_disable_irq(dsi);
-	}
-
-	dsi->state &= ~DSIM_STATE_CMD_LPM;
-
-	phy_power_off(dsi->phy);
-
-	for (i = driver_data->num_clks - 1; i > -1; i--)
-		clk_disable_unprepare(dsi->clks[i]);
-
-	ret = regulator_bulk_disable(ARRAY_SIZE(dsi->supplies), dsi->supplies);
-	if (ret < 0)
-		dev_err(dsi->dev, "cannot disable regulators %d\n", ret);
-}
-
 static void exynos_dsi_enable(struct drm_encoder *encoder)
 {
 	struct exynos_dsi *dsi = encoder_to_dsi(encoder);
@@ -1526,16 +1466,14 @@
 	if (dsi->state & DSIM_STATE_ENABLED)
 		return;
 
-	ret = exynos_dsi_poweron(dsi);
-	if (ret < 0)
-		return;
+	pm_runtime_get_sync(dsi->dev);
 
 	dsi->state |= DSIM_STATE_ENABLED;
 
 	ret = drm_panel_prepare(dsi->panel);
 	if (ret < 0) {
 		dsi->state &= ~DSIM_STATE_ENABLED;
-		exynos_dsi_poweroff(dsi);
+		pm_runtime_put_sync(dsi->dev);
 		return;
 	}
 
@@ -1547,7 +1485,7 @@
 		dsi->state &= ~DSIM_STATE_ENABLED;
 		exynos_dsi_set_display_enable(dsi, false);
 		drm_panel_unprepare(dsi->panel);
-		exynos_dsi_poweroff(dsi);
+		pm_runtime_put_sync(dsi->dev);
 		return;
 	}
 
@@ -1569,7 +1507,7 @@
 
 	dsi->state &= ~DSIM_STATE_ENABLED;
 
-	exynos_dsi_poweroff(dsi);
+	pm_runtime_put_sync(dsi->dev);
 }
 
 static enum drm_connector_status
@@ -1797,13 +1735,13 @@
 
 	ep = of_graph_get_next_endpoint(node, NULL);
 	if (!ep) {
-		ret = -ENXIO;
+		ret = -EINVAL;
 		goto end;
 	}
 
 	dsi->bridge_node = of_graph_get_remote_port_parent(ep);
 	if (!dsi->bridge_node) {
-		ret = -ENXIO;
+		ret = -EINVAL;
 		goto end;
 	}
 end:
@@ -1831,7 +1769,7 @@
 	DRM_DEBUG_KMS("possible_crtcs = 0x%x\n", encoder->possible_crtcs);
 
 	drm_encoder_init(drm_dev, encoder, &exynos_dsi_encoder_funcs,
-			 DRM_MODE_ENCODER_TMDS);
+			 DRM_MODE_ENCODER_TMDS, NULL);
 
 	drm_encoder_helper_add(encoder, &exynos_dsi_encoder_helper_funcs);
 
@@ -1954,22 +1892,99 @@
 
 	platform_set_drvdata(pdev, &dsi->encoder);
 
+	pm_runtime_enable(dev);
+
 	return component_add(dev, &exynos_dsi_component_ops);
 }
 
 static int exynos_dsi_remove(struct platform_device *pdev)
 {
+	pm_runtime_disable(&pdev->dev);
+
 	component_del(&pdev->dev, &exynos_dsi_component_ops);
 
 	return 0;
 }
 
+#ifdef CONFIG_PM
+static int exynos_dsi_suspend(struct device *dev)
+{
+	struct drm_encoder *encoder = dev_get_drvdata(dev);
+	struct exynos_dsi *dsi = encoder_to_dsi(encoder);
+	struct exynos_dsi_driver_data *driver_data = dsi->driver_data;
+	int ret, i;
+
+	usleep_range(10000, 20000);
+
+	if (dsi->state & DSIM_STATE_INITIALIZED) {
+		dsi->state &= ~DSIM_STATE_INITIALIZED;
+
+		exynos_dsi_disable_clock(dsi);
+
+		exynos_dsi_disable_irq(dsi);
+	}
+
+	dsi->state &= ~DSIM_STATE_CMD_LPM;
+
+	phy_power_off(dsi->phy);
+
+	for (i = driver_data->num_clks - 1; i > -1; i--)
+		clk_disable_unprepare(dsi->clks[i]);
+
+	ret = regulator_bulk_disable(ARRAY_SIZE(dsi->supplies), dsi->supplies);
+	if (ret < 0)
+		dev_err(dsi->dev, "cannot disable regulators %d\n", ret);
+
+	return 0;
+}
+
+static int exynos_dsi_resume(struct device *dev)
+{
+	struct drm_encoder *encoder = dev_get_drvdata(dev);
+	struct exynos_dsi *dsi = encoder_to_dsi(encoder);
+	struct exynos_dsi_driver_data *driver_data = dsi->driver_data;
+	int ret, i;
+
+	ret = regulator_bulk_enable(ARRAY_SIZE(dsi->supplies), dsi->supplies);
+	if (ret < 0) {
+		dev_err(dsi->dev, "cannot enable regulators %d\n", ret);
+		return ret;
+	}
+
+	for (i = 0; i < driver_data->num_clks; i++) {
+		ret = clk_prepare_enable(dsi->clks[i]);
+		if (ret < 0)
+			goto err_clk;
+	}
+
+	ret = phy_power_on(dsi->phy);
+	if (ret < 0) {
+		dev_err(dsi->dev, "cannot enable phy %d\n", ret);
+		goto err_clk;
+	}
+
+	return 0;
+
+err_clk:
+	while (--i > -1)
+		clk_disable_unprepare(dsi->clks[i]);
+	regulator_bulk_disable(ARRAY_SIZE(dsi->supplies), dsi->supplies);
+
+	return ret;
+}
+#endif
+
+static const struct dev_pm_ops exynos_dsi_pm_ops = {
+	SET_RUNTIME_PM_OPS(exynos_dsi_suspend, exynos_dsi_resume, NULL)
+};
+
 struct platform_driver dsi_driver = {
 	.probe = exynos_dsi_probe,
 	.remove = exynos_dsi_remove,
 	.driver = {
 		   .name = "exynos-dsi",
 		   .owner = THIS_MODULE,
+		   .pm = &exynos_dsi_pm_ops,
 		   .of_match_table = exynos_dsi_of_match,
 	},
 };
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.c b/drivers/gpu/drm/exynos/exynos_drm_fb.c
index fcea28b..f6bdb0d 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fb.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c
@@ -37,6 +37,7 @@
 struct exynos_drm_fb {
 	struct drm_framebuffer	fb;
 	struct exynos_drm_gem	*exynos_gem[MAX_FB_BUFFER];
+	dma_addr_t			dma_addr[MAX_FB_BUFFER];
 };
 
 static int check_fb_gem_memory_type(struct drm_device *drm_dev,
@@ -117,7 +118,7 @@
 
 struct drm_framebuffer *
 exynos_drm_framebuffer_init(struct drm_device *dev,
-			    struct drm_mode_fb_cmd2 *mode_cmd,
+			    const struct drm_mode_fb_cmd2 *mode_cmd,
 			    struct exynos_drm_gem **exynos_gem,
 			    int count)
 {
@@ -135,6 +136,8 @@
 			goto err;
 
 		exynos_fb->exynos_gem[i] = exynos_gem[i];
+		exynos_fb->dma_addr[i] = exynos_gem[i]->dma_addr
+						+ mode_cmd->offsets[i];
 	}
 
 	drm_helper_mode_fill_fb_struct(&exynos_fb->fb, mode_cmd);
@@ -154,7 +157,7 @@
 
 static struct drm_framebuffer *
 exynos_user_fb_create(struct drm_device *dev, struct drm_file *file_priv,
-		      struct drm_mode_fb_cmd2 *mode_cmd)
+		      const struct drm_mode_fb_cmd2 *mode_cmd)
 {
 	struct exynos_drm_gem *exynos_gem[MAX_FB_BUFFER];
 	struct drm_gem_object *obj;
@@ -189,21 +192,14 @@
 	return ERR_PTR(ret);
 }
 
-struct exynos_drm_gem *exynos_drm_fb_gem(struct drm_framebuffer *fb, int index)
+dma_addr_t exynos_drm_fb_dma_addr(struct drm_framebuffer *fb, int index)
 {
 	struct exynos_drm_fb *exynos_fb = to_exynos_fb(fb);
-	struct exynos_drm_gem *exynos_gem;
 
 	if (index >= MAX_FB_BUFFER)
-		return NULL;
+		return DMA_ERROR_CODE;
 
-	exynos_gem = exynos_fb->exynos_gem[index];
-	if (!exynos_gem)
-		return NULL;
-
-	DRM_DEBUG_KMS("dma_addr: 0x%lx\n", (unsigned long)exynos_gem->dma_addr);
-
-	return exynos_gem;
+	return exynos_fb->dma_addr[index];
 }
 
 static void exynos_drm_output_poll_changed(struct drm_device *dev)
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.h b/drivers/gpu/drm/exynos/exynos_drm_fb.h
index 726a2d4..4aae9dd 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fb.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_fb.h
@@ -18,12 +18,11 @@
 
 struct drm_framebuffer *
 exynos_drm_framebuffer_init(struct drm_device *dev,
-			    struct drm_mode_fb_cmd2 *mode_cmd,
+			    const struct drm_mode_fb_cmd2 *mode_cmd,
 			    struct exynos_drm_gem **exynos_gem,
 			    int count);
 
-/* get gem object of a drm framebuffer */
-struct exynos_drm_gem *exynos_drm_fb_gem(struct drm_framebuffer *fb, int index);
+dma_addr_t exynos_drm_fb_dma_addr(struct drm_framebuffer *fb, int index);
 
 void exynos_drm_mode_config_init(struct drm_device *dev);
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
index bd75c15..2e224712 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
@@ -29,6 +29,7 @@
 #include <drm/exynos_drm.h>
 
 #include "exynos_drm_drv.h"
+#include "exynos_drm_fb.h"
 #include "exynos_drm_fbdev.h"
 #include "exynos_drm_crtc.h"
 #include "exynos_drm_plane.h"
@@ -87,7 +88,6 @@
 
 /* FIMD has totally five hardware windows. */
 #define WINDOWS_NR	5
-#define CURSOR_WIN	4
 
 struct fimd_driver_data {
 	unsigned int timing_base;
@@ -150,6 +150,7 @@
 	struct drm_device		*drm_dev;
 	struct exynos_drm_crtc		*crtc;
 	struct exynos_drm_plane		planes[WINDOWS_NR];
+	struct exynos_drm_plane_config	configs[WINDOWS_NR];
 	struct clk			*bus_clk;
 	struct clk			*lcd_clk;
 	void __iomem			*regs;
@@ -187,6 +188,14 @@
 };
 MODULE_DEVICE_TABLE(of, fimd_driver_dt_match);
 
+static const enum drm_plane_type fimd_win_types[WINDOWS_NR] = {
+	DRM_PLANE_TYPE_PRIMARY,
+	DRM_PLANE_TYPE_OVERLAY,
+	DRM_PLANE_TYPE_OVERLAY,
+	DRM_PLANE_TYPE_OVERLAY,
+	DRM_PLANE_TYPE_CURSOR,
+};
+
 static const uint32_t fimd_formats[] = {
 	DRM_FORMAT_C8,
 	DRM_FORMAT_XRGB1555,
@@ -478,7 +487,7 @@
 
 
 static void fimd_win_set_pixfmt(struct fimd_context *ctx, unsigned int win,
-				struct drm_framebuffer *fb)
+				uint32_t pixel_format, int width)
 {
 	unsigned long val;
 
@@ -489,11 +498,11 @@
 	 * So the request format is ARGB8888 then change it to XRGB8888.
 	 */
 	if (ctx->driver_data->has_limited_fmt && !win) {
-		if (fb->pixel_format == DRM_FORMAT_ARGB8888)
-			fb->pixel_format = DRM_FORMAT_XRGB8888;
+		if (pixel_format == DRM_FORMAT_ARGB8888)
+			pixel_format = DRM_FORMAT_XRGB8888;
 	}
 
-	switch (fb->pixel_format) {
+	switch (pixel_format) {
 	case DRM_FORMAT_C8:
 		val |= WINCON0_BPPMODE_8BPP_PALETTE;
 		val |= WINCONx_BURSTLEN_8WORD;
@@ -529,17 +538,15 @@
 		break;
 	}
 
-	DRM_DEBUG_KMS("bpp = %d\n", fb->bits_per_pixel);
-
 	/*
-	 * In case of exynos, setting dma-burst to 16Word causes permanent
-	 * tearing for very small buffers, e.g. cursor buffer. Burst Mode
-	 * switching which is based on plane size is not recommended as
-	 * plane size varies alot towards the end of the screen and rapid
-	 * movement causes unstable DMA which results into iommu crash/tear.
+	 * Setting dma-burst to 16Word causes permanent tearing for very small
+	 * buffers, e.g. cursor buffer. Burst Mode switching which based on
+	 * plane size is not recommended as plane size varies alot towards the
+	 * end of the screen and rapid movement causes unstable DMA, but it is
+	 * still better to change dma-burst than displaying garbage.
 	 */
 
-	if (fb->width < MIN_FB_WIDTH_FOR_16WORD_BURST) {
+	if (width < MIN_FB_WIDTH_FOR_16WORD_BURST) {
 		val &= ~WINCONx_BURSTLEN_MASK;
 		val |= WINCONx_BURSTLEN_4WORD;
 	}
@@ -640,39 +647,41 @@
 static void fimd_update_plane(struct exynos_drm_crtc *crtc,
 			      struct exynos_drm_plane *plane)
 {
+	struct exynos_drm_plane_state *state =
+				to_exynos_plane_state(plane->base.state);
 	struct fimd_context *ctx = crtc->ctx;
-	struct drm_plane_state *state = plane->base.state;
+	struct drm_framebuffer *fb = state->base.fb;
 	dma_addr_t dma_addr;
 	unsigned long val, size, offset;
 	unsigned int last_x, last_y, buf_offsize, line_size;
 	unsigned int win = plane->zpos;
-	unsigned int bpp = state->fb->bits_per_pixel >> 3;
-	unsigned int pitch = state->fb->pitches[0];
+	unsigned int bpp = fb->bits_per_pixel >> 3;
+	unsigned int pitch = fb->pitches[0];
 
 	if (ctx->suspended)
 		return;
 
-	offset = plane->src_x * bpp;
-	offset += plane->src_y * pitch;
+	offset = state->src.x * bpp;
+	offset += state->src.y * pitch;
 
 	/* buffer start address */
-	dma_addr = plane->dma_addr[0] + offset;
+	dma_addr = exynos_drm_fb_dma_addr(fb, 0) + offset;
 	val = (unsigned long)dma_addr;
 	writel(val, ctx->regs + VIDWx_BUF_START(win, 0));
 
 	/* buffer end address */
-	size = pitch * plane->crtc_h;
+	size = pitch * state->crtc.h;
 	val = (unsigned long)(dma_addr + size);
 	writel(val, ctx->regs + VIDWx_BUF_END(win, 0));
 
 	DRM_DEBUG_KMS("start addr = 0x%lx, end addr = 0x%lx, size = 0x%lx\n",
 			(unsigned long)dma_addr, val, size);
 	DRM_DEBUG_KMS("ovl_width = %d, ovl_height = %d\n",
-			plane->crtc_w, plane->crtc_h);
+			state->crtc.w, state->crtc.h);
 
 	/* buffer size */
-	buf_offsize = pitch - (plane->crtc_w * bpp);
-	line_size = plane->crtc_w * bpp;
+	buf_offsize = pitch - (state->crtc.w * bpp);
+	line_size = state->crtc.w * bpp;
 	val = VIDW_BUF_SIZE_OFFSET(buf_offsize) |
 		VIDW_BUF_SIZE_PAGEWIDTH(line_size) |
 		VIDW_BUF_SIZE_OFFSET_E(buf_offsize) |
@@ -680,16 +689,16 @@
 	writel(val, ctx->regs + VIDWx_BUF_SIZE(win, 0));
 
 	/* OSD position */
-	val = VIDOSDxA_TOPLEFT_X(plane->crtc_x) |
-		VIDOSDxA_TOPLEFT_Y(plane->crtc_y) |
-		VIDOSDxA_TOPLEFT_X_E(plane->crtc_x) |
-		VIDOSDxA_TOPLEFT_Y_E(plane->crtc_y);
+	val = VIDOSDxA_TOPLEFT_X(state->crtc.x) |
+		VIDOSDxA_TOPLEFT_Y(state->crtc.y) |
+		VIDOSDxA_TOPLEFT_X_E(state->crtc.x) |
+		VIDOSDxA_TOPLEFT_Y_E(state->crtc.y);
 	writel(val, ctx->regs + VIDOSD_A(win));
 
-	last_x = plane->crtc_x + plane->crtc_w;
+	last_x = state->crtc.x + state->crtc.w;
 	if (last_x)
 		last_x--;
-	last_y = plane->crtc_y + plane->crtc_h;
+	last_y = state->crtc.y + state->crtc.h;
 	if (last_y)
 		last_y--;
 
@@ -699,20 +708,20 @@
 	writel(val, ctx->regs + VIDOSD_B(win));
 
 	DRM_DEBUG_KMS("osd pos: tx = %d, ty = %d, bx = %d, by = %d\n",
-			plane->crtc_x, plane->crtc_y, last_x, last_y);
+			state->crtc.x, state->crtc.y, last_x, last_y);
 
 	/* OSD size */
 	if (win != 3 && win != 4) {
 		u32 offset = VIDOSD_D(win);
 		if (win == 0)
 			offset = VIDOSD_C(win);
-		val = plane->crtc_w * plane->crtc_h;
+		val = state->crtc.w * state->crtc.h;
 		writel(val, ctx->regs + offset);
 
 		DRM_DEBUG_KMS("osd size = 0x%x\n", (unsigned int)val);
 	}
 
-	fimd_win_set_pixfmt(ctx, win, state->fb);
+	fimd_win_set_pixfmt(ctx, win, fb->pixel_format, state->src.w);
 
 	/* hardware window 0 doesn't support color key. */
 	if (win != 0)
@@ -745,7 +754,6 @@
 static void fimd_enable(struct exynos_drm_crtc *crtc)
 {
 	struct fimd_context *ctx = crtc->ctx;
-	int ret;
 
 	if (!ctx->suspended)
 		return;
@@ -754,18 +762,6 @@
 
 	pm_runtime_get_sync(ctx->dev);
 
-	ret = clk_prepare_enable(ctx->bus_clk);
-	if (ret < 0) {
-		DRM_ERROR("Failed to prepare_enable the bus clk [%d]\n", ret);
-		return;
-	}
-
-	ret = clk_prepare_enable(ctx->lcd_clk);
-	if  (ret < 0) {
-		DRM_ERROR("Failed to prepare_enable the lcd clk [%d]\n", ret);
-		return;
-	}
-
 	/* if vblank was enabled status, enable it again. */
 	if (test_and_clear_bit(0, &ctx->irq_flags))
 		fimd_enable_vblank(ctx->crtc);
@@ -795,11 +791,7 @@
 
 	writel(0, ctx->regs + VIDCON0);
 
-	clk_disable_unprepare(ctx->lcd_clk);
-	clk_disable_unprepare(ctx->bus_clk);
-
 	pm_runtime_put_sync(ctx->dev);
-
 	ctx->suspended = true;
 }
 
@@ -941,18 +933,19 @@
 	struct drm_device *drm_dev = data;
 	struct exynos_drm_private *priv = drm_dev->dev_private;
 	struct exynos_drm_plane *exynos_plane;
-	enum drm_plane_type type;
-	unsigned int zpos;
+	unsigned int i;
 	int ret;
 
 	ctx->drm_dev = drm_dev;
 	ctx->pipe = priv->pipe++;
 
-	for (zpos = 0; zpos < WINDOWS_NR; zpos++) {
-		type = exynos_plane_get_type(zpos, CURSOR_WIN);
-		ret = exynos_plane_init(drm_dev, &ctx->planes[zpos],
-					1 << ctx->pipe, type, fimd_formats,
-					ARRAY_SIZE(fimd_formats), zpos);
+	for (i = 0; i < WINDOWS_NR; i++) {
+		ctx->configs[i].pixel_formats = fimd_formats;
+		ctx->configs[i].num_pixel_formats = ARRAY_SIZE(fimd_formats);
+		ctx->configs[i].zpos = i;
+		ctx->configs[i].type = fimd_win_types[i];
+		ret = exynos_plane_init(drm_dev, &ctx->planes[i],
+					1 << ctx->pipe, &ctx->configs[i]);
 		if (ret)
 			return ret;
 	}
@@ -1121,12 +1114,49 @@
 	return 0;
 }
 
+#ifdef CONFIG_PM
+static int exynos_fimd_suspend(struct device *dev)
+{
+	struct fimd_context *ctx = dev_get_drvdata(dev);
+
+	clk_disable_unprepare(ctx->lcd_clk);
+	clk_disable_unprepare(ctx->bus_clk);
+
+	return 0;
+}
+
+static int exynos_fimd_resume(struct device *dev)
+{
+	struct fimd_context *ctx = dev_get_drvdata(dev);
+	int ret;
+
+	ret = clk_prepare_enable(ctx->bus_clk);
+	if (ret < 0) {
+		DRM_ERROR("Failed to prepare_enable the bus clk [%d]\n", ret);
+		return ret;
+	}
+
+	ret = clk_prepare_enable(ctx->lcd_clk);
+	if  (ret < 0) {
+		DRM_ERROR("Failed to prepare_enable the lcd clk [%d]\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops exynos_fimd_pm_ops = {
+	SET_RUNTIME_PM_OPS(exynos_fimd_suspend, exynos_fimd_resume, NULL)
+};
+
 struct platform_driver fimd_driver = {
 	.probe		= fimd_probe,
 	.remove		= fimd_remove,
 	.driver		= {
 		.name	= "exynos4-fb",
 		.owner	= THIS_MODULE,
+		.pm	= &exynos_fimd_pm_ops,
 		.of_match_table = fimd_driver_dt_match,
 	},
 };
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.h b/drivers/gpu/drm/exynos/exynos_drm_gem.h
index 37ab8b2..9ca5047 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.h
@@ -55,8 +55,6 @@
 	struct sg_table		*sgt;
 };
 
-struct page **exynos_gem_get_pages(struct drm_gem_object *obj, gfp_t gfpmask);
-
 /* destroy a buffer with gem object */
 void exynos_drm_gem_destroy(struct exynos_drm_gem *exynos_gem);
 
@@ -91,10 +89,6 @@
 					unsigned int gem_handle,
 					struct drm_file *filp);
 
-/* map user space allocated by malloc to pages. */
-int exynos_drm_gem_userptr_ioctl(struct drm_device *dev, void *data,
-				      struct drm_file *file_priv);
-
 /* get buffer information to memory region allocated by gem. */
 int exynos_drm_gem_get_ioctl(struct drm_device *dev, void *data,
 				      struct drm_file *file_priv);
@@ -123,28 +117,6 @@
 /* set vm_flags and we can change the vm attribute to other one at here. */
 int exynos_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma);
 
-static inline int vma_is_io(struct vm_area_struct *vma)
-{
-	return !!(vma->vm_flags & (VM_IO | VM_PFNMAP));
-}
-
-/* get a copy of a virtual memory region. */
-struct vm_area_struct *exynos_gem_get_vma(struct vm_area_struct *vma);
-
-/* release a userspace virtual memory area. */
-void exynos_gem_put_vma(struct vm_area_struct *vma);
-
-/* get pages from user space. */
-int exynos_gem_get_pages_from_userptr(unsigned long start,
-						unsigned int npages,
-						struct page **pages,
-						struct vm_area_struct *vma);
-
-/* drop the reference to pages. */
-void exynos_gem_put_pages_to_userptr(struct page **pages,
-					unsigned int npages,
-					struct vm_area_struct *vma);
-
 /* map sgt with dma region. */
 int exynos_gem_map_sgt_with_dma(struct drm_device *drm_dev,
 				struct sg_table *sgt,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
index 11b87d2..7aecd23 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
@@ -15,7 +15,8 @@
 #include <linux/platform_device.h>
 #include <linux/clk.h>
 #include <linux/pm_runtime.h>
-#include <plat/map-base.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
 
 #include <drm/drmP.h>
 #include <drm/exynos_drm.h>
@@ -126,6 +127,7 @@
  * @ippdrv: prepare initialization using ippdrv.
  * @regs_res: register resources.
  * @regs: memory mapped io registers.
+ * @sysreg: handle to SYSREG block regmap.
  * @lock: locking of operations.
  * @gsc_clk: gsc gate clock.
  * @sc: scaler infomations.
@@ -138,6 +140,7 @@
 	struct exynos_drm_ippdrv	ippdrv;
 	struct resource	*regs_res;
 	void __iomem	*regs;
+	struct regmap	*sysreg;
 	struct mutex	lock;
 	struct clk	*gsc_clk;
 	struct gsc_scaler	sc;
@@ -437,9 +440,12 @@
 
 static void gsc_set_gscblk_fimd_wb(struct gsc_context *ctx, bool enable)
 {
-	u32 gscblk_cfg;
+	unsigned int gscblk_cfg;
 
-	gscblk_cfg = readl(SYSREG_GSCBLK_CFG1);
+	if (!ctx->sysreg)
+		return;
+
+	regmap_read(ctx->sysreg, SYSREG_GSCBLK_CFG1, &gscblk_cfg);
 
 	if (enable)
 		gscblk_cfg |= GSC_BLK_DISP1WB_DEST(ctx->id) |
@@ -448,7 +454,7 @@
 	else
 		gscblk_cfg |= GSC_BLK_PXLASYNC_LO_MASK_WB(ctx->id);
 
-	writel(gscblk_cfg, SYSREG_GSCBLK_CFG1);
+	regmap_write(ctx->sysreg, SYSREG_GSCBLK_CFG1, gscblk_cfg);
 }
 
 static void gsc_handle_irq(struct gsc_context *ctx, bool enable,
@@ -1215,10 +1221,10 @@
 	DRM_DEBUG_KMS("enable[%d]\n", enable);
 
 	if (enable) {
-		clk_enable(ctx->gsc_clk);
+		clk_prepare_enable(ctx->gsc_clk);
 		ctx->suspended = false;
 	} else {
-		clk_disable(ctx->gsc_clk);
+		clk_disable_unprepare(ctx->gsc_clk);
 		ctx->suspended = true;
 	}
 
@@ -1663,6 +1669,15 @@
 	if (!ctx)
 		return -ENOMEM;
 
+	if (dev->of_node) {
+		ctx->sysreg = syscon_regmap_lookup_by_phandle(dev->of_node,
+							"samsung,sysreg");
+		if (IS_ERR(ctx->sysreg)) {
+			dev_warn(dev, "failed to get system register.\n");
+			ctx->sysreg = NULL;
+		}
+	}
+
 	/* clock control */
 	ctx->gsc_clk = devm_clk_get(dev, "gscl");
 	if (IS_ERR(ctx->gsc_clk)) {
@@ -1713,7 +1728,6 @@
 	mutex_init(&ctx->lock);
 	platform_set_drvdata(pdev, ctx);
 
-	pm_runtime_set_active(dev);
 	pm_runtime_enable(dev);
 
 	ret = exynos_drm_ippdrv_register(ippdrv);
@@ -1797,6 +1811,12 @@
 	SET_RUNTIME_PM_OPS(gsc_runtime_suspend, gsc_runtime_resume, NULL)
 };
 
+static const struct of_device_id exynos_drm_gsc_of_match[] = {
+	{ .compatible = "samsung,exynos5-gsc" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, exynos_drm_gsc_of_match);
+
 struct platform_driver gsc_driver = {
 	.probe		= gsc_probe,
 	.remove		= gsc_remove,
@@ -1804,6 +1824,7 @@
 		.name	= "exynos-drm-gsc",
 		.owner	= THIS_MODULE,
 		.pm	= &gsc_pm_ops,
+		.of_match_table = of_match_ptr(exynos_drm_gsc_of_match),
 	},
 };
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_plane.c b/drivers/gpu/drm/exynos/exynos_drm_plane.c
index 1793117..e668fcd 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_plane.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_plane.c
@@ -56,93 +56,170 @@
 	return size;
 }
 
-static void exynos_plane_mode_set(struct drm_plane *plane,
-				  struct drm_crtc *crtc,
-				  struct drm_framebuffer *fb,
-				  int crtc_x, int crtc_y,
-				  unsigned int crtc_w, unsigned int crtc_h,
-				  uint32_t src_x, uint32_t src_y,
-				  uint32_t src_w, uint32_t src_h)
+static void exynos_plane_mode_set(struct exynos_drm_plane_state *exynos_state)
+
 {
-	struct exynos_drm_plane *exynos_plane = to_exynos_plane(plane);
+	struct drm_plane_state *state = &exynos_state->base;
+	struct drm_crtc *crtc = exynos_state->base.crtc;
 	struct drm_display_mode *mode = &crtc->state->adjusted_mode;
+	int crtc_x, crtc_y;
+	unsigned int crtc_w, crtc_h;
+	unsigned int src_x, src_y;
+	unsigned int src_w, src_h;
 	unsigned int actual_w;
 	unsigned int actual_h;
 
+	/*
+	 * The original src/dest coordinates are stored in exynos_state->base,
+	 * but we want to keep another copy internal to our driver that we can
+	 * clip/modify ourselves.
+	 */
+
+	crtc_x = state->crtc_x;
+	crtc_y = state->crtc_y;
+	crtc_w = state->crtc_w;
+	crtc_h = state->crtc_h;
+
+	src_x = state->src_x >> 16;
+	src_y = state->src_y >> 16;
+	src_w = state->src_w >> 16;
+	src_h = state->src_h >> 16;
+
+	/* set ratio */
+	exynos_state->h_ratio = (src_w << 16) / crtc_w;
+	exynos_state->v_ratio = (src_h << 16) / crtc_h;
+
+	/* clip to visible area */
 	actual_w = exynos_plane_get_size(crtc_x, crtc_w, mode->hdisplay);
 	actual_h = exynos_plane_get_size(crtc_y, crtc_h, mode->vdisplay);
 
 	if (crtc_x < 0) {
 		if (actual_w)
-			src_x -= crtc_x;
+			src_x += ((-crtc_x) * exynos_state->h_ratio) >> 16;
 		crtc_x = 0;
 	}
 
 	if (crtc_y < 0) {
 		if (actual_h)
-			src_y -= crtc_y;
+			src_y += ((-crtc_y) * exynos_state->v_ratio) >> 16;
 		crtc_y = 0;
 	}
 
-	/* set ratio */
-	exynos_plane->h_ratio = (src_w << 16) / crtc_w;
-	exynos_plane->v_ratio = (src_h << 16) / crtc_h;
-
 	/* set drm framebuffer data. */
-	exynos_plane->src_x = src_x;
-	exynos_plane->src_y = src_y;
-	exynos_plane->src_w = (actual_w * exynos_plane->h_ratio) >> 16;
-	exynos_plane->src_h = (actual_h * exynos_plane->v_ratio) >> 16;
+	exynos_state->src.x = src_x;
+	exynos_state->src.y = src_y;
+	exynos_state->src.w = (actual_w * exynos_state->h_ratio) >> 16;
+	exynos_state->src.h = (actual_h * exynos_state->v_ratio) >> 16;
 
 	/* set plane range to be displayed. */
-	exynos_plane->crtc_x = crtc_x;
-	exynos_plane->crtc_y = crtc_y;
-	exynos_plane->crtc_w = actual_w;
-	exynos_plane->crtc_h = actual_h;
+	exynos_state->crtc.x = crtc_x;
+	exynos_state->crtc.y = crtc_y;
+	exynos_state->crtc.w = actual_w;
+	exynos_state->crtc.h = actual_h;
 
 	DRM_DEBUG_KMS("plane : offset_x/y(%d,%d), width/height(%d,%d)",
-			exynos_plane->crtc_x, exynos_plane->crtc_y,
-			exynos_plane->crtc_w, exynos_plane->crtc_h);
+			exynos_state->crtc.x, exynos_state->crtc.y,
+			exynos_state->crtc.w, exynos_state->crtc.h);
+}
 
-	plane->crtc = crtc;
+static void exynos_drm_plane_reset(struct drm_plane *plane)
+{
+	struct exynos_drm_plane_state *exynos_state;
+
+	if (plane->state) {
+		exynos_state = to_exynos_plane_state(plane->state);
+		if (exynos_state->base.fb)
+			drm_framebuffer_unreference(exynos_state->base.fb);
+		kfree(exynos_state);
+		plane->state = NULL;
+	}
+
+	exynos_state = kzalloc(sizeof(*exynos_state), GFP_KERNEL);
+	if (exynos_state) {
+		plane->state = &exynos_state->base;
+		plane->state->plane = plane;
+	}
+}
+
+static struct drm_plane_state *
+exynos_drm_plane_duplicate_state(struct drm_plane *plane)
+{
+	struct exynos_drm_plane_state *exynos_state;
+	struct exynos_drm_plane_state *copy;
+
+	exynos_state = to_exynos_plane_state(plane->state);
+	copy = kzalloc(sizeof(*exynos_state), GFP_KERNEL);
+	if (!copy)
+		return NULL;
+
+	__drm_atomic_helper_plane_duplicate_state(plane, &copy->base);
+	return &copy->base;
+}
+
+static void exynos_drm_plane_destroy_state(struct drm_plane *plane,
+					   struct drm_plane_state *old_state)
+{
+	struct exynos_drm_plane_state *old_exynos_state =
+					to_exynos_plane_state(old_state);
+	__drm_atomic_helper_plane_destroy_state(plane, old_state);
+	kfree(old_exynos_state);
 }
 
 static struct drm_plane_funcs exynos_plane_funcs = {
 	.update_plane	= drm_atomic_helper_update_plane,
 	.disable_plane	= drm_atomic_helper_disable_plane,
 	.destroy	= drm_plane_cleanup,
-	.reset = drm_atomic_helper_plane_reset,
-	.atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state,
-	.atomic_destroy_state = drm_atomic_helper_plane_destroy_state,
+	.reset		= exynos_drm_plane_reset,
+	.atomic_duplicate_state = exynos_drm_plane_duplicate_state,
+	.atomic_destroy_state = exynos_drm_plane_destroy_state,
 };
 
+static int
+exynos_drm_plane_check_size(const struct exynos_drm_plane_config *config,
+			    struct exynos_drm_plane_state *state)
+{
+	bool width_ok = false, height_ok = false;
+
+	if (config->capabilities & EXYNOS_DRM_PLANE_CAP_SCALE)
+		return 0;
+
+	if (state->src.w == state->crtc.w)
+		width_ok = true;
+
+	if (state->src.h == state->crtc.h)
+		height_ok = true;
+
+	if ((config->capabilities & EXYNOS_DRM_PLANE_CAP_DOUBLE) &&
+	    state->h_ratio == (1 << 15))
+		width_ok = true;
+
+	if ((config->capabilities & EXYNOS_DRM_PLANE_CAP_DOUBLE) &&
+	    state->v_ratio == (1 << 15))
+		height_ok = true;
+
+	if (width_ok & height_ok)
+		return 0;
+
+	DRM_DEBUG_KMS("scaling mode is not supported");
+	return -ENOTSUPP;
+}
+
 static int exynos_plane_atomic_check(struct drm_plane *plane,
 				     struct drm_plane_state *state)
 {
 	struct exynos_drm_plane *exynos_plane = to_exynos_plane(plane);
-	int nr;
-	int i;
+	struct exynos_drm_plane_state *exynos_state =
+						to_exynos_plane_state(state);
+	int ret = 0;
 
-	if (!state->fb)
+	if (!state->crtc || !state->fb)
 		return 0;
 
-	nr = drm_format_num_planes(state->fb->pixel_format);
-	for (i = 0; i < nr; i++) {
-		struct exynos_drm_gem *exynos_gem =
-					exynos_drm_fb_gem(state->fb, i);
-		if (!exynos_gem) {
-			DRM_DEBUG_KMS("gem object is null\n");
-			return -EFAULT;
-		}
+	/* translate state into exynos_state */
+	exynos_plane_mode_set(exynos_state);
 
-		exynos_plane->dma_addr[i] = exynos_gem->dma_addr +
-					    state->fb->offsets[i];
-
-		DRM_DEBUG_KMS("buffer: %d, dma_addr = 0x%lx\n",
-				i, (unsigned long)exynos_plane->dma_addr[i]);
-	}
-
-	return 0;
+	ret = exynos_drm_plane_check_size(exynos_plane->config, exynos_state);
+	return ret;
 }
 
 static void exynos_plane_atomic_update(struct drm_plane *plane,
@@ -155,12 +232,7 @@
 	if (!state->crtc)
 		return;
 
-	exynos_plane_mode_set(plane, state->crtc, state->fb,
-			      state->crtc_x, state->crtc_y,
-			      state->crtc_w, state->crtc_h,
-			      state->src_x >> 16, state->src_y >> 16,
-			      state->src_w >> 16, state->src_h >> 16);
-
+	plane->crtc = state->crtc;
 	exynos_plane->pending_fb = state->fb;
 
 	if (exynos_crtc->ops->update_plane)
@@ -177,8 +249,7 @@
 		return;
 
 	if (exynos_crtc->ops->disable_plane)
-		exynos_crtc->ops->disable_plane(exynos_crtc,
-						exynos_plane);
+		exynos_crtc->ops->disable_plane(exynos_crtc, exynos_plane);
 }
 
 static const struct drm_plane_helper_funcs plane_helper_funcs = {
@@ -207,28 +278,19 @@
 	drm_object_attach_property(&plane->base, prop, zpos);
 }
 
-enum drm_plane_type exynos_plane_get_type(unsigned int zpos,
-					  unsigned int cursor_win)
-{
-		if (zpos == DEFAULT_WIN)
-			return DRM_PLANE_TYPE_PRIMARY;
-		else if (zpos == cursor_win)
-			return DRM_PLANE_TYPE_CURSOR;
-		else
-			return DRM_PLANE_TYPE_OVERLAY;
-}
-
 int exynos_plane_init(struct drm_device *dev,
 		      struct exynos_drm_plane *exynos_plane,
-		      unsigned long possible_crtcs, enum drm_plane_type type,
-		      const uint32_t *formats, unsigned int fcount,
-		      unsigned int zpos)
+		      unsigned long possible_crtcs,
+		      const struct exynos_drm_plane_config *config)
 {
 	int err;
 
-	err = drm_universal_plane_init(dev, &exynos_plane->base, possible_crtcs,
-				       &exynos_plane_funcs, formats, fcount,
-				       type);
+	err = drm_universal_plane_init(dev, &exynos_plane->base,
+				       possible_crtcs,
+				       &exynos_plane_funcs,
+				       config->pixel_formats,
+				       config->num_pixel_formats,
+				       config->type, NULL);
 	if (err) {
 		DRM_ERROR("failed to initialize plane\n");
 		return err;
@@ -236,10 +298,12 @@
 
 	drm_plane_helper_add(&exynos_plane->base, &plane_helper_funcs);
 
-	exynos_plane->zpos = zpos;
+	exynos_plane->zpos = config->zpos;
+	exynos_plane->config = config;
 
-	if (type == DRM_PLANE_TYPE_OVERLAY)
-		exynos_plane_attach_zpos_property(&exynos_plane->base, zpos);
+	if (config->type == DRM_PLANE_TYPE_OVERLAY)
+		exynos_plane_attach_zpos_property(&exynos_plane->base,
+						  config->zpos);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/exynos/exynos_drm_plane.h b/drivers/gpu/drm/exynos/exynos_drm_plane.h
index abb641e..0dd0965 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_plane.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_plane.h
@@ -9,10 +9,7 @@
  *
  */
 
-enum drm_plane_type exynos_plane_get_type(unsigned int zpos,
-					  unsigned int cursor_win);
 int exynos_plane_init(struct drm_device *dev,
 		      struct exynos_drm_plane *exynos_plane,
-		      unsigned long possible_crtcs, enum drm_plane_type type,
-		      const uint32_t *formats, unsigned int fcount,
-		      unsigned int zpos);
+		      unsigned long possible_crtcs,
+		      const struct exynos_drm_plane_config *config);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
index 2f5c118..bea0f78 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_rotator.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
@@ -790,10 +790,10 @@
 static int rotator_clk_crtl(struct rot_context *rot, bool enable)
 {
 	if (enable) {
-		clk_enable(rot->clock);
+		clk_prepare_enable(rot->clock);
 		rot->suspended = false;
 	} else {
-		clk_disable(rot->clock);
+		clk_disable_unprepare(rot->clock);
 		rot->suspended = true;
 	}
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c
index 669362c..319aa31 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c
@@ -24,12 +24,12 @@
 
 #include "exynos_drm_drv.h"
 #include "exynos_drm_crtc.h"
+#include "exynos_drm_fb.h"
 #include "exynos_drm_plane.h"
 #include "exynos_drm_vidi.h"
 
 /* vidi has totally three virtual windows. */
 #define WINDOWS_NR		3
-#define CURSOR_WIN		2
 
 #define ctx_from_connector(c)	container_of(c, struct vidi_context, \
 					connector)
@@ -89,6 +89,12 @@
 	DRM_FORMAT_NV12,
 };
 
+static const enum drm_plane_type vidi_win_types[WINDOWS_NR] = {
+	DRM_PLANE_TYPE_PRIMARY,
+	DRM_PLANE_TYPE_OVERLAY,
+	DRM_PLANE_TYPE_CURSOR,
+};
+
 static int vidi_enable_vblank(struct exynos_drm_crtc *crtc)
 {
 	struct vidi_context *ctx = crtc->ctx;
@@ -125,12 +131,15 @@
 static void vidi_update_plane(struct exynos_drm_crtc *crtc,
 			      struct exynos_drm_plane *plane)
 {
+	struct drm_plane_state *state = plane->base.state;
 	struct vidi_context *ctx = crtc->ctx;
+	dma_addr_t addr;
 
 	if (ctx->suspended)
 		return;
 
-	DRM_DEBUG_KMS("dma_addr = %pad\n", plane->dma_addr);
+	addr = exynos_drm_fb_dma_addr(state->fb, 0);
+	DRM_DEBUG_KMS("dma_addr = %pad\n", &addr);
 
 	if (ctx->vblank_on)
 		schedule_work(&ctx->work);
@@ -439,17 +448,21 @@
 	struct drm_device *drm_dev = data;
 	struct drm_encoder *encoder = &ctx->encoder;
 	struct exynos_drm_plane *exynos_plane;
-	enum drm_plane_type type;
-	unsigned int zpos;
+	struct exynos_drm_plane_config plane_config = { 0 };
+	unsigned int i;
 	int pipe, ret;
 
 	vidi_ctx_initialize(ctx, drm_dev);
 
-	for (zpos = 0; zpos < WINDOWS_NR; zpos++) {
-		type = exynos_plane_get_type(zpos, CURSOR_WIN);
-		ret = exynos_plane_init(drm_dev, &ctx->planes[zpos],
-					1 << ctx->pipe, type, formats,
-					ARRAY_SIZE(formats), zpos);
+	plane_config.pixel_formats = formats;
+	plane_config.num_pixel_formats = ARRAY_SIZE(formats);
+
+	for (i = 0; i < WINDOWS_NR; i++) {
+		plane_config.zpos = i;
+		plane_config.type = vidi_win_types[i];
+
+		ret = exynos_plane_init(drm_dev, &ctx->planes[i],
+					1 << ctx->pipe, &plane_config);
 		if (ret)
 			return ret;
 	}
@@ -473,7 +486,7 @@
 	DRM_DEBUG_KMS("possible_crtcs = 0x%x\n", encoder->possible_crtcs);
 
 	drm_encoder_init(drm_dev, encoder, &exynos_vidi_encoder_funcs,
-			 DRM_MODE_ENCODER_TMDS);
+			 DRM_MODE_ENCODER_TMDS, NULL);
 
 	drm_encoder_helper_add(encoder, &exynos_vidi_encoder_helper_funcs);
 
diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c
index 57b6755..7d5ca6c 100644
--- a/drivers/gpu/drm/exynos/exynos_hdmi.c
+++ b/drivers/gpu/drm/exynos/exynos_hdmi.c
@@ -113,7 +113,7 @@
 	void __iomem			*regs_hdmiphy;
 	struct i2c_client		*hdmiphy_port;
 	struct i2c_adapter		*ddc_adpt;
-	struct gpio_desc 		*hpd_gpio;
+	struct gpio_desc		*hpd_gpio;
 	int				irq;
 	struct regmap			*pmureg;
 	struct clk			*hdmi;
@@ -1588,8 +1588,6 @@
 	if (hdata->powered)
 		return;
 
-	hdata->powered = true;
-
 	pm_runtime_get_sync(hdata->dev);
 
 	if (regulator_bulk_enable(ARRAY_SIZE(supply), hdata->regul_bulk))
@@ -1599,10 +1597,9 @@
 	regmap_update_bits(hdata->pmureg, PMU_HDMI_PHY_CONTROL,
 			PMU_HDMI_PHY_ENABLE_BIT, 1);
 
-	clk_prepare_enable(hdata->hdmi);
-	clk_prepare_enable(hdata->sclk_hdmi);
-
 	hdmi_conf_apply(hdata);
+
+	hdata->powered = true;
 }
 
 static void hdmi_disable(struct drm_encoder *encoder)
@@ -1633,9 +1630,6 @@
 
 	cancel_delayed_work(&hdata->hotplug_work);
 
-	clk_disable_unprepare(hdata->sclk_hdmi);
-	clk_disable_unprepare(hdata->hdmi);
-
 	/* reset pmu hdmiphy control bit to disable hdmiphy */
 	regmap_update_bits(hdata->pmureg, PMU_HDMI_PHY_CONTROL,
 			PMU_HDMI_PHY_ENABLE_BIT, 0);
@@ -1793,7 +1787,7 @@
 	DRM_DEBUG_KMS("possible_crtcs = 0x%x\n", encoder->possible_crtcs);
 
 	drm_encoder_init(drm_dev, encoder, &exynos_hdmi_encoder_funcs,
-			 DRM_MODE_ENCODER_TMDS);
+			 DRM_MODE_ENCODER_TMDS, NULL);
 
 	drm_encoder_helper_add(encoder, &exynos_hdmi_encoder_helper_funcs);
 
@@ -1978,12 +1972,49 @@
 	return 0;
 }
 
+#ifdef CONFIG_PM
+static int exynos_hdmi_suspend(struct device *dev)
+{
+	struct hdmi_context *hdata = dev_get_drvdata(dev);
+
+	clk_disable_unprepare(hdata->sclk_hdmi);
+	clk_disable_unprepare(hdata->hdmi);
+
+	return 0;
+}
+
+static int exynos_hdmi_resume(struct device *dev)
+{
+	struct hdmi_context *hdata = dev_get_drvdata(dev);
+	int ret;
+
+	ret = clk_prepare_enable(hdata->hdmi);
+	if (ret < 0) {
+		DRM_ERROR("Failed to prepare_enable the hdmi clk [%d]\n", ret);
+		return ret;
+	}
+	ret = clk_prepare_enable(hdata->sclk_hdmi);
+	if (ret < 0) {
+		DRM_ERROR("Failed to prepare_enable the sclk_mixer clk [%d]\n",
+			  ret);
+		return ret;
+	}
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops exynos_hdmi_pm_ops = {
+	SET_RUNTIME_PM_OPS(exynos_hdmi_suspend, exynos_hdmi_resume, NULL)
+};
+
 struct platform_driver hdmi_driver = {
 	.probe		= hdmi_probe,
 	.remove		= hdmi_remove,
 	.driver		= {
 		.name	= "exynos-hdmi",
 		.owner	= THIS_MODULE,
+		.pm	= &exynos_hdmi_pm_ops,
 		.of_match_table = hdmi_match_types,
 	},
 };
diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c
index d09f8f9..dfb35e2 100644
--- a/drivers/gpu/drm/exynos/exynos_mixer.c
+++ b/drivers/gpu/drm/exynos/exynos_mixer.c
@@ -37,12 +37,12 @@
 
 #include "exynos_drm_drv.h"
 #include "exynos_drm_crtc.h"
+#include "exynos_drm_fb.h"
 #include "exynos_drm_plane.h"
 #include "exynos_drm_iommu.h"
 
 #define MIXER_WIN_NR		3
 #define VP_DEFAULT_WIN		2
-#define CURSOR_WIN		1
 
 /* The pixelformats that are natively supported by the mixer. */
 #define MXR_FORMAT_RGB565	4
@@ -111,6 +111,28 @@
 	bool					has_sclk;
 };
 
+static const struct exynos_drm_plane_config plane_configs[MIXER_WIN_NR] = {
+	{
+		.zpos = 0,
+		.type = DRM_PLANE_TYPE_PRIMARY,
+		.pixel_formats = mixer_formats,
+		.num_pixel_formats = ARRAY_SIZE(mixer_formats),
+		.capabilities = EXYNOS_DRM_PLANE_CAP_DOUBLE,
+	}, {
+		.zpos = 1,
+		.type = DRM_PLANE_TYPE_CURSOR,
+		.pixel_formats = mixer_formats,
+		.num_pixel_formats = ARRAY_SIZE(mixer_formats),
+		.capabilities = EXYNOS_DRM_PLANE_CAP_DOUBLE,
+	}, {
+		.zpos = 2,
+		.type = DRM_PLANE_TYPE_OVERLAY,
+		.pixel_formats = vp_formats,
+		.num_pixel_formats = ARRAY_SIZE(vp_formats),
+		.capabilities = EXYNOS_DRM_PLANE_CAP_SCALE,
+	},
+};
+
 static const u8 filter_y_horiz_tap8[] = {
 	0,	-1,	-1,	-1,	-1,	-1,	-1,	-1,
 	-1,	-1,	-1,	-1,	-1,	0,	0,	0,
@@ -399,10 +421,11 @@
 static void vp_video_buffer(struct mixer_context *ctx,
 			    struct exynos_drm_plane *plane)
 {
+	struct exynos_drm_plane_state *state =
+				to_exynos_plane_state(plane->base.state);
+	struct drm_display_mode *mode = &state->base.crtc->state->adjusted_mode;
 	struct mixer_resources *res = &ctx->mixer_res;
-	struct drm_plane_state *state = plane->base.state;
-	struct drm_framebuffer *fb = state->fb;
-	struct drm_display_mode *mode = &state->crtc->mode;
+	struct drm_framebuffer *fb = state->base.fb;
 	unsigned long flags;
 	dma_addr_t luma_addr[2], chroma_addr[2];
 	bool tiled_mode = false;
@@ -422,8 +445,8 @@
 		return;
 	}
 
-	luma_addr[0] = plane->dma_addr[0];
-	chroma_addr[0] = plane->dma_addr[1];
+	luma_addr[0] = exynos_drm_fb_dma_addr(fb, 0);
+	chroma_addr[0] = exynos_drm_fb_dma_addr(fb, 1);
 
 	if (mode->flags & DRM_MODE_FLAG_INTERLACE) {
 		ctx->interlace = true;
@@ -459,24 +482,24 @@
 	vp_reg_write(res, VP_IMG_SIZE_C, VP_IMG_HSIZE(fb->pitches[0]) |
 		VP_IMG_VSIZE(fb->height / 2));
 
-	vp_reg_write(res, VP_SRC_WIDTH, plane->src_w);
-	vp_reg_write(res, VP_SRC_HEIGHT, plane->src_h);
+	vp_reg_write(res, VP_SRC_WIDTH, state->src.w);
+	vp_reg_write(res, VP_SRC_HEIGHT, state->src.h);
 	vp_reg_write(res, VP_SRC_H_POSITION,
-			VP_SRC_H_POSITION_VAL(plane->src_x));
-	vp_reg_write(res, VP_SRC_V_POSITION, plane->src_y);
+			VP_SRC_H_POSITION_VAL(state->src.x));
+	vp_reg_write(res, VP_SRC_V_POSITION, state->src.y);
 
-	vp_reg_write(res, VP_DST_WIDTH, plane->crtc_w);
-	vp_reg_write(res, VP_DST_H_POSITION, plane->crtc_x);
+	vp_reg_write(res, VP_DST_WIDTH, state->crtc.w);
+	vp_reg_write(res, VP_DST_H_POSITION, state->crtc.x);
 	if (ctx->interlace) {
-		vp_reg_write(res, VP_DST_HEIGHT, plane->crtc_h / 2);
-		vp_reg_write(res, VP_DST_V_POSITION, plane->crtc_y / 2);
+		vp_reg_write(res, VP_DST_HEIGHT, state->crtc.h / 2);
+		vp_reg_write(res, VP_DST_V_POSITION, state->crtc.y / 2);
 	} else {
-		vp_reg_write(res, VP_DST_HEIGHT, plane->crtc_h);
-		vp_reg_write(res, VP_DST_V_POSITION, plane->crtc_y);
+		vp_reg_write(res, VP_DST_HEIGHT, state->crtc.h);
+		vp_reg_write(res, VP_DST_V_POSITION, state->crtc.y);
 	}
 
-	vp_reg_write(res, VP_H_RATIO, plane->h_ratio);
-	vp_reg_write(res, VP_V_RATIO, plane->v_ratio);
+	vp_reg_write(res, VP_H_RATIO, state->h_ratio);
+	vp_reg_write(res, VP_V_RATIO, state->v_ratio);
 
 	vp_reg_write(res, VP_ENDIAN_MODE, VP_ENDIAN_MODE_LITTLE);
 
@@ -505,37 +528,14 @@
 	mixer_reg_writemask(res, MXR_CFG, ~0, MXR_CFG_LAYER_UPDATE);
 }
 
-static int mixer_setup_scale(const struct exynos_drm_plane *plane,
-		unsigned int *x_ratio, unsigned int *y_ratio)
-{
-	if (plane->crtc_w != plane->src_w) {
-		if (plane->crtc_w == 2 * plane->src_w)
-			*x_ratio = 1;
-		else
-			goto fail;
-	}
-
-	if (plane->crtc_h != plane->src_h) {
-		if (plane->crtc_h == 2 * plane->src_h)
-			*y_ratio = 1;
-		else
-			goto fail;
-	}
-
-	return 0;
-
-fail:
-	DRM_DEBUG_KMS("only 2x width/height scaling of plane supported\n");
-	return -ENOTSUPP;
-}
-
 static void mixer_graph_buffer(struct mixer_context *ctx,
 			       struct exynos_drm_plane *plane)
 {
+	struct exynos_drm_plane_state *state =
+				to_exynos_plane_state(plane->base.state);
+	struct drm_display_mode *mode = &state->base.crtc->state->adjusted_mode;
 	struct mixer_resources *res = &ctx->mixer_res;
-	struct drm_plane_state *state = plane->base.state;
-	struct drm_framebuffer *fb = state->fb;
-	struct drm_display_mode *mode = &state->crtc->mode;
+	struct drm_framebuffer *fb = state->base.fb;
 	unsigned long flags;
 	unsigned int win = plane->zpos;
 	unsigned int x_ratio = 0, y_ratio = 0;
@@ -567,17 +567,17 @@
 		return;
 	}
 
-	/* check if mixer supports requested scaling setup */
-	if (mixer_setup_scale(plane, &x_ratio, &y_ratio))
-		return;
+	/* ratio is already checked by common plane code */
+	x_ratio = state->h_ratio == (1 << 15);
+	y_ratio = state->v_ratio == (1 << 15);
 
-	dst_x_offset = plane->crtc_x;
-	dst_y_offset = plane->crtc_y;
+	dst_x_offset = state->crtc.x;
+	dst_y_offset = state->crtc.y;
 
 	/* converting dma address base and source offset */
-	dma_addr = plane->dma_addr[0]
-		+ (plane->src_x * fb->bits_per_pixel >> 3)
-		+ (plane->src_y * fb->pitches[0]);
+	dma_addr = exynos_drm_fb_dma_addr(fb, 0)
+		+ (state->src.x * fb->bits_per_pixel >> 3)
+		+ (state->src.y * fb->pitches[0]);
 	src_x_offset = 0;
 	src_y_offset = 0;
 
@@ -605,8 +605,8 @@
 		mixer_reg_write(res, MXR_RESOLUTION, val);
 	}
 
-	val  = MXR_GRP_WH_WIDTH(plane->src_w);
-	val |= MXR_GRP_WH_HEIGHT(plane->src_h);
+	val  = MXR_GRP_WH_WIDTH(state->src.w);
+	val |= MXR_GRP_WH_HEIGHT(state->src.h);
 	val |= MXR_GRP_WH_H_SCALE(x_ratio);
 	val |= MXR_GRP_WH_V_SCALE(y_ratio);
 	mixer_reg_write(res, MXR_GRAPHIC_WH(win), val);
@@ -1020,43 +1020,12 @@
 {
 	struct mixer_context *ctx = crtc->ctx;
 	struct mixer_resources *res = &ctx->mixer_res;
-	int ret;
 
 	if (test_bit(MXR_BIT_POWERED, &ctx->flags))
 		return;
 
 	pm_runtime_get_sync(ctx->dev);
 
-	ret = clk_prepare_enable(res->mixer);
-	if (ret < 0) {
-		DRM_ERROR("Failed to prepare_enable the mixer clk [%d]\n", ret);
-		return;
-	}
-	ret = clk_prepare_enable(res->hdmi);
-	if (ret < 0) {
-		DRM_ERROR("Failed to prepare_enable the hdmi clk [%d]\n", ret);
-		return;
-	}
-	if (ctx->vp_enabled) {
-		ret = clk_prepare_enable(res->vp);
-		if (ret < 0) {
-			DRM_ERROR("Failed to prepare_enable the vp clk [%d]\n",
-				  ret);
-			return;
-		}
-		if (ctx->has_sclk) {
-			ret = clk_prepare_enable(res->sclk_mixer);
-			if (ret < 0) {
-				DRM_ERROR("Failed to prepare_enable the " \
-					   "sclk_mixer clk [%d]\n",
-					  ret);
-				return;
-			}
-		}
-	}
-
-	set_bit(MXR_BIT_POWERED, &ctx->flags);
-
 	mixer_reg_writemask(res, MXR_STATUS, ~0, MXR_STATUS_SOFT_RESET);
 
 	if (test_bit(MXR_BIT_VSYNC, &ctx->flags)) {
@@ -1064,12 +1033,13 @@
 		mixer_reg_writemask(res, MXR_INT_EN, ~0, MXR_INT_EN_VSYNC);
 	}
 	mixer_win_reset(ctx);
+
+	set_bit(MXR_BIT_POWERED, &ctx->flags);
 }
 
 static void mixer_disable(struct exynos_drm_crtc *crtc)
 {
 	struct mixer_context *ctx = crtc->ctx;
-	struct mixer_resources *res = &ctx->mixer_res;
 	int i;
 
 	if (!test_bit(MXR_BIT_POWERED, &ctx->flags))
@@ -1081,17 +1051,9 @@
 	for (i = 0; i < MIXER_WIN_NR; i++)
 		mixer_disable_plane(crtc, &ctx->planes[i]);
 
+	pm_runtime_put(ctx->dev);
+
 	clear_bit(MXR_BIT_POWERED, &ctx->flags);
-
-	clk_disable_unprepare(res->hdmi);
-	clk_disable_unprepare(res->mixer);
-	if (ctx->vp_enabled) {
-		clk_disable_unprepare(res->vp);
-		if (ctx->has_sclk)
-			clk_disable_unprepare(res->sclk_mixer);
-	}
-
-	pm_runtime_put_sync(ctx->dev);
 }
 
 /* Only valid for Mixer version 16.0.33.0 */
@@ -1187,30 +1149,19 @@
 	struct mixer_context *ctx = dev_get_drvdata(dev);
 	struct drm_device *drm_dev = data;
 	struct exynos_drm_plane *exynos_plane;
-	unsigned int zpos;
+	unsigned int i;
 	int ret;
 
 	ret = mixer_initialize(ctx, drm_dev);
 	if (ret)
 		return ret;
 
-	for (zpos = 0; zpos < MIXER_WIN_NR; zpos++) {
-		enum drm_plane_type type;
-		const uint32_t *formats;
-		unsigned int fcount;
+	for (i = 0; i < MIXER_WIN_NR; i++) {
+		if (i == VP_DEFAULT_WIN && !ctx->vp_enabled)
+			continue;
 
-		if (zpos < VP_DEFAULT_WIN) {
-			formats = mixer_formats;
-			fcount = ARRAY_SIZE(mixer_formats);
-		} else {
-			formats = vp_formats;
-			fcount = ARRAY_SIZE(vp_formats);
-		}
-
-		type = exynos_plane_get_type(zpos, CURSOR_WIN);
-		ret = exynos_plane_init(drm_dev, &ctx->planes[zpos],
-					1 << ctx->pipe, type, formats, fcount,
-					zpos);
+		ret = exynos_plane_init(drm_dev, &ctx->planes[i],
+					1 << ctx->pipe, &plane_configs[i]);
 		if (ret)
 			return ret;
 	}
@@ -1293,10 +1244,70 @@
 	return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
+static int exynos_mixer_suspend(struct device *dev)
+{
+	struct mixer_context *ctx = dev_get_drvdata(dev);
+	struct mixer_resources *res = &ctx->mixer_res;
+
+	clk_disable_unprepare(res->hdmi);
+	clk_disable_unprepare(res->mixer);
+	if (ctx->vp_enabled) {
+		clk_disable_unprepare(res->vp);
+		if (ctx->has_sclk)
+			clk_disable_unprepare(res->sclk_mixer);
+	}
+
+	return 0;
+}
+
+static int exynos_mixer_resume(struct device *dev)
+{
+	struct mixer_context *ctx = dev_get_drvdata(dev);
+	struct mixer_resources *res = &ctx->mixer_res;
+	int ret;
+
+	ret = clk_prepare_enable(res->mixer);
+	if (ret < 0) {
+		DRM_ERROR("Failed to prepare_enable the mixer clk [%d]\n", ret);
+		return ret;
+	}
+	ret = clk_prepare_enable(res->hdmi);
+	if (ret < 0) {
+		DRM_ERROR("Failed to prepare_enable the hdmi clk [%d]\n", ret);
+		return ret;
+	}
+	if (ctx->vp_enabled) {
+		ret = clk_prepare_enable(res->vp);
+		if (ret < 0) {
+			DRM_ERROR("Failed to prepare_enable the vp clk [%d]\n",
+				  ret);
+			return ret;
+		}
+		if (ctx->has_sclk) {
+			ret = clk_prepare_enable(res->sclk_mixer);
+			if (ret < 0) {
+				DRM_ERROR("Failed to prepare_enable the " \
+					   "sclk_mixer clk [%d]\n",
+					  ret);
+				return ret;
+			}
+		}
+	}
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops exynos_mixer_pm_ops = {
+	SET_RUNTIME_PM_OPS(exynos_mixer_suspend, exynos_mixer_resume, NULL)
+};
+
 struct platform_driver mixer_driver = {
 	.driver = {
 		.name = "exynos-mixer",
 		.owner = THIS_MODULE,
+		.pm = &exynos_mixer_pm_ops,
 		.of_match_table = mixer_match_types,
 	},
 	.probe = mixer_probe,
diff --git a/drivers/gpu/drm/exynos/regs-gsc.h b/drivers/gpu/drm/exynos/regs-gsc.h
index 9ad5927..4704a99 100644
--- a/drivers/gpu/drm/exynos/regs-gsc.h
+++ b/drivers/gpu/drm/exynos/regs-gsc.h
@@ -273,12 +273,12 @@
 #define GSC_CLK_GATE_MODE_SNOOP_CNT(x)	((x) << 0)
 
 /* SYSCON. GSCBLK_CFG */
-#define SYSREG_GSCBLK_CFG1		(S3C_VA_SYS + 0x0224)
+#define SYSREG_GSCBLK_CFG1		0x0224
 #define GSC_BLK_DISP1WB_DEST(x)		(x << 10)
 #define GSC_BLK_SW_RESET_WB_DEST(x)	(1 << (18 + x))
 #define GSC_BLK_PXLASYNC_LO_MASK_WB(x)	(0 << (14 + x))
 #define GSC_BLK_GSCL_WB_IN_SRC_SEL(x)	(1 << (2 * x))
-#define SYSREG_GSCBLK_CFG2		(S3C_VA_SYS + 0x2000)
+#define SYSREG_GSCBLK_CFG2		0x2000
 #define PXLASYNC_LO_MASK_CAMIF_GSCL(x)	(1 << (x))
 
 #endif /* EXYNOS_REGS_GSC_H_ */
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c
index 82a3d31..d8ab8f0 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c
@@ -175,7 +175,7 @@
 
 	primary = fsl_dcu_drm_primary_create_plane(fsl_dev->drm);
 	ret = drm_crtc_init_with_planes(fsl_dev->drm, crtc, primary, NULL,
-					&fsl_dcu_drm_crtc_funcs);
+					&fsl_dcu_drm_crtc_funcs, NULL);
 	if (ret < 0)
 		return ret;
 
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c
index 51daaea..4b13cf9 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c
@@ -249,7 +249,7 @@
 				       &fsl_dcu_drm_plane_funcs,
 				       fsl_dcu_drm_plane_formats,
 				       ARRAY_SIZE(fsl_dcu_drm_plane_formats),
-				       DRM_PLANE_TYPE_PRIMARY);
+				       DRM_PLANE_TYPE_PRIMARY, NULL);
 	if (ret) {
 		kfree(primary);
 		primary = NULL;
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c
index fe8ab5d..8780deb 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c
@@ -57,7 +57,7 @@
 
 	encoder->possible_crtcs = 1;
 	ret = drm_encoder_init(fsl_dev->drm, encoder, &encoder_funcs,
-			       DRM_MODE_ENCODER_LVDS);
+			       DRM_MODE_ENCODER_LVDS, NULL);
 	if (ret < 0)
 		return ret;
 
diff --git a/drivers/gpu/drm/gma500/cdv_device.c b/drivers/gpu/drm/gma500/cdv_device.c
index 3531f90..8745971 100644
--- a/drivers/gpu/drm/gma500/cdv_device.c
+++ b/drivers/gpu/drm/gma500/cdv_device.c
@@ -619,6 +619,8 @@
 	.init_pm = cdv_init_pm,
 	.save_regs = cdv_save_display_registers,
 	.restore_regs = cdv_restore_display_registers,
+	.save_crtc = gma_crtc_save,
+	.restore_crtc = gma_crtc_restore,
 	.power_down = cdv_power_down,
 	.power_up = cdv_power_up,
 	.update_wm = cdv_update_wm,
diff --git a/drivers/gpu/drm/gma500/cdv_intel_crt.c b/drivers/gpu/drm/gma500/cdv_intel_crt.c
index 248c33a..d0717a8 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_crt.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_crt.c
@@ -273,7 +273,7 @@
 
 	encoder = &gma_encoder->base;
 	drm_encoder_init(dev, encoder,
-		&cdv_intel_crt_enc_funcs, DRM_MODE_ENCODER_DAC);
+		&cdv_intel_crt_enc_funcs, DRM_MODE_ENCODER_DAC, NULL);
 
 	gma_connector_attach_encoder(gma_connector, gma_encoder);
 
diff --git a/drivers/gpu/drm/gma500/cdv_intel_display.c b/drivers/gpu/drm/gma500/cdv_intel_display.c
index 7d47b3d..6126546 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_display.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_display.c
@@ -983,8 +983,6 @@
 };
 
 const struct drm_crtc_funcs cdv_intel_crtc_funcs = {
-	.save = gma_crtc_save,
-	.restore = gma_crtc_restore,
 	.cursor_set = gma_crtc_cursor_set,
 	.cursor_move = gma_crtc_cursor_move,
 	.gamma_set = gma_crtc_gamma_set,
diff --git a/drivers/gpu/drm/gma500/cdv_intel_dp.c b/drivers/gpu/drm/gma500/cdv_intel_dp.c
index 17cea40..7bb1f1a 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_dp.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_dp.c
@@ -2020,7 +2020,8 @@
 	encoder = &gma_encoder->base;
 
 	drm_connector_init(dev, connector, &cdv_intel_dp_connector_funcs, type);
-	drm_encoder_init(dev, encoder, &cdv_intel_dp_enc_funcs, DRM_MODE_ENCODER_TMDS);
+	drm_encoder_init(dev, encoder, &cdv_intel_dp_enc_funcs,
+			 DRM_MODE_ENCODER_TMDS, NULL);
 
 	gma_connector_attach_encoder(gma_connector, gma_encoder);
 
diff --git a/drivers/gpu/drm/gma500/cdv_intel_hdmi.c b/drivers/gpu/drm/gma500/cdv_intel_hdmi.c
index 6b1d334..ddf2d77 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_hdmi.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_hdmi.c
@@ -270,8 +270,6 @@
 
 static const struct drm_connector_funcs cdv_hdmi_connector_funcs = {
 	.dpms = drm_helper_connector_dpms,
-	.save = cdv_hdmi_save,
-	.restore = cdv_hdmi_restore,
 	.detect = cdv_hdmi_detect,
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.set_property = cdv_hdmi_set_property,
@@ -306,13 +304,16 @@
 
 	connector = &gma_connector->base;
 	connector->polled = DRM_CONNECTOR_POLL_HPD;
+	gma_connector->save = cdv_hdmi_save;
+	gma_connector->restore = cdv_hdmi_restore;
+
 	encoder = &gma_encoder->base;
 	drm_connector_init(dev, connector,
 			   &cdv_hdmi_connector_funcs,
 			   DRM_MODE_CONNECTOR_DVID);
 
 	drm_encoder_init(dev, encoder, &psb_intel_lvds_enc_funcs,
-			 DRM_MODE_ENCODER_TMDS);
+			 DRM_MODE_ENCODER_TMDS, NULL);
 
 	gma_connector_attach_encoder(gma_connector, gma_encoder);
 	gma_encoder->type = INTEL_OUTPUT_HDMI;
diff --git a/drivers/gpu/drm/gma500/cdv_intel_lvds.c b/drivers/gpu/drm/gma500/cdv_intel_lvds.c
index 211069b..813ef23 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_lvds.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_lvds.c
@@ -530,8 +530,6 @@
 
 static const struct drm_connector_funcs cdv_intel_lvds_connector_funcs = {
 	.dpms = drm_helper_connector_dpms,
-	.save = cdv_intel_lvds_save,
-	.restore = cdv_intel_lvds_restore,
 	.detect = cdv_intel_lvds_detect,
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.set_property = cdv_intel_lvds_set_property,
@@ -643,6 +641,8 @@
 	gma_encoder->dev_priv = lvds_priv;
 
 	connector = &gma_connector->base;
+	gma_connector->save = cdv_intel_lvds_save;
+	gma_connector->restore = cdv_intel_lvds_restore;
 	encoder = &gma_encoder->base;
 
 
@@ -652,7 +652,7 @@
 
 	drm_encoder_init(dev, encoder,
 			 &cdv_intel_lvds_enc_funcs,
-			 DRM_MODE_ENCODER_LVDS);
+			 DRM_MODE_ENCODER_LVDS, NULL);
 
 
 	gma_connector_attach_encoder(gma_connector, gma_encoder);
diff --git a/drivers/gpu/drm/gma500/framebuffer.c b/drivers/gpu/drm/gma500/framebuffer.c
index 2eaf1b3..ee95c03 100644
--- a/drivers/gpu/drm/gma500/framebuffer.c
+++ b/drivers/gpu/drm/gma500/framebuffer.c
@@ -241,7 +241,7 @@
  */
 static int psb_framebuffer_init(struct drm_device *dev,
 					struct psb_framebuffer *fb,
-					struct drm_mode_fb_cmd2 *mode_cmd,
+					const struct drm_mode_fb_cmd2 *mode_cmd,
 					struct gtt_range *gt)
 {
 	u32 bpp, depth;
@@ -284,7 +284,7 @@
 
 static struct drm_framebuffer *psb_framebuffer_create
 			(struct drm_device *dev,
-			 struct drm_mode_fb_cmd2 *mode_cmd,
+			 const struct drm_mode_fb_cmd2 *mode_cmd,
 			 struct gtt_range *gt)
 {
 	struct psb_framebuffer *fb;
@@ -406,8 +406,6 @@
 
 	memset(dev_priv->vram_addr + backing->offset, 0, size);
 
-	mutex_lock(&dev->struct_mutex);
-
 	info = drm_fb_helper_alloc_fbi(&fbdev->psb_fb_helper);
 	if (IS_ERR(info)) {
 		ret = PTR_ERR(info);
@@ -463,17 +461,15 @@
 	dev_dbg(dev->dev, "allocated %dx%d fb\n",
 					psbfb->base.width, psbfb->base.height);
 
-	mutex_unlock(&dev->struct_mutex);
 	return 0;
 out_unref:
 	if (backing->stolen)
 		psb_gtt_free_range(dev, backing);
 	else
-		drm_gem_object_unreference(&backing->gem);
+		drm_gem_object_unreference_unlocked(&backing->gem);
 
 	drm_fb_helper_release_fbi(&fbdev->psb_fb_helper);
 out_err1:
-	mutex_unlock(&dev->struct_mutex);
 	psb_gtt_free_range(dev, backing);
 	return ret;
 }
@@ -488,7 +484,7 @@
  */
 static struct drm_framebuffer *psb_user_framebuffer_create
 			(struct drm_device *dev, struct drm_file *filp,
-			 struct drm_mode_fb_cmd2 *cmd)
+			 const struct drm_mode_fb_cmd2 *cmd)
 {
 	struct gtt_range *r;
 	struct drm_gem_object *obj;
@@ -569,7 +565,7 @@
 	drm_framebuffer_cleanup(&psbfb->base);
 
 	if (psbfb->gtt)
-		drm_gem_object_unreference(&psbfb->gtt->gem);
+		drm_gem_object_unreference_unlocked(&psbfb->gtt->gem);
 	return 0;
 }
 
@@ -784,12 +780,8 @@
 {
 	struct drm_psb_private *dev_priv = dev->dev_private;
 	if (dev_priv->modeset) {
-		mutex_lock(&dev->struct_mutex);
-
 		drm_kms_helper_poll_fini(dev);
 		psb_fbdev_fini(dev);
 		drm_mode_config_cleanup(dev);
-
-		mutex_unlock(&dev->struct_mutex);
 	}
 }
diff --git a/drivers/gpu/drm/gma500/gem.c b/drivers/gpu/drm/gma500/gem.c
index c707fa6..506224b 100644
--- a/drivers/gpu/drm/gma500/gem.c
+++ b/drivers/gpu/drm/gma500/gem.c
@@ -62,15 +62,10 @@
 	int ret = 0;
 	struct drm_gem_object *obj;
 
-	mutex_lock(&dev->struct_mutex);
-
 	/* GEM does all our handle to object mapping */
 	obj = drm_gem_object_lookup(dev, file, handle);
-	if (obj == NULL) {
-		ret = -ENOENT;
-		goto unlock;
-	}
-	/* What validation is needed here ? */
+	if (obj == NULL)
+		return -ENOENT;
 
 	/* Make it mmapable */
 	ret = drm_gem_create_mmap_offset(obj);
@@ -78,9 +73,7 @@
 		goto out;
 	*offset = drm_vma_node_offset_addr(&obj->vma_node);
 out:
-	drm_gem_object_unreference(obj);
-unlock:
-	mutex_unlock(&dev->struct_mutex);
+	drm_gem_object_unreference_unlocked(obj);
 	return ret;
 }
 
@@ -130,7 +123,7 @@
 		return ret;
 	}
 	/* We have the initial and handle reference but need only one now */
-	drm_gem_object_unreference(&r->gem);
+	drm_gem_object_unreference_unlocked(&r->gem);
 	*handlep = handle;
 	return 0;
 }
@@ -189,7 +182,7 @@
 
 	/* Make sure we don't parallel update on a fault, nor move or remove
 	   something from beneath our feet */
-	mutex_lock(&dev->struct_mutex);
+	mutex_lock(&dev_priv->mmap_mutex);
 
 	/* For now the mmap pins the object and it stays pinned. As things
 	   stand that will do us no harm */
@@ -215,7 +208,7 @@
 	ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
 
 fail:
-	mutex_unlock(&dev->struct_mutex);
+	mutex_unlock(&dev_priv->mmap_mutex);
 	switch (ret) {
 	case 0:
 	case -ERESTARTSYS:
diff --git a/drivers/gpu/drm/gma500/gma_display.c b/drivers/gpu/drm/gma500/gma_display.c
index 001b450..ff17af4 100644
--- a/drivers/gpu/drm/gma500/gma_display.c
+++ b/drivers/gpu/drm/gma500/gma_display.c
@@ -349,8 +349,6 @@
 	/* If we didn't get a handle then turn the cursor off */
 	if (!handle) {
 		temp = CURSOR_MODE_DISABLE;
-		mutex_lock(&dev->struct_mutex);
-
 		if (gma_power_begin(dev, false)) {
 			REG_WRITE(control, temp);
 			REG_WRITE(base, 0);
@@ -362,11 +360,9 @@
 			gt = container_of(gma_crtc->cursor_obj,
 					  struct gtt_range, gem);
 			psb_gtt_unpin(gt);
-			drm_gem_object_unreference(gma_crtc->cursor_obj);
+			drm_gem_object_unreference_unlocked(gma_crtc->cursor_obj);
 			gma_crtc->cursor_obj = NULL;
 		}
-
-		mutex_unlock(&dev->struct_mutex);
 		return 0;
 	}
 
@@ -376,7 +372,6 @@
 		return -EINVAL;
 	}
 
-	mutex_lock(&dev->struct_mutex);
 	obj = drm_gem_object_lookup(dev, file_priv, handle);
 	if (!obj) {
 		ret = -ENOENT;
@@ -441,17 +436,15 @@
 	if (gma_crtc->cursor_obj) {
 		gt = container_of(gma_crtc->cursor_obj, struct gtt_range, gem);
 		psb_gtt_unpin(gt);
-		drm_gem_object_unreference(gma_crtc->cursor_obj);
+		drm_gem_object_unreference_unlocked(gma_crtc->cursor_obj);
 	}
 
 	gma_crtc->cursor_obj = obj;
 unlock:
-	mutex_unlock(&dev->struct_mutex);
 	return ret;
 
 unref_cursor:
-	drm_gem_object_unreference(obj);
-	mutex_unlock(&dev->struct_mutex);
+	drm_gem_object_unreference_unlocked(obj);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/gma500/gtt.c b/drivers/gpu/drm/gma500/gtt.c
index ce015db..8f69225 100644
--- a/drivers/gpu/drm/gma500/gtt.c
+++ b/drivers/gpu/drm/gma500/gtt.c
@@ -425,6 +425,7 @@
 
 	if (!resume) {
 		mutex_init(&dev_priv->gtt_mutex);
+		mutex_init(&dev_priv->mmap_mutex);
 		psb_gtt_alloc(dev);
 	}
 
diff --git a/drivers/gpu/drm/gma500/mdfld_device.c b/drivers/gpu/drm/gma500/mdfld_device.c
index 265ad0d..e2ab858 100644
--- a/drivers/gpu/drm/gma500/mdfld_device.c
+++ b/drivers/gpu/drm/gma500/mdfld_device.c
@@ -546,6 +546,8 @@
 
 	.save_regs = mdfld_save_registers,
 	.restore_regs = mdfld_restore_registers,
+	.save_crtc = gma_crtc_save,
+	.restore_crtc = gma_crtc_restore,
 	.power_down = mdfld_power_down,
 	.power_up = mdfld_power_up,
 };
diff --git a/drivers/gpu/drm/gma500/mdfld_dsi_dpi.c b/drivers/gpu/drm/gma500/mdfld_dsi_dpi.c
index d4813e0..1a1acd3 100644
--- a/drivers/gpu/drm/gma500/mdfld_dsi_dpi.c
+++ b/drivers/gpu/drm/gma500/mdfld_dsi_dpi.c
@@ -994,7 +994,7 @@
 	drm_encoder_init(dev,
 			encoder,
 			p_funcs->encoder_funcs,
-			DRM_MODE_ENCODER_LVDS);
+			DRM_MODE_ENCODER_LVDS, NULL);
 	drm_encoder_helper_add(encoder,
 				p_funcs->encoder_helper_funcs);
 
diff --git a/drivers/gpu/drm/gma500/mdfld_dsi_output.c b/drivers/gpu/drm/gma500/mdfld_dsi_output.c
index 89f705c..d758f4c 100644
--- a/drivers/gpu/drm/gma500/mdfld_dsi_output.c
+++ b/drivers/gpu/drm/gma500/mdfld_dsi_output.c
@@ -405,8 +405,6 @@
 /*DSI connector funcs*/
 static const struct drm_connector_funcs mdfld_dsi_connector_funcs = {
 	.dpms = /*drm_helper_connector_dpms*/mdfld_dsi_connector_dpms,
-	.save = mdfld_dsi_connector_save,
-	.restore = mdfld_dsi_connector_restore,
 	.detect = mdfld_dsi_connector_detect,
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.set_property = mdfld_dsi_connector_set_property,
@@ -563,6 +561,9 @@
 
 
 	connector = &dsi_connector->base.base;
+	dsi_connector->base.save = mdfld_dsi_connector_save;
+	dsi_connector->base.restore = mdfld_dsi_connector_restore;
+
 	drm_connector_init(dev, connector, &mdfld_dsi_connector_funcs,
 						DRM_MODE_CONNECTOR_LVDS);
 	drm_connector_helper_add(connector, &mdfld_dsi_connector_helper_funcs);
diff --git a/drivers/gpu/drm/gma500/oaktrail_device.c b/drivers/gpu/drm/gma500/oaktrail_device.c
index 368a03a..ba30b43 100644
--- a/drivers/gpu/drm/gma500/oaktrail_device.c
+++ b/drivers/gpu/drm/gma500/oaktrail_device.c
@@ -568,6 +568,8 @@
 
 	.save_regs = oaktrail_save_display_registers,
 	.restore_regs = oaktrail_restore_display_registers,
+	.save_crtc = gma_crtc_save,
+	.restore_crtc = gma_crtc_restore,
 	.power_down = oaktrail_power_down,
 	.power_up = oaktrail_power_up,
 
diff --git a/drivers/gpu/drm/gma500/oaktrail_hdmi.c b/drivers/gpu/drm/gma500/oaktrail_hdmi.c
index 2310d87..2d18499 100644
--- a/drivers/gpu/drm/gma500/oaktrail_hdmi.c
+++ b/drivers/gpu/drm/gma500/oaktrail_hdmi.c
@@ -654,7 +654,7 @@
 
 	drm_encoder_init(dev, encoder,
 			 &oaktrail_hdmi_enc_funcs,
-			 DRM_MODE_ENCODER_TMDS);
+			 DRM_MODE_ENCODER_TMDS, NULL);
 
 	gma_connector_attach_encoder(gma_connector, gma_encoder);
 
diff --git a/drivers/gpu/drm/gma500/oaktrail_lvds.c b/drivers/gpu/drm/gma500/oaktrail_lvds.c
index 83bbc27..f7038f1 100644
--- a/drivers/gpu/drm/gma500/oaktrail_lvds.c
+++ b/drivers/gpu/drm/gma500/oaktrail_lvds.c
@@ -323,7 +323,7 @@
 			   DRM_MODE_CONNECTOR_LVDS);
 
 	drm_encoder_init(dev, encoder, &psb_intel_lvds_enc_funcs,
-			 DRM_MODE_ENCODER_LVDS);
+			 DRM_MODE_ENCODER_LVDS, NULL);
 
 	gma_connector_attach_encoder(gma_connector, gma_encoder);
 	gma_encoder->type = INTEL_OUTPUT_LVDS;
diff --git a/drivers/gpu/drm/gma500/psb_device.c b/drivers/gpu/drm/gma500/psb_device.c
index 07df7d4..dc0f852 100644
--- a/drivers/gpu/drm/gma500/psb_device.c
+++ b/drivers/gpu/drm/gma500/psb_device.c
@@ -181,7 +181,7 @@
 {
 	struct drm_psb_private *dev_priv = dev->dev_private;
 	struct drm_crtc *crtc;
-	struct drm_connector *connector;
+	struct gma_connector *connector;
 	struct psb_state *regs = &dev_priv->regs.psb;
 
 	/* Display arbitration control + watermarks */
@@ -198,12 +198,12 @@
 	drm_modeset_lock_all(dev);
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
 		if (drm_helper_crtc_in_use(crtc))
-			crtc->funcs->save(crtc);
+			dev_priv->ops->save_crtc(crtc);
 	}
 
-	list_for_each_entry(connector, &dev->mode_config.connector_list, head)
-		if (connector->funcs->save)
-			connector->funcs->save(connector);
+	list_for_each_entry(connector, &dev->mode_config.connector_list, base.head)
+		if (connector->save)
+			connector->save(&connector->base);
 
 	drm_modeset_unlock_all(dev);
 	return 0;
@@ -219,7 +219,7 @@
 {
 	struct drm_psb_private *dev_priv = dev->dev_private;
 	struct drm_crtc *crtc;
-	struct drm_connector *connector;
+	struct gma_connector *connector;
 	struct psb_state *regs = &dev_priv->regs.psb;
 
 	/* Display arbitration + watermarks */
@@ -238,11 +238,11 @@
 	drm_modeset_lock_all(dev);
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
 		if (drm_helper_crtc_in_use(crtc))
-			crtc->funcs->restore(crtc);
+			dev_priv->ops->restore_crtc(crtc);
 
-	list_for_each_entry(connector, &dev->mode_config.connector_list, head)
-		if (connector->funcs->restore)
-			connector->funcs->restore(connector);
+	list_for_each_entry(connector, &dev->mode_config.connector_list, base.head)
+		if (connector->restore)
+			connector->restore(&connector->base);
 
 	drm_modeset_unlock_all(dev);
 	return 0;
@@ -354,6 +354,8 @@
 	.init_pm = psb_init_pm,
 	.save_regs = psb_save_display_registers,
 	.restore_regs = psb_restore_display_registers,
+	.save_crtc = gma_crtc_save,
+	.restore_crtc = gma_crtc_restore,
 	.power_down = psb_power_down,
 	.power_up = psb_power_up,
 };
diff --git a/drivers/gpu/drm/gma500/psb_drv.h b/drivers/gpu/drm/gma500/psb_drv.h
index e21726e..b743727 100644
--- a/drivers/gpu/drm/gma500/psb_drv.h
+++ b/drivers/gpu/drm/gma500/psb_drv.h
@@ -465,6 +465,8 @@
 	struct mutex gtt_mutex;
 	struct resource *gtt_mem;	/* Our PCI resource */
 
+	struct mutex mmap_mutex;
+
 	struct psb_mmu_driver *mmu;
 	struct psb_mmu_pd *pf_pd;
 
@@ -651,6 +653,8 @@
 	void (*init_pm)(struct drm_device *dev);
 	int (*save_regs)(struct drm_device *dev);
 	int (*restore_regs)(struct drm_device *dev);
+	void (*save_crtc)(struct drm_crtc *crtc);
+	void (*restore_crtc)(struct drm_crtc *crtc);
 	int (*power_up)(struct drm_device *dev);
 	int (*power_down)(struct drm_device *dev);
 	void (*update_wm)(struct drm_device *dev, struct drm_crtc *crtc);
diff --git a/drivers/gpu/drm/gma500/psb_intel_display.c b/drivers/gpu/drm/gma500/psb_intel_display.c
index 6659da8..dcdbc37 100644
--- a/drivers/gpu/drm/gma500/psb_intel_display.c
+++ b/drivers/gpu/drm/gma500/psb_intel_display.c
@@ -439,8 +439,6 @@
 };
 
 const struct drm_crtc_funcs psb_intel_crtc_funcs = {
-	.save = gma_crtc_save,
-	.restore = gma_crtc_restore,
 	.cursor_set = gma_crtc_cursor_set,
 	.cursor_move = gma_crtc_cursor_move,
 	.gamma_set = gma_crtc_gamma_set,
diff --git a/drivers/gpu/drm/gma500/psb_intel_drv.h b/drivers/gpu/drm/gma500/psb_intel_drv.h
index 860dd21..2a3b7c6 100644
--- a/drivers/gpu/drm/gma500/psb_intel_drv.h
+++ b/drivers/gpu/drm/gma500/psb_intel_drv.h
@@ -140,6 +140,9 @@
 struct gma_connector {
 	struct drm_connector base;
 	struct gma_encoder *encoder;
+
+	void (*save)(struct drm_connector *connector);
+	void (*restore)(struct drm_connector *connector);
 };
 
 struct psb_intel_crtc_state {
diff --git a/drivers/gpu/drm/gma500/psb_intel_lvds.c b/drivers/gpu/drm/gma500/psb_intel_lvds.c
index ce0645d..b1b9331 100644
--- a/drivers/gpu/drm/gma500/psb_intel_lvds.c
+++ b/drivers/gpu/drm/gma500/psb_intel_lvds.c
@@ -653,8 +653,6 @@
 
 const struct drm_connector_funcs psb_intel_lvds_connector_funcs = {
 	.dpms = drm_helper_connector_dpms,
-	.save = psb_intel_lvds_save,
-	.restore = psb_intel_lvds_restore,
 	.detect = psb_intel_lvds_detect,
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.set_property = psb_intel_lvds_set_property,
@@ -715,6 +713,9 @@
 	gma_encoder->dev_priv = lvds_priv;
 
 	connector = &gma_connector->base;
+	gma_connector->save = psb_intel_lvds_save;
+	gma_connector->restore = psb_intel_lvds_restore;
+
 	encoder = &gma_encoder->base;
 	drm_connector_init(dev, connector,
 			   &psb_intel_lvds_connector_funcs,
@@ -722,7 +723,7 @@
 
 	drm_encoder_init(dev, encoder,
 			 &psb_intel_lvds_enc_funcs,
-			 DRM_MODE_ENCODER_LVDS);
+			 DRM_MODE_ENCODER_LVDS, NULL);
 
 	gma_connector_attach_encoder(gma_connector, gma_encoder);
 	gma_encoder->type = INTEL_OUTPUT_LVDS;
diff --git a/drivers/gpu/drm/gma500/psb_intel_sdvo.c b/drivers/gpu/drm/gma500/psb_intel_sdvo.c
index 58529ce..e787d37 100644
--- a/drivers/gpu/drm/gma500/psb_intel_sdvo.c
+++ b/drivers/gpu/drm/gma500/psb_intel_sdvo.c
@@ -1837,8 +1837,6 @@
 
 static const struct drm_connector_funcs psb_intel_sdvo_connector_funcs = {
 	.dpms = drm_helper_connector_dpms,
-	.save = psb_intel_sdvo_save,
-	.restore = psb_intel_sdvo_restore,
 	.detect = psb_intel_sdvo_detect,
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.set_property = psb_intel_sdvo_set_property,
@@ -2021,6 +2019,9 @@
 	connector->base.base.doublescan_allowed = 0;
 	connector->base.base.display_info.subpixel_order = SubPixelHorizontalRGB;
 
+	connector->base.save = psb_intel_sdvo_save;
+	connector->base.restore = psb_intel_sdvo_restore;
+
 	gma_connector_attach_encoder(&connector->base, &encoder->base);
 	drm_connector_register(&connector->base.base);
 }
@@ -2525,7 +2526,8 @@
 	/* encoder type will be decided later */
 	gma_encoder = &psb_intel_sdvo->base;
 	gma_encoder->type = INTEL_OUTPUT_SDVO;
-	drm_encoder_init(dev, &gma_encoder->base, &psb_intel_sdvo_enc_funcs, 0);
+	drm_encoder_init(dev, &gma_encoder->base, &psb_intel_sdvo_enc_funcs,
+			 0, NULL);
 
 	/* Read the regs to test if we can talk to the device */
 	for (i = 0; i < 0x40; i++) {
diff --git a/drivers/gpu/drm/i2c/tda998x_drv.c b/drivers/gpu/drm/i2c/tda998x_drv.c
index 896b6aa..a46248f 100644
--- a/drivers/gpu/drm/i2c/tda998x_drv.c
+++ b/drivers/gpu/drm/i2c/tda998x_drv.c
@@ -855,18 +855,6 @@
 	priv->dpms = mode;
 }
 
-static void
-tda998x_encoder_save(struct drm_encoder *encoder)
-{
-	DBG("");
-}
-
-static void
-tda998x_encoder_restore(struct drm_encoder *encoder)
-{
-	DBG("");
-}
-
 static bool
 tda998x_encoder_mode_fixup(struct drm_encoder *encoder,
 			  const struct drm_display_mode *mode,
@@ -1351,8 +1339,6 @@
 
 static const struct drm_encoder_helper_funcs tda998x_encoder_helper_funcs = {
 	.dpms = tda998x_encoder_dpms,
-	.save = tda998x_encoder_save,
-	.restore = tda998x_encoder_restore,
 	.mode_fixup = tda998x_encoder_mode_fixup,
 	.prepare = tda998x_encoder_prepare,
 	.commit = tda998x_encoder_commit,
@@ -1437,7 +1423,7 @@
 
 	drm_encoder_helper_add(&priv->encoder, &tda998x_encoder_helper_funcs);
 	ret = drm_encoder_init(drm, &priv->encoder, &tda998x_encoder_funcs,
-			       DRM_MODE_ENCODER_TMDS);
+			       DRM_MODE_ENCODER_TMDS, NULL);
 	if (ret)
 		goto err_encoder;
 
diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 051eab3..fcd77b2 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -10,6 +10,7 @@
 	# the shmem_readpage() which depends upon tmpfs
 	select SHMEM
 	select TMPFS
+	select STOP_MACHINE
 	select DRM_KMS_HELPER
 	select DRM_PANEL
 	select DRM_MIPI_DSI
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 44d290a..0851de07 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -77,6 +77,7 @@
 	  dvo_tfp410.o \
 	  intel_crt.o \
 	  intel_ddi.o \
+	  intel_dp_link_training.o \
 	  intel_dp_mst.o \
 	  intel_dp.o \
 	  intel_dsi.o \
diff --git a/drivers/gpu/drm/i915/dvo.h b/drivers/gpu/drm/i915/dvo.h
index 0e2c1b9..13dea42 100644
--- a/drivers/gpu/drm/i915/dvo.h
+++ b/drivers/gpu/drm/i915/dvo.h
@@ -32,7 +32,8 @@
 	const char *name;
 	int type;
 	/* DVOA/B/C output register */
-	u32 dvo_reg;
+	i915_reg_t dvo_reg;
+	i915_reg_t dvo_srcdim_reg;
 	/* GPIO register used for i2c bus to control this device */
 	u32 gpio;
 	int slave_addr;
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index db58c8d..814d894 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -407,14 +407,14 @@
  * LRI.
  */
 struct drm_i915_reg_descriptor {
-	u32 addr;
+	i915_reg_t addr;
 	u32 mask;
 	u32 value;
 };
 
 /* Convenience macro for adding 32-bit registers. */
-#define REG32(address, ...)                             \
-	{ .addr = address, __VA_ARGS__ }
+#define REG32(_reg, ...) \
+	{ .addr = (_reg), __VA_ARGS__ }
 
 /*
  * Convenience macro for adding 64-bit registers.
@@ -423,8 +423,13 @@
  * access commands only allow 32-bit accesses. Hence, we have to include
  * entries for both halves of the 64-bit registers.
  */
-#define REG64(addr)                                     \
-	REG32(addr), REG32(addr + sizeof(u32))
+#define REG64(_reg) \
+	{ .addr = _reg }, \
+	{ .addr = _reg ## _UDW }
+
+#define REG64_IDX(_reg, idx) \
+	{ .addr = _reg(idx) }, \
+	{ .addr = _reg ## _UDW(idx) }
 
 static const struct drm_i915_reg_descriptor gen7_render_regs[] = {
 	REG64(GPGPU_THREADS_DISPATCHED),
@@ -451,14 +456,14 @@
 	REG32(GEN7_GPGPU_DISPATCHDIMX),
 	REG32(GEN7_GPGPU_DISPATCHDIMY),
 	REG32(GEN7_GPGPU_DISPATCHDIMZ),
-	REG64(GEN7_SO_NUM_PRIMS_WRITTEN(0)),
-	REG64(GEN7_SO_NUM_PRIMS_WRITTEN(1)),
-	REG64(GEN7_SO_NUM_PRIMS_WRITTEN(2)),
-	REG64(GEN7_SO_NUM_PRIMS_WRITTEN(3)),
-	REG64(GEN7_SO_PRIM_STORAGE_NEEDED(0)),
-	REG64(GEN7_SO_PRIM_STORAGE_NEEDED(1)),
-	REG64(GEN7_SO_PRIM_STORAGE_NEEDED(2)),
-	REG64(GEN7_SO_PRIM_STORAGE_NEEDED(3)),
+	REG64_IDX(GEN7_SO_NUM_PRIMS_WRITTEN, 0),
+	REG64_IDX(GEN7_SO_NUM_PRIMS_WRITTEN, 1),
+	REG64_IDX(GEN7_SO_NUM_PRIMS_WRITTEN, 2),
+	REG64_IDX(GEN7_SO_NUM_PRIMS_WRITTEN, 3),
+	REG64_IDX(GEN7_SO_PRIM_STORAGE_NEEDED, 0),
+	REG64_IDX(GEN7_SO_PRIM_STORAGE_NEEDED, 1),
+	REG64_IDX(GEN7_SO_PRIM_STORAGE_NEEDED, 2),
+	REG64_IDX(GEN7_SO_PRIM_STORAGE_NEEDED, 3),
 	REG32(GEN7_SO_WRITE_OFFSET(0)),
 	REG32(GEN7_SO_WRITE_OFFSET(1)),
 	REG32(GEN7_SO_WRITE_OFFSET(2)),
@@ -592,7 +597,7 @@
 	bool ret = true;
 
 	for (i = 0; i < reg_count; i++) {
-		u32 curr = reg_table[i].addr;
+		u32 curr = i915_mmio_reg_offset(reg_table[i].addr);
 
 		if (curr < previous) {
 			DRM_ERROR("CMD: table not sorted ring=%d entry=%d reg=0x%08X prev=0x%08X\n",
@@ -847,7 +852,7 @@
 		int i;
 
 		for (i = 0; i < count; i++) {
-			if (table[i].addr == addr)
+			if (i915_mmio_reg_offset(table[i].addr) == addr)
 				return &table[i];
 		}
 	}
@@ -1023,7 +1028,7 @@
 			 * to the register. Hence, limit OACONTROL writes to
 			 * only MI_LOAD_REGISTER_IMM commands.
 			 */
-			if (reg_addr == OACONTROL) {
+			if (reg_addr == i915_mmio_reg_offset(OACONTROL)) {
 				if (desc->cmd.value == MI_LOAD_REGISTER_MEM) {
 					DRM_DEBUG_DRIVER("CMD: Rejected LRM to OACONTROL\n");
 					return false;
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index a3b22bd..a8721fc 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1252,18 +1252,21 @@
 
 		max_freq = (IS_BROXTON(dev) ? rp_state_cap >> 0 :
 			    rp_state_cap >> 16) & 0xff;
-		max_freq *= (IS_SKYLAKE(dev) ? GEN9_FREQ_SCALER : 1);
+		max_freq *= (IS_SKYLAKE(dev) || IS_KABYLAKE(dev) ?
+			     GEN9_FREQ_SCALER : 1);
 		seq_printf(m, "Lowest (RPN) frequency: %dMHz\n",
 			   intel_gpu_freq(dev_priv, max_freq));
 
 		max_freq = (rp_state_cap & 0xff00) >> 8;
-		max_freq *= (IS_SKYLAKE(dev) ? GEN9_FREQ_SCALER : 1);
+		max_freq *= (IS_SKYLAKE(dev) || IS_KABYLAKE(dev) ?
+			     GEN9_FREQ_SCALER : 1);
 		seq_printf(m, "Nominal (RP1) frequency: %dMHz\n",
 			   intel_gpu_freq(dev_priv, max_freq));
 
 		max_freq = (IS_BROXTON(dev) ? rp_state_cap >> 16 :
 			    rp_state_cap >> 0) & 0xff;
-		max_freq *= (IS_SKYLAKE(dev) ? GEN9_FREQ_SCALER : 1);
+		max_freq *= (IS_SKYLAKE(dev) || IS_KABYLAKE(dev) ?
+			     GEN9_FREQ_SCALER : 1);
 		seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n",
 			   intel_gpu_freq(dev_priv, max_freq));
 		seq_printf(m, "Max overclocked frequency: %dMHz\n",
@@ -1523,7 +1526,7 @@
 		seq_printf(m, "RC information accurate: %s\n", yesno(count < 51));
 	}
 
-	gt_core_status = readl(dev_priv->regs + GEN6_GT_CORE_STATUS);
+	gt_core_status = I915_READ_FW(GEN6_GT_CORE_STATUS);
 	trace_i915_reg_rw(false, GEN6_GT_CORE_STATUS, gt_core_status, 4, true);
 
 	rpmodectl1 = I915_READ(GEN6_RP_CONTROL);
@@ -1636,11 +1639,11 @@
 	intel_runtime_pm_get(dev_priv);
 	mutex_lock(&dev_priv->fbc.lock);
 
-	if (intel_fbc_enabled(dev_priv))
+	if (intel_fbc_is_active(dev_priv))
 		seq_puts(m, "FBC enabled\n");
 	else
 		seq_printf(m, "FBC disabled: %s\n",
-			  intel_no_fbc_reason_str(dev_priv->fbc.no_fbc_reason));
+			   dev_priv->fbc.no_fbc_reason);
 
 	if (INTEL_INFO(dev_priv)->gen >= 7)
 		seq_printf(m, "Compressing: %s\n",
@@ -1801,7 +1804,7 @@
 	if (ret)
 		goto out;
 
-	if (IS_SKYLAKE(dev)) {
+	if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
 		/* Convert GT frequency to 50 HZ units */
 		min_gpu_freq =
 			dev_priv->rps.min_freq_softlimit / GEN9_FREQ_SCALER;
@@ -1821,7 +1824,8 @@
 				       &ia_freq);
 		seq_printf(m, "%d\t\t%d\t\t\t\t%d\n",
 			   intel_gpu_freq(dev_priv, (gpu_freq *
-				(IS_SKYLAKE(dev) ? GEN9_FREQ_SCALER : 1))),
+				(IS_SKYLAKE(dev) || IS_KABYLAKE(dev) ?
+				 GEN9_FREQ_SCALER : 1))),
 			   ((ia_freq >> 0) & 0xff) * 100,
 			   ((ia_freq >> 8) & 0xff) * 100);
 	}
@@ -1865,31 +1869,29 @@
 {
 	struct drm_info_node *node = m->private;
 	struct drm_device *dev = node->minor->dev;
-	struct intel_fbdev *ifbdev = NULL;
-	struct intel_framebuffer *fb;
+	struct intel_framebuffer *fbdev_fb = NULL;
 	struct drm_framebuffer *drm_fb;
 
 #ifdef CONFIG_DRM_FBDEV_EMULATION
-	struct drm_i915_private *dev_priv = dev->dev_private;
+       if (to_i915(dev)->fbdev) {
+               fbdev_fb = to_intel_framebuffer(to_i915(dev)->fbdev->helper.fb);
 
-	ifbdev = dev_priv->fbdev;
-	fb = to_intel_framebuffer(ifbdev->helper.fb);
-
-	seq_printf(m, "fbcon size: %d x %d, depth %d, %d bpp, modifier 0x%llx, refcount %d, obj ",
-		   fb->base.width,
-		   fb->base.height,
-		   fb->base.depth,
-		   fb->base.bits_per_pixel,
-		   fb->base.modifier[0],
-		   atomic_read(&fb->base.refcount.refcount));
-	describe_obj(m, fb->obj);
-	seq_putc(m, '\n');
+               seq_printf(m, "fbcon size: %d x %d, depth %d, %d bpp, modifier 0x%llx, refcount %d, obj ",
+                         fbdev_fb->base.width,
+                         fbdev_fb->base.height,
+                         fbdev_fb->base.depth,
+                         fbdev_fb->base.bits_per_pixel,
+                         fbdev_fb->base.modifier[0],
+                         atomic_read(&fbdev_fb->base.refcount.refcount));
+               describe_obj(m, fbdev_fb->obj);
+               seq_putc(m, '\n');
+       }
 #endif
 
 	mutex_lock(&dev->mode_config.fb_lock);
 	drm_for_each_fb(drm_fb, dev) {
-		fb = to_intel_framebuffer(drm_fb);
-		if (ifbdev && &fb->base == ifbdev->helper.fb)
+		struct intel_framebuffer *fb = to_intel_framebuffer(drm_fb);
+		if (fb == fbdev_fb)
 			continue;
 
 		seq_printf(m, "user size: %d x %d, depth %d, %d bpp, modifier 0x%llx, refcount %d, obj ",
@@ -2402,6 +2404,12 @@
 		guc_fw->guc_fw_major_wanted, guc_fw->guc_fw_minor_wanted);
 	seq_printf(m, "\tversion found: %d.%d\n",
 		guc_fw->guc_fw_major_found, guc_fw->guc_fw_minor_found);
+	seq_printf(m, "\theader: offset is %d; size = %d\n",
+		guc_fw->header_offset, guc_fw->header_size);
+	seq_printf(m, "\tuCode: offset is %d; size = %d\n",
+		guc_fw->ucode_offset, guc_fw->ucode_size);
+	seq_printf(m, "\tRSA: offset is %d; size = %d\n",
+		guc_fw->rsa_offset, guc_fw->rsa_size);
 
 	tmp = I915_READ(GUC_STATUS);
 
@@ -2461,15 +2469,15 @@
 	if (!HAS_GUC_SCHED(dev_priv->dev))
 		return 0;
 
+	if (mutex_lock_interruptible(&dev->struct_mutex))
+		return 0;
+
 	/* Take a local copy of the GuC data, so we can dump it at leisure */
-	spin_lock(&dev_priv->guc.host2guc_lock);
 	guc = dev_priv->guc;
-	if (guc.execbuf_client) {
-		spin_lock(&guc.execbuf_client->wq_lock);
+	if (guc.execbuf_client)
 		client = *guc.execbuf_client;
-		spin_unlock(&guc.execbuf_client->wq_lock);
-	}
-	spin_unlock(&dev_priv->guc.host2guc_lock);
+
+	mutex_unlock(&dev->struct_mutex);
 
 	seq_printf(m, "GuC total action count: %llu\n", guc.action_count);
 	seq_printf(m, "GuC action failure count: %u\n", guc.action_fail);
@@ -2550,7 +2558,7 @@
 		   yesno(work_busy(&dev_priv->psr.work.work)));
 
 	if (HAS_DDI(dev))
-		enabled = I915_READ(EDP_PSR_CTL(dev)) & EDP_PSR_ENABLE;
+		enabled = I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE;
 	else {
 		for_each_pipe(dev_priv, pipe) {
 			stat[pipe] = I915_READ(VLV_PSRSTAT(pipe)) &
@@ -2570,9 +2578,12 @@
 		}
 	seq_puts(m, "\n");
 
-	/* CHV PSR has no kind of performance counter */
-	if (HAS_DDI(dev)) {
-		psrperf = I915_READ(EDP_PSR_PERF_CNT(dev)) &
+	/*
+	 * VLV/CHV PSR has no kind of performance counter
+	 * SKL+ Perf counter is reset to 0 everytime DC state is entered
+	 */
+	if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
+		psrperf = I915_READ(EDP_PSR_PERF_CNT) &
 			EDP_PSR_PERF_CNT_MASK;
 
 		seq_printf(m, "Performance_Counter: %u\n", psrperf);
@@ -2673,75 +2684,6 @@
 	return 0;
 }
 
-static const char *power_domain_str(enum intel_display_power_domain domain)
-{
-	switch (domain) {
-	case POWER_DOMAIN_PIPE_A:
-		return "PIPE_A";
-	case POWER_DOMAIN_PIPE_B:
-		return "PIPE_B";
-	case POWER_DOMAIN_PIPE_C:
-		return "PIPE_C";
-	case POWER_DOMAIN_PIPE_A_PANEL_FITTER:
-		return "PIPE_A_PANEL_FITTER";
-	case POWER_DOMAIN_PIPE_B_PANEL_FITTER:
-		return "PIPE_B_PANEL_FITTER";
-	case POWER_DOMAIN_PIPE_C_PANEL_FITTER:
-		return "PIPE_C_PANEL_FITTER";
-	case POWER_DOMAIN_TRANSCODER_A:
-		return "TRANSCODER_A";
-	case POWER_DOMAIN_TRANSCODER_B:
-		return "TRANSCODER_B";
-	case POWER_DOMAIN_TRANSCODER_C:
-		return "TRANSCODER_C";
-	case POWER_DOMAIN_TRANSCODER_EDP:
-		return "TRANSCODER_EDP";
-	case POWER_DOMAIN_PORT_DDI_A_2_LANES:
-		return "PORT_DDI_A_2_LANES";
-	case POWER_DOMAIN_PORT_DDI_A_4_LANES:
-		return "PORT_DDI_A_4_LANES";
-	case POWER_DOMAIN_PORT_DDI_B_2_LANES:
-		return "PORT_DDI_B_2_LANES";
-	case POWER_DOMAIN_PORT_DDI_B_4_LANES:
-		return "PORT_DDI_B_4_LANES";
-	case POWER_DOMAIN_PORT_DDI_C_2_LANES:
-		return "PORT_DDI_C_2_LANES";
-	case POWER_DOMAIN_PORT_DDI_C_4_LANES:
-		return "PORT_DDI_C_4_LANES";
-	case POWER_DOMAIN_PORT_DDI_D_2_LANES:
-		return "PORT_DDI_D_2_LANES";
-	case POWER_DOMAIN_PORT_DDI_D_4_LANES:
-		return "PORT_DDI_D_4_LANES";
-	case POWER_DOMAIN_PORT_DDI_E_2_LANES:
-		return "PORT_DDI_E_2_LANES";
-	case POWER_DOMAIN_PORT_DSI:
-		return "PORT_DSI";
-	case POWER_DOMAIN_PORT_CRT:
-		return "PORT_CRT";
-	case POWER_DOMAIN_PORT_OTHER:
-		return "PORT_OTHER";
-	case POWER_DOMAIN_VGA:
-		return "VGA";
-	case POWER_DOMAIN_AUDIO:
-		return "AUDIO";
-	case POWER_DOMAIN_PLLS:
-		return "PLLS";
-	case POWER_DOMAIN_AUX_A:
-		return "AUX_A";
-	case POWER_DOMAIN_AUX_B:
-		return "AUX_B";
-	case POWER_DOMAIN_AUX_C:
-		return "AUX_C";
-	case POWER_DOMAIN_AUX_D:
-		return "AUX_D";
-	case POWER_DOMAIN_INIT:
-		return "INIT";
-	default:
-		MISSING_CASE(domain);
-		return "?";
-	}
-}
-
 static int i915_power_domain_info(struct seq_file *m, void *unused)
 {
 	struct drm_info_node *node = m->private;
@@ -2767,7 +2709,7 @@
 				continue;
 
 			seq_printf(m, "  %-23s %d\n",
-				 power_domain_str(power_domain),
+				 intel_display_power_domain_str(power_domain),
 				 power_domains->domain_use_count[power_domain]);
 		}
 	}
@@ -2777,6 +2719,51 @@
 	return 0;
 }
 
+static int i915_dmc_info(struct seq_file *m, void *unused)
+{
+	struct drm_info_node *node = m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_csr *csr;
+
+	if (!HAS_CSR(dev)) {
+		seq_puts(m, "not supported\n");
+		return 0;
+	}
+
+	csr = &dev_priv->csr;
+
+	intel_runtime_pm_get(dev_priv);
+
+	seq_printf(m, "fw loaded: %s\n", yesno(csr->dmc_payload != NULL));
+	seq_printf(m, "path: %s\n", csr->fw_path);
+
+	if (!csr->dmc_payload)
+		goto out;
+
+	seq_printf(m, "version: %d.%d\n", CSR_VERSION_MAJOR(csr->version),
+		   CSR_VERSION_MINOR(csr->version));
+
+	if (IS_SKYLAKE(dev) && csr->version >= CSR_VERSION(1, 6)) {
+		seq_printf(m, "DC3 -> DC5 count: %d\n",
+			   I915_READ(SKL_CSR_DC3_DC5_COUNT));
+		seq_printf(m, "DC5 -> DC6 count: %d\n",
+			   I915_READ(SKL_CSR_DC5_DC6_COUNT));
+	} else if (IS_BROXTON(dev) && csr->version >= CSR_VERSION(1, 4)) {
+		seq_printf(m, "DC3 -> DC5 count: %d\n",
+			   I915_READ(BXT_CSR_DC3_DC5_COUNT));
+	}
+
+out:
+	seq_printf(m, "program base: 0x%08x\n", I915_READ(CSR_PROGRAM(0)));
+	seq_printf(m, "ssp base: 0x%08x\n", I915_READ(CSR_SSP_BASE));
+	seq_printf(m, "htp: 0x%08x\n", I915_READ(CSR_HTP_SKL));
+
+	intel_runtime_pm_put(dev_priv);
+
+	return 0;
+}
+
 static void intel_seq_print_mode(struct seq_file *m, int tabs,
 				 struct drm_display_mode *mode)
 {
@@ -2944,6 +2931,107 @@
 	return cursor_active(dev, pipe);
 }
 
+static const char *plane_type(enum drm_plane_type type)
+{
+	switch (type) {
+	case DRM_PLANE_TYPE_OVERLAY:
+		return "OVL";
+	case DRM_PLANE_TYPE_PRIMARY:
+		return "PRI";
+	case DRM_PLANE_TYPE_CURSOR:
+		return "CUR";
+	/*
+	 * Deliberately omitting default: to generate compiler warnings
+	 * when a new drm_plane_type gets added.
+	 */
+	}
+
+	return "unknown";
+}
+
+static const char *plane_rotation(unsigned int rotation)
+{
+	static char buf[48];
+	/*
+	 * According to doc only one DRM_ROTATE_ is allowed but this
+	 * will print them all to visualize if the values are misused
+	 */
+	snprintf(buf, sizeof(buf),
+		 "%s%s%s%s%s%s(0x%08x)",
+		 (rotation & BIT(DRM_ROTATE_0)) ? "0 " : "",
+		 (rotation & BIT(DRM_ROTATE_90)) ? "90 " : "",
+		 (rotation & BIT(DRM_ROTATE_180)) ? "180 " : "",
+		 (rotation & BIT(DRM_ROTATE_270)) ? "270 " : "",
+		 (rotation & BIT(DRM_REFLECT_X)) ? "FLIPX " : "",
+		 (rotation & BIT(DRM_REFLECT_Y)) ? "FLIPY " : "",
+		 rotation);
+
+	return buf;
+}
+
+static void intel_plane_info(struct seq_file *m, struct intel_crtc *intel_crtc)
+{
+	struct drm_info_node *node = m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct intel_plane *intel_plane;
+
+	for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
+		struct drm_plane_state *state;
+		struct drm_plane *plane = &intel_plane->base;
+
+		if (!plane->state) {
+			seq_puts(m, "plane->state is NULL!\n");
+			continue;
+		}
+
+		state = plane->state;
+
+		seq_printf(m, "\t--Plane id %d: type=%s, crtc_pos=%4dx%4d, crtc_size=%4dx%4d, src_pos=%d.%04ux%d.%04u, src_size=%d.%04ux%d.%04u, format=%s, rotation=%s\n",
+			   plane->base.id,
+			   plane_type(intel_plane->base.type),
+			   state->crtc_x, state->crtc_y,
+			   state->crtc_w, state->crtc_h,
+			   (state->src_x >> 16),
+			   ((state->src_x & 0xffff) * 15625) >> 10,
+			   (state->src_y >> 16),
+			   ((state->src_y & 0xffff) * 15625) >> 10,
+			   (state->src_w >> 16),
+			   ((state->src_w & 0xffff) * 15625) >> 10,
+			   (state->src_h >> 16),
+			   ((state->src_h & 0xffff) * 15625) >> 10,
+			   state->fb ? drm_get_format_name(state->fb->pixel_format) : "N/A",
+			   plane_rotation(state->rotation));
+	}
+}
+
+static void intel_scaler_info(struct seq_file *m, struct intel_crtc *intel_crtc)
+{
+	struct intel_crtc_state *pipe_config;
+	int num_scalers = intel_crtc->num_scalers;
+	int i;
+
+	pipe_config = to_intel_crtc_state(intel_crtc->base.state);
+
+	/* Not all platformas have a scaler */
+	if (num_scalers) {
+		seq_printf(m, "\tnum_scalers=%d, scaler_users=%x scaler_id=%d",
+			   num_scalers,
+			   pipe_config->scaler_state.scaler_users,
+			   pipe_config->scaler_state.scaler_id);
+
+		for (i = 0; i < SKL_NUM_SCALERS; i++) {
+			struct intel_scaler *sc =
+					&pipe_config->scaler_state.scalers[i];
+
+			seq_printf(m, ", scalers[%d]: use=%s, mode=%x",
+				   i, yesno(sc->in_use), sc->mode);
+		}
+		seq_puts(m, "\n");
+	} else {
+		seq_puts(m, "\tNo scalers available on this platform\n");
+	}
+}
+
 static int i915_display_info(struct seq_file *m, void *unused)
 {
 	struct drm_info_node *node = m->private;
@@ -2963,10 +3051,12 @@
 
 		pipe_config = to_intel_crtc_state(crtc->base.state);
 
-		seq_printf(m, "CRTC %d: pipe: %c, active=%s (size=%dx%d)\n",
+		seq_printf(m, "CRTC %d: pipe: %c, active=%s, (size=%dx%d), dither=%s, bpp=%d\n",
 			   crtc->base.base.id, pipe_name(crtc->pipe),
 			   yesno(pipe_config->base.active),
-			   pipe_config->pipe_src_w, pipe_config->pipe_src_h);
+			   pipe_config->pipe_src_w, pipe_config->pipe_src_h,
+			   yesno(pipe_config->dither), pipe_config->pipe_bpp);
+
 		if (pipe_config->base.active) {
 			intel_crtc_info(m, crtc);
 
@@ -2976,6 +3066,8 @@
 				   x, y, crtc->base.cursor->state->crtc_w,
 				   crtc->base.cursor->state->crtc_h,
 				   crtc->cursor_addr, yesno(active));
+			intel_scaler_info(m, crtc);
+			intel_plane_info(m, crtc);
 		}
 
 		seq_printf(m, "\tunderrun reporting: cpu=%s pch=%s \n",
@@ -3110,7 +3202,8 @@
 
 	seq_printf(m, "Workarounds applied: %d\n", dev_priv->workarounds.count);
 	for (i = 0; i < dev_priv->workarounds.count; ++i) {
-		u32 addr, mask, value, read;
+		i915_reg_t addr;
+		u32 mask, value, read;
 		bool ok;
 
 		addr = dev_priv->workarounds.reg[i].addr;
@@ -3119,7 +3212,7 @@
 		read = I915_READ(addr);
 		ok = (value & mask) == (read & mask);
 		seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X, read: 0x%08x, status: %s\n",
-			   addr, value, mask, read, ok ? "OK" : "FAIL");
+			   i915_mmio_reg_offset(addr), value, mask, read, ok ? "OK" : "FAIL");
 	}
 
 	intel_runtime_pm_put(dev_priv);
@@ -5023,7 +5116,7 @@
 
 		stat->slice_total++;
 
-		if (IS_SKYLAKE(dev))
+		if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev))
 			ss_cnt = INTEL_INFO(dev)->subslice_per_slice;
 
 		for (ss = 0; ss < ss_max; ss++) {
@@ -5236,6 +5329,7 @@
 	{"i915_energy_uJ", i915_energy_uJ, 0},
 	{"i915_runtime_pm_status", i915_runtime_pm_status, 0},
 	{"i915_power_domain_info", i915_power_domain_info, 0},
+	{"i915_dmc_info", i915_dmc_info, 0},
 	{"i915_display_info", i915_display_info, 0},
 	{"i915_semaphore_status", i915_semaphore_status, 0},
 	{"i915_shared_dplls_info", i915_shared_dplls_info, 0},
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index b4741d1..a81c766 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -28,7 +28,6 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
-#include <linux/async.h>
 #include <drm/drmP.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_fb_helper.h>
@@ -338,7 +337,7 @@
 		i915_resume_switcheroo(dev);
 		dev->switch_power_state = DRM_SWITCH_POWER_ON;
 	} else {
-		pr_err("switched off\n");
+		pr_info("switched off\n");
 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
 		i915_suspend_switcheroo(dev, pmm);
 		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
@@ -396,7 +395,9 @@
 	if (ret)
 		goto cleanup_vga_switcheroo;
 
-	intel_power_domains_init_hw(dev_priv);
+	intel_power_domains_init_hw(dev_priv, false);
+
+	intel_csr_ucode_init(dev_priv);
 
 	ret = intel_irq_install(dev_priv);
 	if (ret)
@@ -437,7 +438,7 @@
 	 * scanning against hotplug events. Hence do this first and ignore the
 	 * tiny window where we will loose hotplug notifactions.
 	 */
-	async_schedule(intel_fbdev_initial_config, dev_priv);
+	intel_fbdev_initial_config_async(dev);
 
 	drm_kms_helper_poll_init(dev);
 
@@ -663,7 +664,8 @@
 	 * supports EU power gating on devices with more than one EU
 	 * pair per subslice.
 	*/
-	info->has_slice_pg = (IS_SKYLAKE(dev) && (info->slice_total > 1));
+	info->has_slice_pg = ((IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) &&
+			       (info->slice_total > 1));
 	info->has_subslice_pg = (IS_BROXTON(dev) && (info->subslice_total > 1));
 	info->has_eu_pg = (info->eu_per_subslice > 2);
 }
@@ -890,7 +892,6 @@
 	spin_lock_init(&dev_priv->mmio_flip_lock);
 	mutex_init(&dev_priv->sb_lock);
 	mutex_init(&dev_priv->modeset_restore_lock);
-	mutex_init(&dev_priv->csr_lock);
 	mutex_init(&dev_priv->av_mutex);
 
 	intel_pm_setup(dev);
@@ -937,9 +938,6 @@
 
 	intel_uncore_init(dev);
 
-	/* Load CSR Firmware for SKL */
-	intel_csr_ucode_init(dev);
-
 	ret = i915_gem_gtt_init(dev);
 	if (ret)
 		goto out_freecsr;
@@ -1113,7 +1111,7 @@
 out_gtt:
 	i915_global_gtt_cleanup(dev);
 out_freecsr:
-	intel_csr_ucode_fini(dev);
+	intel_csr_ucode_fini(dev_priv);
 	intel_uncore_fini(dev);
 	pci_iounmap(dev->pdev, dev_priv->regs);
 put_bridge:
@@ -1131,6 +1129,8 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int ret;
 
+	intel_fbdev_fini(dev);
+
 	i915_audio_component_cleanup(dev_priv);
 
 	ret = i915_gem_suspend(dev);
@@ -1153,8 +1153,6 @@
 
 	acpi_video_unregister();
 
-	intel_fbdev_fini(dev);
-
 	drm_vblank_cleanup(dev);
 
 	intel_modeset_cleanup(dev);
@@ -1196,7 +1194,7 @@
 	intel_fbc_cleanup_cfb(dev_priv);
 	i915_gem_cleanup_stolen(dev);
 
-	intel_csr_ucode_fini(dev);
+	intel_csr_ucode_fini(dev_priv);
 
 	intel_teardown_gmbus(dev);
 	intel_teardown_mchbar(dev);
@@ -1264,8 +1262,6 @@
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 
-	if (file_priv && file_priv->bsd_ring)
-		file_priv->bsd_ring = NULL;
 	kfree(file_priv);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 760e0ce..e6935f1 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -228,121 +228,83 @@
 	.need_gfx_hws = 1, .has_hotplug = 1, \
 	.has_fbc = 1, \
 	.ring_mask = RENDER_RING | BSD_RING | BLT_RING, \
-	.has_llc = 1
+	.has_llc = 1, \
+	GEN_DEFAULT_PIPEOFFSETS, \
+	IVB_CURSOR_OFFSETS
 
 static const struct intel_device_info intel_ivybridge_d_info = {
 	GEN7_FEATURES,
 	.is_ivybridge = 1,
-	GEN_DEFAULT_PIPEOFFSETS,
-	IVB_CURSOR_OFFSETS,
 };
 
 static const struct intel_device_info intel_ivybridge_m_info = {
 	GEN7_FEATURES,
 	.is_ivybridge = 1,
 	.is_mobile = 1,
-	GEN_DEFAULT_PIPEOFFSETS,
-	IVB_CURSOR_OFFSETS,
 };
 
 static const struct intel_device_info intel_ivybridge_q_info = {
 	GEN7_FEATURES,
 	.is_ivybridge = 1,
 	.num_pipes = 0, /* legal, last one wins */
-	GEN_DEFAULT_PIPEOFFSETS,
-	IVB_CURSOR_OFFSETS,
 };
 
+#define VLV_FEATURES  \
+	.gen = 7, .num_pipes = 2, \
+	.need_gfx_hws = 1, .has_hotplug = 1, \
+	.ring_mask = RENDER_RING | BSD_RING | BLT_RING, \
+	.display_mmio_offset = VLV_DISPLAY_BASE, \
+	GEN_DEFAULT_PIPEOFFSETS, \
+	CURSOR_OFFSETS
+
 static const struct intel_device_info intel_valleyview_m_info = {
-	GEN7_FEATURES,
-	.is_mobile = 1,
-	.num_pipes = 2,
+	VLV_FEATURES,
 	.is_valleyview = 1,
-	.display_mmio_offset = VLV_DISPLAY_BASE,
-	.has_fbc = 0, /* legal, last one wins */
-	.has_llc = 0, /* legal, last one wins */
-	GEN_DEFAULT_PIPEOFFSETS,
-	CURSOR_OFFSETS,
+	.is_mobile = 1,
 };
 
 static const struct intel_device_info intel_valleyview_d_info = {
-	GEN7_FEATURES,
-	.num_pipes = 2,
+	VLV_FEATURES,
 	.is_valleyview = 1,
-	.display_mmio_offset = VLV_DISPLAY_BASE,
-	.has_fbc = 0, /* legal, last one wins */
-	.has_llc = 0, /* legal, last one wins */
-	GEN_DEFAULT_PIPEOFFSETS,
-	CURSOR_OFFSETS,
 };
 
+#define HSW_FEATURES  \
+	GEN7_FEATURES, \
+	.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, \
+	.has_ddi = 1, \
+	.has_fpga_dbg = 1
+
 static const struct intel_device_info intel_haswell_d_info = {
-	GEN7_FEATURES,
+	HSW_FEATURES,
 	.is_haswell = 1,
-	.has_ddi = 1,
-	.has_fpga_dbg = 1,
-	.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
-	GEN_DEFAULT_PIPEOFFSETS,
-	IVB_CURSOR_OFFSETS,
 };
 
 static const struct intel_device_info intel_haswell_m_info = {
-	GEN7_FEATURES,
+	HSW_FEATURES,
 	.is_haswell = 1,
 	.is_mobile = 1,
-	.has_ddi = 1,
-	.has_fpga_dbg = 1,
-	.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
-	GEN_DEFAULT_PIPEOFFSETS,
-	IVB_CURSOR_OFFSETS,
 };
 
 static const struct intel_device_info intel_broadwell_d_info = {
-	.gen = 8, .num_pipes = 3,
-	.need_gfx_hws = 1, .has_hotplug = 1,
-	.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
-	.has_llc = 1,
-	.has_ddi = 1,
-	.has_fpga_dbg = 1,
-	.has_fbc = 1,
-	GEN_DEFAULT_PIPEOFFSETS,
-	IVB_CURSOR_OFFSETS,
+	HSW_FEATURES,
+	.gen = 8,
 };
 
 static const struct intel_device_info intel_broadwell_m_info = {
-	.gen = 8, .is_mobile = 1, .num_pipes = 3,
-	.need_gfx_hws = 1, .has_hotplug = 1,
-	.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
-	.has_llc = 1,
-	.has_ddi = 1,
-	.has_fpga_dbg = 1,
-	.has_fbc = 1,
-	GEN_DEFAULT_PIPEOFFSETS,
-	IVB_CURSOR_OFFSETS,
+	HSW_FEATURES,
+	.gen = 8, .is_mobile = 1,
 };
 
 static const struct intel_device_info intel_broadwell_gt3d_info = {
-	.gen = 8, .num_pipes = 3,
-	.need_gfx_hws = 1, .has_hotplug = 1,
+	HSW_FEATURES,
+	.gen = 8,
 	.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING,
-	.has_llc = 1,
-	.has_ddi = 1,
-	.has_fpga_dbg = 1,
-	.has_fbc = 1,
-	GEN_DEFAULT_PIPEOFFSETS,
-	IVB_CURSOR_OFFSETS,
 };
 
 static const struct intel_device_info intel_broadwell_gt3m_info = {
-	.gen = 8, .is_mobile = 1, .num_pipes = 3,
-	.need_gfx_hws = 1, .has_hotplug = 1,
+	HSW_FEATURES,
+	.gen = 8, .is_mobile = 1,
 	.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING,
-	.has_llc = 1,
-	.has_ddi = 1,
-	.has_fpga_dbg = 1,
-	.has_fbc = 1,
-	GEN_DEFAULT_PIPEOFFSETS,
-	IVB_CURSOR_OFFSETS,
 };
 
 static const struct intel_device_info intel_cherryview_info = {
@@ -356,33 +318,21 @@
 };
 
 static const struct intel_device_info intel_skylake_info = {
+	HSW_FEATURES,
 	.is_skylake = 1,
-	.gen = 9, .num_pipes = 3,
-	.need_gfx_hws = 1, .has_hotplug = 1,
-	.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
-	.has_llc = 1,
-	.has_ddi = 1,
-	.has_fpga_dbg = 1,
-	.has_fbc = 1,
-	GEN_DEFAULT_PIPEOFFSETS,
-	IVB_CURSOR_OFFSETS,
+	.gen = 9,
 };
 
 static const struct intel_device_info intel_skylake_gt3_info = {
+	HSW_FEATURES,
 	.is_skylake = 1,
-	.gen = 9, .num_pipes = 3,
-	.need_gfx_hws = 1, .has_hotplug = 1,
+	.gen = 9,
 	.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING,
-	.has_llc = 1,
-	.has_ddi = 1,
-	.has_fpga_dbg = 1,
-	.has_fbc = 1,
-	GEN_DEFAULT_PIPEOFFSETS,
-	IVB_CURSOR_OFFSETS,
 };
 
 static const struct intel_device_info intel_broxton_info = {
 	.is_preliminary = 1,
+	.is_broxton = 1,
 	.gen = 9,
 	.need_gfx_hws = 1, .has_hotplug = 1,
 	.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
@@ -394,50 +344,67 @@
 	IVB_CURSOR_OFFSETS,
 };
 
+static const struct intel_device_info intel_kabylake_info = {
+	HSW_FEATURES,
+	.is_preliminary = 1,
+	.is_kabylake = 1,
+	.gen = 9,
+};
+
+static const struct intel_device_info intel_kabylake_gt3_info = {
+	HSW_FEATURES,
+	.is_preliminary = 1,
+	.is_kabylake = 1,
+	.gen = 9,
+	.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING,
+};
+
 /*
  * Make sure any device matches here are from most specific to most
  * general.  For example, since the Quanta match is based on the subsystem
  * and subvendor IDs, we need it to come before the more general IVB
  * PCI ID matches, otherwise we'll use the wrong info struct above.
  */
-#define INTEL_PCI_IDS \
-	INTEL_I830_IDS(&intel_i830_info),	\
-	INTEL_I845G_IDS(&intel_845g_info),	\
-	INTEL_I85X_IDS(&intel_i85x_info),	\
-	INTEL_I865G_IDS(&intel_i865g_info),	\
-	INTEL_I915G_IDS(&intel_i915g_info),	\
-	INTEL_I915GM_IDS(&intel_i915gm_info),	\
-	INTEL_I945G_IDS(&intel_i945g_info),	\
-	INTEL_I945GM_IDS(&intel_i945gm_info),	\
-	INTEL_I965G_IDS(&intel_i965g_info),	\
-	INTEL_G33_IDS(&intel_g33_info),		\
-	INTEL_I965GM_IDS(&intel_i965gm_info),	\
-	INTEL_GM45_IDS(&intel_gm45_info), 	\
-	INTEL_G45_IDS(&intel_g45_info), 	\
-	INTEL_PINEVIEW_IDS(&intel_pineview_info),	\
-	INTEL_IRONLAKE_D_IDS(&intel_ironlake_d_info),	\
-	INTEL_IRONLAKE_M_IDS(&intel_ironlake_m_info),	\
-	INTEL_SNB_D_IDS(&intel_sandybridge_d_info),	\
-	INTEL_SNB_M_IDS(&intel_sandybridge_m_info),	\
-	INTEL_IVB_Q_IDS(&intel_ivybridge_q_info), /* must be first IVB */ \
-	INTEL_IVB_M_IDS(&intel_ivybridge_m_info),	\
-	INTEL_IVB_D_IDS(&intel_ivybridge_d_info),	\
-	INTEL_HSW_D_IDS(&intel_haswell_d_info), \
-	INTEL_HSW_M_IDS(&intel_haswell_m_info), \
-	INTEL_VLV_M_IDS(&intel_valleyview_m_info),	\
-	INTEL_VLV_D_IDS(&intel_valleyview_d_info),	\
-	INTEL_BDW_GT12M_IDS(&intel_broadwell_m_info),	\
-	INTEL_BDW_GT12D_IDS(&intel_broadwell_d_info),	\
-	INTEL_BDW_GT3M_IDS(&intel_broadwell_gt3m_info),	\
-	INTEL_BDW_GT3D_IDS(&intel_broadwell_gt3d_info), \
-	INTEL_CHV_IDS(&intel_cherryview_info),	\
-	INTEL_SKL_GT1_IDS(&intel_skylake_info),	\
-	INTEL_SKL_GT2_IDS(&intel_skylake_info),	\
-	INTEL_SKL_GT3_IDS(&intel_skylake_gt3_info),	\
-	INTEL_BXT_IDS(&intel_broxton_info)
-
-static const struct pci_device_id pciidlist[] = {		/* aka */
-	INTEL_PCI_IDS,
+static const struct pci_device_id pciidlist[] = {
+	INTEL_I830_IDS(&intel_i830_info),
+	INTEL_I845G_IDS(&intel_845g_info),
+	INTEL_I85X_IDS(&intel_i85x_info),
+	INTEL_I865G_IDS(&intel_i865g_info),
+	INTEL_I915G_IDS(&intel_i915g_info),
+	INTEL_I915GM_IDS(&intel_i915gm_info),
+	INTEL_I945G_IDS(&intel_i945g_info),
+	INTEL_I945GM_IDS(&intel_i945gm_info),
+	INTEL_I965G_IDS(&intel_i965g_info),
+	INTEL_G33_IDS(&intel_g33_info),
+	INTEL_I965GM_IDS(&intel_i965gm_info),
+	INTEL_GM45_IDS(&intel_gm45_info),
+	INTEL_G45_IDS(&intel_g45_info),
+	INTEL_PINEVIEW_IDS(&intel_pineview_info),
+	INTEL_IRONLAKE_D_IDS(&intel_ironlake_d_info),
+	INTEL_IRONLAKE_M_IDS(&intel_ironlake_m_info),
+	INTEL_SNB_D_IDS(&intel_sandybridge_d_info),
+	INTEL_SNB_M_IDS(&intel_sandybridge_m_info),
+	INTEL_IVB_Q_IDS(&intel_ivybridge_q_info), /* must be first IVB */
+	INTEL_IVB_M_IDS(&intel_ivybridge_m_info),
+	INTEL_IVB_D_IDS(&intel_ivybridge_d_info),
+	INTEL_HSW_D_IDS(&intel_haswell_d_info),
+	INTEL_HSW_M_IDS(&intel_haswell_m_info),
+	INTEL_VLV_M_IDS(&intel_valleyview_m_info),
+	INTEL_VLV_D_IDS(&intel_valleyview_d_info),
+	INTEL_BDW_GT12M_IDS(&intel_broadwell_m_info),
+	INTEL_BDW_GT12D_IDS(&intel_broadwell_d_info),
+	INTEL_BDW_GT3M_IDS(&intel_broadwell_gt3m_info),
+	INTEL_BDW_GT3D_IDS(&intel_broadwell_gt3d_info),
+	INTEL_CHV_IDS(&intel_cherryview_info),
+	INTEL_SKL_GT1_IDS(&intel_skylake_info),
+	INTEL_SKL_GT2_IDS(&intel_skylake_info),
+	INTEL_SKL_GT3_IDS(&intel_skylake_gt3_info),
+	INTEL_SKL_GT4_IDS(&intel_skylake_gt3_info),
+	INTEL_BXT_IDS(&intel_broxton_info),
+	INTEL_KBL_GT1_IDS(&intel_kabylake_info),
+	INTEL_KBL_GT2_IDS(&intel_kabylake_info),
+	INTEL_KBL_GT3_IDS(&intel_kabylake_gt3_info),
+	INTEL_KBL_GT4_IDS(&intel_kabylake_gt3_info),
 	{0, 0, 0}
 };
 
@@ -463,7 +430,7 @@
 	} else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
 		ret = PCH_LPT;
 		DRM_DEBUG_KMS("Assuming LynxPoint PCH\n");
-	} else if (IS_SKYLAKE(dev)) {
+	} else if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
 		ret = PCH_SPT;
 		DRM_DEBUG_KMS("Assuming SunrisePoint PCH\n");
 	}
@@ -526,12 +493,15 @@
 			} else if (id == INTEL_PCH_SPT_DEVICE_ID_TYPE) {
 				dev_priv->pch_type = PCH_SPT;
 				DRM_DEBUG_KMS("Found SunrisePoint PCH\n");
-				WARN_ON(!IS_SKYLAKE(dev));
+				WARN_ON(!IS_SKYLAKE(dev) &&
+					!IS_KABYLAKE(dev));
 			} else if (id == INTEL_PCH_SPT_LP_DEVICE_ID_TYPE) {
 				dev_priv->pch_type = PCH_SPT;
 				DRM_DEBUG_KMS("Found SunrisePoint LP PCH\n");
-				WARN_ON(!IS_SKYLAKE(dev));
-			} else if (id == INTEL_PCH_P2X_DEVICE_ID_TYPE) {
+				WARN_ON(!IS_SKYLAKE(dev) &&
+					!IS_KABYLAKE(dev));
+			} else if ((id == INTEL_PCH_P2X_DEVICE_ID_TYPE) ||
+				   (id == INTEL_PCH_QEMU_DEVICE_ID_TYPE)) {
 				dev_priv->pch_type = intel_virt_detect_pch(dev);
 			} else
 				continue;
@@ -570,26 +540,6 @@
 	return true;
 }
 
-void i915_firmware_load_error_print(const char *fw_path, int err)
-{
-	DRM_ERROR("failed to load firmware %s (%d)\n", fw_path, err);
-
-	/*
-	 * If the reason is not known assume -ENOENT since that's the most
-	 * usual failure mode.
-	 */
-	if (!err)
-		err = -ENOENT;
-
-	if (!(IS_BUILTIN(CONFIG_DRM_I915) && err == -ENOENT))
-		return;
-
-	DRM_ERROR(
-	  "The driver is built-in, so to load the firmware you need to\n"
-	  "include it either in the kernel (see CONFIG_EXTRA_FIRMWARE) or\n"
-	  "in your initrd/initramfs image.\n");
-}
-
 static void intel_suspend_encoders(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *dev = dev_priv->dev;
@@ -608,9 +558,16 @@
 static int intel_suspend_complete(struct drm_i915_private *dev_priv);
 static int vlv_resume_prepare(struct drm_i915_private *dev_priv,
 			      bool rpm_resume);
-static int skl_resume_prepare(struct drm_i915_private *dev_priv);
 static int bxt_resume_prepare(struct drm_i915_private *dev_priv);
 
+static bool suspend_to_idle(struct drm_i915_private *dev_priv)
+{
+#if IS_ENABLED(CONFIG_ACPI_SLEEP)
+	if (acpi_target_system_state() < ACPI_STATE_S3)
+		return true;
+#endif
+	return false;
+}
 
 static int i915_drm_suspend(struct drm_device *dev)
 {
@@ -663,11 +620,7 @@
 
 	i915_save_state(dev);
 
-	opregion_target_state = PCI_D3cold;
-#if IS_ENABLED(CONFIG_ACPI_SLEEP)
-	if (acpi_target_system_state() < ACPI_STATE_S3)
-		opregion_target_state = PCI_D1;
-#endif
+	opregion_target_state = suspend_to_idle(dev_priv) ? PCI_D1 : PCI_D3cold;
 	intel_opregion_notify_adapter(dev, opregion_target_state);
 
 	intel_uncore_forcewake_reset(dev, false);
@@ -679,18 +632,35 @@
 
 	intel_display_set_init_power(dev_priv, false);
 
+	if (HAS_CSR(dev_priv))
+		flush_work(&dev_priv->csr.work);
+
 	return 0;
 }
 
 static int i915_drm_suspend_late(struct drm_device *drm_dev, bool hibernation)
 {
 	struct drm_i915_private *dev_priv = drm_dev->dev_private;
+	bool fw_csr;
 	int ret;
 
+	fw_csr = suspend_to_idle(dev_priv) && dev_priv->csr.dmc_payload;
+	/*
+	 * In case of firmware assisted context save/restore don't manually
+	 * deinit the power domains. This also means the CSR/DMC firmware will
+	 * stay active, it will power down any HW resources as required and
+	 * also enable deeper system power states that would be blocked if the
+	 * firmware was inactive.
+	 */
+	if (!fw_csr)
+		intel_power_domains_suspend(dev_priv);
+
 	ret = intel_suspend_complete(dev_priv);
 
 	if (ret) {
 		DRM_ERROR("Suspend complete failed: %d\n", ret);
+		if (!fw_csr)
+			intel_power_domains_init_hw(dev_priv, true);
 
 		return ret;
 	}
@@ -711,6 +681,8 @@
 	if (!(hibernation && INTEL_INFO(dev_priv)->gen < 6))
 		pci_set_power_state(drm_dev->pdev, PCI_D3hot);
 
+	dev_priv->suspended_to_idle = suspend_to_idle(dev_priv);
+
 	return 0;
 }
 
@@ -823,8 +795,10 @@
 	 * FIXME: This should be solved with a special hdmi sink device or
 	 * similar so that power domains can be employed.
 	 */
-	if (pci_enable_device(dev->pdev))
-		return -EIO;
+	if (pci_enable_device(dev->pdev)) {
+		ret = -EIO;
+		goto out;
+	}
 
 	pci_set_master(dev->pdev);
 
@@ -838,13 +812,16 @@
 
 	if (IS_BROXTON(dev))
 		ret = bxt_resume_prepare(dev_priv);
-	else if (IS_SKYLAKE(dev_priv))
-		ret = skl_resume_prepare(dev_priv);
 	else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
 		hsw_disable_pc8(dev_priv);
 
 	intel_uncore_sanitize(dev);
-	intel_power_domains_init_hw(dev_priv);
+
+	if (!(dev_priv->suspended_to_idle && dev_priv->csr.dmc_payload))
+		intel_power_domains_init_hw(dev_priv, true);
+
+out:
+	dev_priv->suspended_to_idle = false;
 
 	return ret;
 }
@@ -1051,15 +1028,6 @@
 	return i915_drm_resume(drm_dev);
 }
 
-static int skl_suspend_complete(struct drm_i915_private *dev_priv)
-{
-	/* Enabling DC6 is not a hard requirement to enter runtime D3 */
-
-	skl_uninit_cdclk(dev_priv);
-
-	return 0;
-}
-
 static int hsw_suspend_complete(struct drm_i915_private *dev_priv)
 {
 	hsw_enable_pc8(dev_priv);
@@ -1099,16 +1067,6 @@
 	return 0;
 }
 
-static int skl_resume_prepare(struct drm_i915_private *dev_priv)
-{
-	struct drm_device *dev = dev_priv->dev;
-
-	skl_init_cdclk(dev_priv);
-	intel_csr_load_program(dev);
-
-	return 0;
-}
-
 /*
  * Save all Gunit registers that may be lost after a D3 and a subsequent
  * S0i[R123] transition. The list of registers needing a save/restore is
@@ -1572,8 +1530,6 @@
 
 	if (IS_BROXTON(dev))
 		ret = bxt_resume_prepare(dev_priv);
-	else if (IS_SKYLAKE(dev))
-		ret = skl_resume_prepare(dev_priv);
 	else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
 		hsw_disable_pc8(dev_priv);
 	else if (IS_VALLEYVIEW(dev_priv))
@@ -1616,8 +1572,6 @@
 
 	if (IS_BROXTON(dev_priv))
 		ret = bxt_suspend_complete(dev_priv);
-	else if (IS_SKYLAKE(dev_priv))
-		ret = skl_suspend_complete(dev_priv);
 	else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
 		ret = hsw_suspend_complete(dev_priv);
 	else if (IS_VALLEYVIEW(dev_priv))
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8afda45..f1a8a53 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -57,7 +57,7 @@
 
 #define DRIVER_NAME		"i915"
 #define DRIVER_DESC		"Intel Graphics"
-#define DRIVER_DATE		"20151010"
+#define DRIVER_DATE		"20151204"
 
 #undef WARN_ON
 /* Many gcc seem to no see through this and fall over :( */
@@ -180,15 +180,11 @@
 	POWER_DOMAIN_TRANSCODER_B,
 	POWER_DOMAIN_TRANSCODER_C,
 	POWER_DOMAIN_TRANSCODER_EDP,
-	POWER_DOMAIN_PORT_DDI_A_2_LANES,
-	POWER_DOMAIN_PORT_DDI_A_4_LANES,
-	POWER_DOMAIN_PORT_DDI_B_2_LANES,
-	POWER_DOMAIN_PORT_DDI_B_4_LANES,
-	POWER_DOMAIN_PORT_DDI_C_2_LANES,
-	POWER_DOMAIN_PORT_DDI_C_4_LANES,
-	POWER_DOMAIN_PORT_DDI_D_2_LANES,
-	POWER_DOMAIN_PORT_DDI_D_4_LANES,
-	POWER_DOMAIN_PORT_DDI_E_2_LANES,
+	POWER_DOMAIN_PORT_DDI_A_LANES,
+	POWER_DOMAIN_PORT_DDI_B_LANES,
+	POWER_DOMAIN_PORT_DDI_C_LANES,
+	POWER_DOMAIN_PORT_DDI_D_LANES,
+	POWER_DOMAIN_PORT_DDI_E_LANES,
 	POWER_DOMAIN_PORT_DSI,
 	POWER_DOMAIN_PORT_CRT,
 	POWER_DOMAIN_PORT_OTHER,
@@ -199,6 +195,8 @@
 	POWER_DOMAIN_AUX_B,
 	POWER_DOMAIN_AUX_C,
 	POWER_DOMAIN_AUX_D,
+	POWER_DOMAIN_GMBUS,
+	POWER_DOMAIN_MODESET,
 	POWER_DOMAIN_INIT,
 
 	POWER_DOMAIN_NUM,
@@ -288,7 +286,7 @@
 	list_for_each_entry(intel_plane,				\
 			    &(dev)->mode_config.plane_list,		\
 			    base.head)					\
-		if ((intel_plane)->pipe == (intel_crtc)->pipe)
+		for_each_if ((intel_plane)->pipe == (intel_crtc)->pipe)
 
 #define for_each_intel_crtc(dev, intel_crtc) \
 	list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list, base.head)
@@ -305,15 +303,15 @@
 
 #define for_each_encoder_on_crtc(dev, __crtc, intel_encoder) \
 	list_for_each_entry((intel_encoder), &(dev)->mode_config.encoder_list, base.head) \
-		if ((intel_encoder)->base.crtc == (__crtc))
+		for_each_if ((intel_encoder)->base.crtc == (__crtc))
 
 #define for_each_connector_on_encoder(dev, __encoder, intel_connector) \
 	list_for_each_entry((intel_connector), &(dev)->mode_config.connector_list, base.head) \
-		if ((intel_connector)->base.encoder == (__encoder))
+		for_each_if ((intel_connector)->base.encoder == (__encoder))
 
 #define for_each_power_domain(domain, mask)				\
 	for ((domain) = 0; (domain) < POWER_DOMAIN_NUM; (domain)++)	\
-		if ((1 << (domain)) & (mask))
+		for_each_if ((1 << (domain)) & (mask))
 
 struct drm_i915_private;
 struct i915_mm_struct;
@@ -351,6 +349,8 @@
 	/* hsw/bdw */
 	DPLL_ID_WRPLL1 = 0,
 	DPLL_ID_WRPLL2 = 1,
+	DPLL_ID_SPLL = 2,
+
 	/* skl */
 	DPLL_ID_SKL_DPLL1 = 0,
 	DPLL_ID_SKL_DPLL2 = 1,
@@ -367,6 +367,7 @@
 
 	/* hsw, bdw */
 	uint32_t wrpll;
+	uint32_t spll;
 
 	/* skl */
 	/*
@@ -627,11 +628,9 @@
 			  int target, int refclk,
 			  struct dpll *match_clock,
 			  struct dpll *best_clock);
+	int (*compute_pipe_wm)(struct intel_crtc *crtc,
+			       struct drm_atomic_state *state);
 	void (*update_wm)(struct drm_crtc *crtc);
-	void (*update_sprite_wm)(struct drm_plane *plane,
-				 struct drm_crtc *crtc,
-				 uint32_t sprite_width, uint32_t sprite_height,
-				 int pixel_size, bool enable, bool scaled);
 	int (*modeset_calc_cdclk)(struct drm_atomic_state *state);
 	void (*modeset_commit_cdclk)(struct drm_atomic_state *state);
 	/* Returns the active state of the crtc, and if the crtc is active,
@@ -689,18 +688,18 @@
 	void (*force_wake_put)(struct drm_i915_private *dev_priv,
 							enum forcewake_domains domains);
 
-	uint8_t  (*mmio_readb)(struct drm_i915_private *dev_priv, off_t offset, bool trace);
-	uint16_t (*mmio_readw)(struct drm_i915_private *dev_priv, off_t offset, bool trace);
-	uint32_t (*mmio_readl)(struct drm_i915_private *dev_priv, off_t offset, bool trace);
-	uint64_t (*mmio_readq)(struct drm_i915_private *dev_priv, off_t offset, bool trace);
+	uint8_t  (*mmio_readb)(struct drm_i915_private *dev_priv, i915_reg_t r, bool trace);
+	uint16_t (*mmio_readw)(struct drm_i915_private *dev_priv, i915_reg_t r, bool trace);
+	uint32_t (*mmio_readl)(struct drm_i915_private *dev_priv, i915_reg_t r, bool trace);
+	uint64_t (*mmio_readq)(struct drm_i915_private *dev_priv, i915_reg_t r, bool trace);
 
-	void (*mmio_writeb)(struct drm_i915_private *dev_priv, off_t offset,
+	void (*mmio_writeb)(struct drm_i915_private *dev_priv, i915_reg_t r,
 				uint8_t val, bool trace);
-	void (*mmio_writew)(struct drm_i915_private *dev_priv, off_t offset,
+	void (*mmio_writew)(struct drm_i915_private *dev_priv, i915_reg_t r,
 				uint16_t val, bool trace);
-	void (*mmio_writel)(struct drm_i915_private *dev_priv, off_t offset,
+	void (*mmio_writel)(struct drm_i915_private *dev_priv, i915_reg_t r,
 				uint32_t val, bool trace);
-	void (*mmio_writeq)(struct drm_i915_private *dev_priv, off_t offset,
+	void (*mmio_writeq)(struct drm_i915_private *dev_priv, i915_reg_t r,
 				uint64_t val, bool trace);
 };
 
@@ -717,11 +716,11 @@
 		enum forcewake_domain_id id;
 		unsigned wake_count;
 		struct timer_list timer;
-		u32 reg_set;
+		i915_reg_t reg_set;
 		u32 val_set;
 		u32 val_clear;
-		u32 reg_ack;
-		u32 reg_post;
+		i915_reg_t reg_ack;
+		i915_reg_t reg_post;
 		u32 val_reset;
 	} fw_domain[FW_DOMAIN_ID_COUNT];
 };
@@ -731,25 +730,24 @@
 	for ((i__) = 0, (domain__) = &(dev_priv__)->uncore.fw_domain[0]; \
 	     (i__) < FW_DOMAIN_ID_COUNT; \
 	     (i__)++, (domain__) = &(dev_priv__)->uncore.fw_domain[i__]) \
-		if (((mask__) & (dev_priv__)->uncore.fw_domains) & (1 << (i__)))
+		for_each_if (((mask__) & (dev_priv__)->uncore.fw_domains) & (1 << (i__)))
 
 #define for_each_fw_domain(domain__, dev_priv__, i__) \
 	for_each_fw_domain_mask(domain__, FORCEWAKE_ALL, dev_priv__, i__)
 
-enum csr_state {
-	FW_UNINITIALIZED = 0,
-	FW_LOADED,
-	FW_FAILED
-};
+#define CSR_VERSION(major, minor)	((major) << 16 | (minor))
+#define CSR_VERSION_MAJOR(version)	((version) >> 16)
+#define CSR_VERSION_MINOR(version)	((version) & 0xffff)
 
 struct intel_csr {
+	struct work_struct work;
 	const char *fw_path;
 	uint32_t *dmc_payload;
 	uint32_t dmc_fw_size;
+	uint32_t version;
 	uint32_t mmio_count;
-	uint32_t mmioaddr[8];
+	i915_reg_t mmioaddr[8];
 	uint32_t mmiodata[8];
-	enum csr_state state;
 };
 
 #define DEV_INFO_FOR_EACH_FLAG(func, sep) \
@@ -767,6 +765,8 @@
 	func(is_valleyview) sep \
 	func(is_haswell) sep \
 	func(is_skylake) sep \
+	func(is_broxton) sep \
+	func(is_kabylake) sep \
 	func(is_preliminary) sep \
 	func(has_fbc) sep \
 	func(has_pipe_cxsr) sep \
@@ -902,7 +902,6 @@
 	/* This is always the inner lock when overlapping with struct_mutex and
 	 * it's the outer lock when overlapping with stolen_lock. */
 	struct mutex lock;
-	unsigned long uncompressed_size;
 	unsigned threshold;
 	unsigned int fb_id;
 	unsigned int possible_framebuffer_bits;
@@ -915,38 +914,21 @@
 
 	bool false_color;
 
-	/* Tracks whether the HW is actually enabled, not whether the feature is
-	 * possible. */
 	bool enabled;
+	bool active;
 
 	struct intel_fbc_work {
-		struct delayed_work work;
-		struct intel_crtc *crtc;
+		bool scheduled;
+		struct work_struct work;
 		struct drm_framebuffer *fb;
-	} *fbc_work;
+		unsigned long enable_jiffies;
+	} work;
 
-	enum no_fbc_reason {
-		FBC_OK, /* FBC is enabled */
-		FBC_UNSUPPORTED, /* FBC is not supported by this chipset */
-		FBC_NO_OUTPUT, /* no outputs enabled to compress */
-		FBC_STOLEN_TOO_SMALL, /* not enough space for buffers */
-		FBC_UNSUPPORTED_MODE, /* interlace or doublescanned mode */
-		FBC_MODE_TOO_LARGE, /* mode too large for compression */
-		FBC_BAD_PLANE, /* fbc not supported on plane */
-		FBC_NOT_TILED, /* buffer not tiled */
-		FBC_MULTIPLE_PIPES, /* more than one pipe active */
-		FBC_MODULE_PARAM,
-		FBC_CHIP_DEFAULT, /* disabled by default on this chip */
-		FBC_ROTATION, /* rotation is not supported */
-		FBC_IN_DBG_MASTER, /* kernel debugger is active */
-		FBC_BAD_STRIDE, /* stride is not supported */
-		FBC_PIXEL_RATE, /* pixel rate is too big */
-		FBC_PIXEL_FORMAT /* pixel format is invalid */
-	} no_fbc_reason;
+	const char *no_fbc_reason;
 
-	bool (*fbc_enabled)(struct drm_i915_private *dev_priv);
-	void (*enable_fbc)(struct intel_crtc *crtc);
-	void (*disable_fbc)(struct drm_i915_private *dev_priv);
+	bool (*is_active)(struct drm_i915_private *dev_priv);
+	void (*activate)(struct intel_crtc *crtc);
+	void (*deactivate)(struct drm_i915_private *dev_priv);
 };
 
 /**
@@ -1016,7 +998,7 @@
 	struct i2c_adapter adapter;
 	u32 force_bit;
 	u32 reg0;
-	u32 gpio_reg;
+	i915_reg_t gpio_reg;
 	struct i2c_algo_bit_data bit_algo;
 	struct drm_i915_private *dev_priv;
 };
@@ -1665,7 +1647,7 @@
 };
 
 struct i915_wa_reg {
-	u32 addr;
+	i915_reg_t addr;
 	u32 value;
 	/* bitmask representing WA bits */
 	u32 mask;
@@ -1694,6 +1676,13 @@
 	struct drm_i915_gem_request     *request;
 };
 
+/* used in computing the new watermarks state */
+struct intel_wm_config {
+	unsigned int num_pipes_active;
+	bool sprites_enabled;
+	bool sprites_scaled;
+};
+
 struct drm_i915_private {
 	struct drm_device *dev;
 	struct kmem_cache *objects;
@@ -1714,9 +1703,6 @@
 
 	struct intel_csr csr;
 
-	/* Display CSR-related protection */
-	struct mutex csr_lock;
-
 	struct intel_gmbus gmbus[GMBUS_NUM_PINS];
 
 	/** gmbus_mutex protects against concurrent usage of the single hw gmbus
@@ -1731,6 +1717,8 @@
 	/* MMIO base address for MIPI regs */
 	uint32_t mipi_mmio_base;
 
+	uint32_t psr_mmio_base;
+
 	wait_queue_head_t gmbus_wait_queue;
 
 	struct pci_dev *bridge_dev;
@@ -1896,6 +1884,7 @@
 	u32 chv_phy_control;
 
 	u32 suspend_count;
+	bool suspended_to_idle;
 	struct i915_suspend_saved_registers regfile;
 	struct vlv_s0ix_state vlv_s0ix_state;
 
@@ -1918,6 +1907,9 @@
 		 */
 		uint16_t skl_latency[8];
 
+		/* Committed wm config */
+		struct intel_wm_config config;
+
 		/*
 		 * The skl_wm_values structure is a bit too big for stack
 		 * allocation, so we keep the staging struct where we store
@@ -1976,7 +1968,7 @@
 /* Iterate over initialised rings */
 #define for_each_ring(ring__, dev_priv__, i__) \
 	for ((i__) = 0; (i__) < I915_NUM_RINGS; (i__)++) \
-		if (((ring__) = &(dev_priv__)->ring[(i__)]), intel_ring_initialized((ring__)))
+		for_each_if ((((ring__) = &(dev_priv__)->ring[(i__)]), intel_ring_initialized((ring__))))
 
 enum hdmi_force_audio {
 	HDMI_AUDIO_OFF_DVI = -2,	/* no aux data for HDMI-DVI converter */
@@ -2432,6 +2424,15 @@
 #define INTEL_DEVID(p)	(INTEL_INFO(p)->device_id)
 #define INTEL_REVID(p)	(__I915__(p)->dev->pdev->revision)
 
+#define REVID_FOREVER		0xff
+/*
+ * Return true if revision is in range [since,until] inclusive.
+ *
+ * Use 0 for open-ended since, and REVID_FOREVER for open-ended until.
+ */
+#define IS_REVID(p, since, until) \
+	(INTEL_REVID(p) >= (since) && INTEL_REVID(p) <= (until))
+
 #define IS_I830(dev)		(INTEL_DEVID(dev) == 0x3577)
 #define IS_845G(dev)		(INTEL_DEVID(dev) == 0x2562)
 #define IS_I85X(dev)		(INTEL_INFO(dev)->is_i85x)
@@ -2458,7 +2459,8 @@
 #define IS_HASWELL(dev)	(INTEL_INFO(dev)->is_haswell)
 #define IS_BROADWELL(dev)	(!INTEL_INFO(dev)->is_valleyview && IS_GEN8(dev))
 #define IS_SKYLAKE(dev)	(INTEL_INFO(dev)->is_skylake)
-#define IS_BROXTON(dev)	(!INTEL_INFO(dev)->is_skylake && IS_GEN9(dev))
+#define IS_BROXTON(dev)		(INTEL_INFO(dev)->is_broxton)
+#define IS_KABYLAKE(dev)	(INTEL_INFO(dev)->is_kabylake)
 #define IS_MOBILE(dev)		(INTEL_INFO(dev)->is_mobile)
 #define IS_HSW_EARLY_SDV(dev)	(IS_HASWELL(dev) && \
 				 (INTEL_DEVID(dev) & 0xFF00) == 0x0C00)
@@ -2493,16 +2495,21 @@
 
 #define IS_PRELIMINARY_HW(intel_info) ((intel_info)->is_preliminary)
 
-#define SKL_REVID_A0		(0x0)
-#define SKL_REVID_B0		(0x1)
-#define SKL_REVID_C0		(0x2)
-#define SKL_REVID_D0		(0x3)
-#define SKL_REVID_E0		(0x4)
-#define SKL_REVID_F0		(0x5)
+#define SKL_REVID_A0		0x0
+#define SKL_REVID_B0		0x1
+#define SKL_REVID_C0		0x2
+#define SKL_REVID_D0		0x3
+#define SKL_REVID_E0		0x4
+#define SKL_REVID_F0		0x5
 
-#define BXT_REVID_A0		(0x0)
-#define BXT_REVID_B0		(0x3)
-#define BXT_REVID_C0		(0x9)
+#define IS_SKL_REVID(p, since, until) (IS_SKYLAKE(p) && IS_REVID(p, since, until))
+
+#define BXT_REVID_A0		0x0
+#define BXT_REVID_A1		0x1
+#define BXT_REVID_B0		0x3
+#define BXT_REVID_C0		0x9
+
+#define IS_BXT_REVID(p, since, until) (IS_BROXTON(p) && IS_REVID(p, since, until))
 
 /*
  * The genX designation typically refers to the render engine, so render
@@ -2574,10 +2581,10 @@
 #define HAS_FPGA_DBG_UNCLAIMED(dev)	(INTEL_INFO(dev)->has_fpga_dbg)
 #define HAS_PSR(dev)		(IS_HASWELL(dev) || IS_BROADWELL(dev) || \
 				 IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev) || \
-				 IS_SKYLAKE(dev))
+				 IS_SKYLAKE(dev) || IS_KABYLAKE(dev))
 #define HAS_RUNTIME_PM(dev)	(IS_GEN6(dev) || IS_HASWELL(dev) || \
 				 IS_BROADWELL(dev) || IS_VALLEYVIEW(dev) || \
-				 IS_SKYLAKE(dev))
+				 IS_SKYLAKE(dev) || IS_KABYLAKE(dev))
 #define HAS_RC6(dev)		(INTEL_INFO(dev)->gen >= 6)
 #define HAS_RC6p(dev)		(INTEL_INFO(dev)->gen == 6 || IS_IVYBRIDGE(dev))
 
@@ -2601,11 +2608,13 @@
 #define INTEL_PCH_SPT_DEVICE_ID_TYPE		0xA100
 #define INTEL_PCH_SPT_LP_DEVICE_ID_TYPE		0x9D00
 #define INTEL_PCH_P2X_DEVICE_ID_TYPE		0x7100
+#define INTEL_PCH_QEMU_DEVICE_ID_TYPE		0x2900 /* qemu q35 has 2918 */
 
 #define INTEL_PCH_TYPE(dev) (__I915__(dev)->pch_type)
 #define HAS_PCH_SPT(dev) (INTEL_PCH_TYPE(dev) == PCH_SPT)
 #define HAS_PCH_LPT(dev) (INTEL_PCH_TYPE(dev) == PCH_LPT)
 #define HAS_PCH_LPT_LP(dev) (__I915__(dev)->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE)
+#define HAS_PCH_LPT_H(dev) (__I915__(dev)->pch_id == INTEL_PCH_LPT_DEVICE_ID_TYPE)
 #define HAS_PCH_CPT(dev) (INTEL_PCH_TYPE(dev) == PCH_CPT)
 #define HAS_PCH_IBX(dev) (INTEL_PCH_TYPE(dev) == PCH_IBX)
 #define HAS_PCH_NOP(dev) (INTEL_PCH_TYPE(dev) == PCH_NOP)
@@ -2637,6 +2646,7 @@
 	int panel_use_ssc;
 	int vbt_sdvo_panel_type;
 	int enable_rc6;
+	int enable_dc;
 	int enable_fbc;
 	int enable_ppgtt;
 	int enable_execlists;
@@ -2648,6 +2658,7 @@
 	int enable_cmd_parser;
 	/* leave bools at the end to not create holes */
 	bool enable_hangcheck;
+	bool fastboot;
 	bool prefault_disable;
 	bool load_detect_test;
 	bool reset;
@@ -2684,7 +2695,6 @@
 extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
 extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
 int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
-void i915_firmware_load_error_print(const char *fw_path, int err);
 
 /* intel_hotplug.c */
 void intel_hpd_irq_handler(struct drm_device *dev, u32 pin_mask, u32 long_mask);
@@ -2741,17 +2751,47 @@
 void i915_hotplug_interrupt_update(struct drm_i915_private *dev_priv,
 				   uint32_t mask,
 				   uint32_t bits);
-void
-ironlake_enable_display_irq(struct drm_i915_private *dev_priv, u32 mask);
-void
-ironlake_disable_display_irq(struct drm_i915_private *dev_priv, u32 mask);
+void ilk_update_display_irq(struct drm_i915_private *dev_priv,
+			    uint32_t interrupt_mask,
+			    uint32_t enabled_irq_mask);
+static inline void
+ilk_enable_display_irq(struct drm_i915_private *dev_priv, uint32_t bits)
+{
+	ilk_update_display_irq(dev_priv, bits, bits);
+}
+static inline void
+ilk_disable_display_irq(struct drm_i915_private *dev_priv, uint32_t bits)
+{
+	ilk_update_display_irq(dev_priv, bits, 0);
+}
+void bdw_update_pipe_irq(struct drm_i915_private *dev_priv,
+			 enum pipe pipe,
+			 uint32_t interrupt_mask,
+			 uint32_t enabled_irq_mask);
+static inline void bdw_enable_pipe_irq(struct drm_i915_private *dev_priv,
+				       enum pipe pipe, uint32_t bits)
+{
+	bdw_update_pipe_irq(dev_priv, pipe, bits, bits);
+}
+static inline void bdw_disable_pipe_irq(struct drm_i915_private *dev_priv,
+					enum pipe pipe, uint32_t bits)
+{
+	bdw_update_pipe_irq(dev_priv, pipe, bits, 0);
+}
 void ibx_display_interrupt_update(struct drm_i915_private *dev_priv,
 				  uint32_t interrupt_mask,
 				  uint32_t enabled_irq_mask);
-#define ibx_enable_display_interrupt(dev_priv, bits) \
-	ibx_display_interrupt_update((dev_priv), (bits), (bits))
-#define ibx_disable_display_interrupt(dev_priv, bits) \
-	ibx_display_interrupt_update((dev_priv), (bits), 0)
+static inline void
+ibx_enable_display_interrupt(struct drm_i915_private *dev_priv, uint32_t bits)
+{
+	ibx_display_interrupt_update(dev_priv, bits, bits);
+}
+static inline void
+ibx_disable_display_interrupt(struct drm_i915_private *dev_priv, uint32_t bits)
+{
+	ibx_display_interrupt_update(dev_priv, bits, 0);
+}
+
 
 /* i915_gem.c */
 int i915_gem_create_ioctl(struct drm_device *dev, void *data,
@@ -2991,8 +3031,6 @@
 int __must_check
 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 				     u32 alignment,
-				     struct intel_engine_cs *pipelined,
-				     struct drm_i915_gem_request **pipelined_request,
 				     const struct i915_ggtt_view *view);
 void i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
 					      const struct i915_ggtt_view *view);
@@ -3347,7 +3385,6 @@
 extern void intel_set_memory_cxsr(struct drm_i915_private *dev_priv,
 				  bool enable);
 extern void intel_detect_pch(struct drm_device *dev);
-extern int intel_trans_dp_port_sel(struct drm_crtc *crtc);
 extern int intel_enable_rc6(const struct drm_device *dev);
 
 extern bool i915_semaphore_is_enabled(struct drm_device *dev);
@@ -3430,6 +3467,32 @@
 #define POSTING_READ(reg)	(void)I915_READ_NOTRACE(reg)
 #define POSTING_READ16(reg)	(void)I915_READ16_NOTRACE(reg)
 
+#define __raw_read(x, s) \
+static inline uint##x##_t __raw_i915_read##x(struct drm_i915_private *dev_priv, \
+					     i915_reg_t reg) \
+{ \
+	return read##s(dev_priv->regs + i915_mmio_reg_offset(reg)); \
+}
+
+#define __raw_write(x, s) \
+static inline void __raw_i915_write##x(struct drm_i915_private *dev_priv, \
+				       i915_reg_t reg, uint##x##_t val) \
+{ \
+	write##s(val, dev_priv->regs + i915_mmio_reg_offset(reg)); \
+}
+__raw_read(8, b)
+__raw_read(16, w)
+__raw_read(32, l)
+__raw_read(64, q)
+
+__raw_write(8, b)
+__raw_write(16, w)
+__raw_write(32, l)
+__raw_write(64, q)
+
+#undef __raw_read
+#undef __raw_write
+
 /* These are untraced mmio-accessors that are only valid to be used inside
  * criticial sections inside IRQ handlers where forcewake is explicitly
  * controlled.
@@ -3437,8 +3500,8 @@
  * Note: Should only be used between intel_uncore_forcewake_irqlock() and
  * intel_uncore_forcewake_irqunlock().
  */
-#define I915_READ_FW(reg__) readl(dev_priv->regs + (reg__))
-#define I915_WRITE_FW(reg__, val__) writel(val__, dev_priv->regs + (reg__))
+#define I915_READ_FW(reg__) __raw_i915_read32(dev_priv, (reg__))
+#define I915_WRITE_FW(reg__, val__) __raw_i915_write32(dev_priv, (reg__), (val__))
 #define POSTING_READ_FW(reg__) (void)I915_READ_FW(reg__)
 
 /* "Broadcast RGB" property */
@@ -3446,7 +3509,7 @@
 #define INTEL_BROADCAST_RGB_FULL 1
 #define INTEL_BROADCAST_RGB_LIMITED 2
 
-static inline uint32_t i915_vgacntrl_reg(struct drm_device *dev)
+static inline i915_reg_t i915_vgacntrl_reg(struct drm_device *dev)
 {
 	if (IS_VALLEYVIEW(dev))
 		return VLV_VGACNTRL;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5cf4a19..b7d7cec 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1210,8 +1210,16 @@
 	if (i915_gem_request_completed(req, true))
 		return 0;
 
-	timeout_expire = timeout ?
-		jiffies + nsecs_to_jiffies_timeout((u64)*timeout) : 0;
+	timeout_expire = 0;
+	if (timeout) {
+		if (WARN_ON(*timeout < 0))
+			return -EINVAL;
+
+		if (*timeout == 0)
+			return -ETIME;
+
+		timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout);
+	}
 
 	if (INTEL_INFO(dev_priv)->gen >= 6)
 		gen6_rps_boost(dev_priv, rps, req->emitted_jiffies);
@@ -2737,6 +2745,8 @@
 static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
 					struct intel_engine_cs *ring)
 {
+	struct intel_ringbuffer *buffer;
+
 	while (!list_empty(&ring->active_list)) {
 		struct drm_i915_gem_object *obj;
 
@@ -2752,18 +2762,23 @@
 	 * are the ones that keep the context and ringbuffer backing objects
 	 * pinned in place.
 	 */
-	while (!list_empty(&ring->execlist_queue)) {
-		struct drm_i915_gem_request *submit_req;
 
-		submit_req = list_first_entry(&ring->execlist_queue,
-				struct drm_i915_gem_request,
-				execlist_link);
-		list_del(&submit_req->execlist_link);
+	if (i915.enable_execlists) {
+		spin_lock_irq(&ring->execlist_lock);
+		while (!list_empty(&ring->execlist_queue)) {
+			struct drm_i915_gem_request *submit_req;
 
-		if (submit_req->ctx != ring->default_context)
-			intel_lr_context_unpin(submit_req);
+			submit_req = list_first_entry(&ring->execlist_queue,
+					struct drm_i915_gem_request,
+					execlist_link);
+			list_del(&submit_req->execlist_link);
 
-		i915_gem_request_unreference(submit_req);
+			if (submit_req->ctx != ring->default_context)
+				intel_lr_context_unpin(submit_req);
+
+			i915_gem_request_unreference(submit_req);
+		}
+		spin_unlock_irq(&ring->execlist_lock);
 	}
 
 	/*
@@ -2782,6 +2797,18 @@
 
 		i915_gem_request_retire(request);
 	}
+
+	/* Having flushed all requests from all queues, we know that all
+	 * ringbuffers must now be empty. However, since we do not reclaim
+	 * all space when retiring the request (to prevent HEADs colliding
+	 * with rapid ringbuffer wraparound) the amount of available space
+	 * upon reset is less than when we start. Do one more pass over
+	 * all the ringbuffers to reset last_retired_head.
+	 */
+	list_for_each_entry(buffer, &ring->buffers, link) {
+		buffer->last_retired_head = buffer->tail;
+		intel_ring_update_space(buffer);
+	}
 }
 
 void i915_gem_reset(struct drm_device *dev)
@@ -2922,6 +2949,10 @@
 		if (!list_empty(&ring->request_list))
 			return;
 
+	/* we probably should sync with hangcheck here, using cancel_work_sync.
+	 * Also locking seems to be fubar here, ring->request_list is protected
+	 * by dev->struct_mutex. */
+
 	intel_mark_idle(dev);
 
 	if (mutex_trylock(&dev->struct_mutex)) {
@@ -3046,7 +3077,7 @@
 		if (ret == 0)
 			ret = __i915_wait_request(req[i], reset_counter, true,
 						  args->timeout_ns > 0 ? &args->timeout_ns : NULL,
-						  file->driver_priv);
+						  to_rps_client(file));
 		i915_gem_request_unreference__unlocked(req[i]);
 	}
 	return ret;
@@ -3809,6 +3840,7 @@
 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
 			       struct drm_file *file)
 {
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_caching *args = data;
 	struct drm_i915_gem_object *obj;
 	enum i915_cache_level level;
@@ -3825,7 +3857,7 @@
 		 * cacheline, whereas normally such cachelines would get
 		 * invalidated.
 		 */
-		if (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0)
+		if (IS_BXT_REVID(dev, 0, BXT_REVID_A1))
 			return -ENODEV;
 
 		level = I915_CACHE_LLC;
@@ -3837,9 +3869,11 @@
 		return -EINVAL;
 	}
 
+	intel_runtime_pm_get(dev_priv);
+
 	ret = i915_mutex_lock_interruptible(dev);
 	if (ret)
-		return ret;
+		goto rpm_put;
 
 	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
 	if (&obj->base == NULL) {
@@ -3852,6 +3886,9 @@
 	drm_gem_object_unreference(&obj->base);
 unlock:
 	mutex_unlock(&dev->struct_mutex);
+rpm_put:
+	intel_runtime_pm_put(dev_priv);
+
 	return ret;
 }
 
@@ -3863,17 +3900,11 @@
 int
 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 				     u32 alignment,
-				     struct intel_engine_cs *pipelined,
-				     struct drm_i915_gem_request **pipelined_request,
 				     const struct i915_ggtt_view *view)
 {
 	u32 old_read_domains, old_write_domain;
 	int ret;
 
-	ret = i915_gem_object_sync(obj, pipelined, pipelined_request);
-	if (ret)
-		return ret;
-
 	/* Mark the pin_display early so that we account for the
 	 * display coherency whilst setting up the cache domains.
 	 */
@@ -4470,10 +4501,8 @@
 {
 	struct i915_vma *vma;
 	list_for_each_entry(vma, &obj->vma_list, vma_link) {
-		if (i915_is_ggtt(vma->vm) &&
-		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
-			continue;
-		if (vma->vm == vm)
+		if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL &&
+		    vma->vm == vm)
 			return vma;
 	}
 	return NULL;
@@ -4562,7 +4591,6 @@
 	struct intel_engine_cs *ring = req->ring;
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200);
 	u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
 	int i, ret;
 
@@ -4578,10 +4606,10 @@
 	 * here because no other code should access these registers other than
 	 * at initialization time.
 	 */
-	for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
+	for (i = 0; i < GEN7_L3LOG_SIZE / 4; i++) {
 		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
-		intel_ring_emit(ring, reg_base + i);
-		intel_ring_emit(ring, remap_info[i/4]);
+		intel_ring_emit_reg(ring, GEN7_L3LOG(slice, i));
+		intel_ring_emit(ring, remap_info[i]);
 	}
 
 	intel_ring_advance(ring);
@@ -4749,18 +4777,9 @@
 	if (HAS_GUC_UCODE(dev)) {
 		ret = intel_guc_ucode_load(dev);
 		if (ret) {
-			/*
-			 * If we got an error and GuC submission is enabled, map
-			 * the error to -EIO so the GPU will be declared wedged.
-			 * OTOH, if we didn't intend to use the GuC anyway, just
-			 * discard the error and carry on.
-			 */
-			DRM_ERROR("Failed to initialize GuC, error %d%s\n", ret,
-				  i915.enable_guc_submission ? "" :
-				  " (ignored)");
-			ret = i915.enable_guc_submission ? -EIO : 0;
-			if (ret)
-				goto out;
+			DRM_ERROR("Failed to initialize GuC, error %d\n", ret);
+			ret = -EIO;
+			goto out;
 		}
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 8c688a5..43761c5 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -141,8 +141,6 @@
 	if (!ppgtt)
 		return;
 
-	WARN_ON(!list_empty(&ppgtt->base.active_list));
-
 	list_for_each_entry_safe(vma, next, &ppgtt->base.inactive_list,
 				 mm_list) {
 		if (WARN_ON(__i915_vma_unbind_no_wait(vma)))
@@ -556,7 +554,7 @@
 				if (signaller == ring)
 					continue;
 
-				intel_ring_emit(ring, RING_PSMI_CTL(signaller->mmio_base));
+				intel_ring_emit_reg(ring, RING_PSMI_CTL(signaller->mmio_base));
 				intel_ring_emit(ring, _MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
 			}
 		}
@@ -581,7 +579,7 @@
 				if (signaller == ring)
 					continue;
 
-				intel_ring_emit(ring, RING_PSMI_CTL(signaller->mmio_base));
+				intel_ring_emit_reg(ring, RING_PSMI_CTL(signaller->mmio_base));
 				intel_ring_emit(ring, _MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
 			}
 		}
@@ -925,6 +923,14 @@
 	case I915_CONTEXT_PARAM_NO_ZEROMAP:
 		args->value = ctx->flags & CONTEXT_NO_ZEROMAP;
 		break;
+	case I915_CONTEXT_PARAM_GTT_SIZE:
+		if (ctx->ppgtt)
+			args->value = ctx->ppgtt->base.total;
+		else if (to_i915(dev)->mm.aliasing_ppgtt)
+			args->value = to_i915(dev)->mm.aliasing_ppgtt->base.total;
+		else
+			args->value = to_i915(dev)->gtt.base.total;
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 6ed7d63a..a4c243c 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1114,7 +1114,7 @@
 
 	for (i = 0; i < 4; i++) {
 		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
-		intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
+		intel_ring_emit_reg(ring, GEN7_SO_WRITE_OFFSET(i));
 		intel_ring_emit(ring, 0);
 	}
 
@@ -1241,7 +1241,7 @@
 
 		intel_ring_emit(ring, MI_NOOP);
 		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
-		intel_ring_emit(ring, INSTPM);
+		intel_ring_emit_reg(ring, INSTPM);
 		intel_ring_emit(ring, instp_mask << 16 | instp_mode);
 		intel_ring_advance(ring);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c
index 40a10b2..5981985 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence.c
@@ -59,7 +59,7 @@
 				 struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	int fence_reg_lo, fence_reg_hi;
+	i915_reg_t fence_reg_lo, fence_reg_hi;
 	int fence_pitch_shift;
 
 	if (INTEL_INFO(dev)->gen >= 6) {
@@ -642,11 +642,10 @@
 		}
 
 		/* check for L-shaped memory aka modified enhanced addressing */
-		if (IS_GEN4(dev)) {
-			uint32_t ddc2 = I915_READ(DCC2);
-
-			if (!(ddc2 & DCC2_MODIFIED_ENHANCED_DISABLE))
-				dev_priv->quirks |= QUIRK_PIN_SWIZZLED_PAGES;
+		if (IS_GEN4(dev) &&
+		    !(I915_READ(DCC2) & DCC2_MODIFIED_ENHANCED_DISABLE)) {
+			swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
+			swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
 		}
 
 		if (dcc == 0xffffffff) {
@@ -675,16 +674,35 @@
 		 * matching, which was the case for the swizzling required in
 		 * the table above, or from the 1-ch value being less than
 		 * the minimum size of a rank.
+		 *
+		 * Reports indicate that the swizzling actually
+		 * varies depending upon page placement inside the
+		 * channels, i.e. we see swizzled pages where the
+		 * banks of memory are paired and unswizzled on the
+		 * uneven portion, so leave that as unknown.
 		 */
-		if (I915_READ16(C0DRB3) != I915_READ16(C1DRB3)) {
-			swizzle_x = I915_BIT_6_SWIZZLE_NONE;
-			swizzle_y = I915_BIT_6_SWIZZLE_NONE;
-		} else {
+		if (I915_READ16(C0DRB3) == I915_READ16(C1DRB3)) {
 			swizzle_x = I915_BIT_6_SWIZZLE_9_10;
 			swizzle_y = I915_BIT_6_SWIZZLE_9;
 		}
 	}
 
+	if (swizzle_x == I915_BIT_6_SWIZZLE_UNKNOWN ||
+	    swizzle_y == I915_BIT_6_SWIZZLE_UNKNOWN) {
+		/* Userspace likes to explode if it sees unknown swizzling,
+		 * so lie. We will finish the lie when reporting through
+		 * the get-tiling-ioctl by reporting the physical swizzle
+		 * mode as unknown instead.
+		 *
+		 * As we don't strictly know what the swizzling is, it may be
+		 * bit17 dependent, and so we need to also prevent the pages
+		 * from being moved.
+		 */
+		dev_priv->quirks |= QUIRK_PIN_SWIZZLED_PAGES;
+		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
+		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+	}
+
 	dev_priv->mm.bit_6_swizzle_x = swizzle_x;
 	dev_priv->mm.bit_6_swizzle_y = swizzle_y;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 43f35d1..1f7e6b9 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -24,6 +24,7 @@
  */
 
 #include <linux/seq_file.h>
+#include <linux/stop_machine.h>
 #include <drm/drmP.h>
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
@@ -104,9 +105,11 @@
 {
 	bool has_aliasing_ppgtt;
 	bool has_full_ppgtt;
+	bool has_full_48bit_ppgtt;
 
 	has_aliasing_ppgtt = INTEL_INFO(dev)->gen >= 6;
 	has_full_ppgtt = INTEL_INFO(dev)->gen >= 7;
+	has_full_48bit_ppgtt = IS_BROADWELL(dev) || INTEL_INFO(dev)->gen >= 9;
 
 	if (intel_vgpu_active(dev))
 		has_full_ppgtt = false; /* emulation is too hard */
@@ -125,6 +128,9 @@
 	if (enable_ppgtt == 2 && has_full_ppgtt)
 		return 2;
 
+	if (enable_ppgtt == 3 && has_full_48bit_ppgtt)
+		return 3;
+
 #ifdef CONFIG_INTEL_IOMMU
 	/* Disable ppgtt on SNB if VT-d is on. */
 	if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) {
@@ -141,7 +147,7 @@
 	}
 
 	if (INTEL_INFO(dev)->gen >= 8 && i915.enable_execlists)
-		return 2;
+		return has_full_48bit_ppgtt ? 3 : 2;
 	else
 		return has_aliasing_ppgtt ? 1 : 0;
 }
@@ -661,10 +667,10 @@
 		return ret;
 
 	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
-	intel_ring_emit(ring, GEN8_RING_PDP_UDW(ring, entry));
+	intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(ring, entry));
 	intel_ring_emit(ring, upper_32_bits(addr));
 	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
-	intel_ring_emit(ring, GEN8_RING_PDP_LDW(ring, entry));
+	intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(ring, entry));
 	intel_ring_emit(ring, lower_32_bits(addr));
 	intel_ring_advance(ring);
 
@@ -904,14 +910,13 @@
 	enum vgt_g2v_type msg;
 	struct drm_device *dev = ppgtt->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	unsigned int offset = vgtif_reg(pdp0_lo);
 	int i;
 
 	if (USES_FULL_48BIT_PPGTT(dev)) {
 		u64 daddr = px_dma(&ppgtt->pml4);
 
-		I915_WRITE(offset, lower_32_bits(daddr));
-		I915_WRITE(offset + 4, upper_32_bits(daddr));
+		I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
+		I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr));
 
 		msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
 				VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
@@ -919,10 +924,8 @@
 		for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
 			u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
 
-			I915_WRITE(offset, lower_32_bits(daddr));
-			I915_WRITE(offset + 4, upper_32_bits(daddr));
-
-			offset += 8;
+			I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr));
+			I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr));
 		}
 
 		msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
@@ -1662,9 +1665,9 @@
 		return ret;
 
 	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
-	intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
+	intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(ring));
 	intel_ring_emit(ring, PP_DIR_DCLV_2G);
-	intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
+	intel_ring_emit_reg(ring, RING_PP_DIR_BASE(ring));
 	intel_ring_emit(ring, get_pd_offset(ppgtt));
 	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_advance(ring);
@@ -1699,9 +1702,9 @@
 		return ret;
 
 	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
-	intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
+	intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(ring));
 	intel_ring_emit(ring, PP_DIR_DCLV_2G);
-	intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
+	intel_ring_emit_reg(ring, RING_PP_DIR_BASE(ring));
 	intel_ring_emit(ring, get_pd_offset(ppgtt));
 	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_advance(ring);
@@ -2528,6 +2531,26 @@
 	return 0;
 }
 
+struct ggtt_bind_vma__cb {
+	struct i915_vma *vma;
+	enum i915_cache_level cache_level;
+	u32 flags;
+};
+
+static int ggtt_bind_vma__cb(void *_arg)
+{
+	struct ggtt_bind_vma__cb *arg = _arg;
+	return ggtt_bind_vma(arg->vma, arg->cache_level, arg->flags);
+}
+
+static int ggtt_bind_vma__BKL(struct i915_vma *vma,
+			      enum i915_cache_level cache_level,
+			      u32 flags)
+{
+	struct ggtt_bind_vma__cb arg = { vma, cache_level, flags };
+	return stop_machine(ggtt_bind_vma__cb, &arg, NULL);
+}
+
 static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 				 enum i915_cache_level cache_level,
 				 u32 flags)
@@ -2995,6 +3018,9 @@
 	dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
 	dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
 
+	if (IS_CHERRYVIEW(dev))
+		dev_priv->gtt.base.bind_vma = ggtt_bind_vma__BKL;
+
 	return ret;
 }
 
@@ -3302,7 +3328,7 @@
 intel_rotate_fb_obj_pages(struct i915_ggtt_view *ggtt_view,
 			  struct drm_i915_gem_object *obj)
 {
-	struct intel_rotation_info *rot_info = &ggtt_view->rotation_info;
+	struct intel_rotation_info *rot_info = &ggtt_view->params.rotation_info;
 	unsigned int size_pages = rot_info->size >> PAGE_SHIFT;
 	unsigned int size_pages_uv;
 	struct sg_page_iter sg_iter;
@@ -3534,7 +3560,7 @@
 	if (view->type == I915_GGTT_VIEW_NORMAL) {
 		return obj->base.size;
 	} else if (view->type == I915_GGTT_VIEW_ROTATED) {
-		return view->rotation_info.size;
+		return view->params.rotation_info.size;
 	} else if (view->type == I915_GGTT_VIEW_PARTIAL) {
 		return view->params.partial.size << PAGE_SHIFT;
 	} else {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index a216397..877c32c 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -156,13 +156,10 @@
 			u64 offset;
 			unsigned int size;
 		} partial;
+		struct intel_rotation_info rotation_info;
 	} params;
 
 	struct sg_table *pages;
-
-	union {
-		struct intel_rotation_info rotation_info;
-	};
 };
 
 extern const struct i915_ggtt_view i915_ggtt_view_normal;
@@ -556,7 +553,7 @@
 
 	if (a->type != b->type)
 		return false;
-	if (a->type == I915_GGTT_VIEW_PARTIAL)
+	if (a->type != I915_GGTT_VIEW_NORMAL)
 		return !memcmp(&a->params, &b->params, sizeof(a->params));
 	return true;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index cdacf3f..598ed2f 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -433,7 +433,8 @@
 					 &reserved_size);
 		break;
 	default:
-		if (IS_BROADWELL(dev_priv) || IS_SKYLAKE(dev_priv))
+		if (IS_BROADWELL(dev_priv) ||
+		    IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev))
 			bdw_get_stolen_reserved(dev_priv, &reserved_base,
 						&reserved_size);
 		else
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 8a6717c..7410f6c 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -176,6 +176,8 @@
 		return -EINVAL;
 	}
 
+	intel_runtime_pm_get(dev_priv);
+
 	mutex_lock(&dev->struct_mutex);
 	if (obj->pin_display || obj->framebuffer_references) {
 		ret = -EBUSY;
@@ -269,6 +271,8 @@
 	drm_gem_object_unreference(&obj->base);
 	mutex_unlock(&dev->struct_mutex);
 
+	intel_runtime_pm_put(dev_priv);
+
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 2f04e4f..06ca408 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -366,6 +366,17 @@
 	err_printf(m, "Suspend count: %u\n", error->suspend_count);
 	err_printf(m, "PCI ID: 0x%04x\n", dev->pdev->device);
 	err_printf(m, "IOMMU enabled?: %d\n", error->iommu);
+
+	if (HAS_CSR(dev)) {
+		struct intel_csr *csr = &dev_priv->csr;
+
+		err_printf(m, "DMC loaded: %s\n",
+			   yesno(csr->dmc_payload != NULL));
+		err_printf(m, "DMC fw version: %d.%d\n",
+			   CSR_VERSION_MAJOR(csr->version),
+			   CSR_VERSION_MINOR(csr->version));
+	}
+
 	err_printf(m, "EIR: 0x%08x\n", error->eir);
 	err_printf(m, "IER: 0x%08x\n", error->ier);
 	if (INTEL_INFO(dev)->gen >= 8) {
@@ -862,7 +873,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	if (INTEL_INFO(dev)->gen >= 6) {
-		ering->rc_psmi = I915_READ(ring->mmio_base + 0x50);
+		ering->rc_psmi = I915_READ(RING_PSMI_CTL(ring->mmio_base));
 		ering->fault_reg = I915_READ(RING_FAULT_REG(ring));
 		if (INTEL_INFO(dev)->gen >= 8)
 			gen8_record_semaphore_state(dev_priv, error, ring, ering);
@@ -899,7 +910,7 @@
 	ering->ctl = I915_READ_CTL(ring);
 
 	if (I915_NEED_GFX_HWS(dev)) {
-		int mmio;
+		i915_reg_t mmio;
 
 		if (IS_GEN7(dev)) {
 			switch (ring->id) {
@@ -1071,6 +1082,25 @@
 		list_for_each_entry(request, &ring->request_list, list) {
 			struct drm_i915_error_request *erq;
 
+			if (count >= error->ring[i].num_requests) {
+				/*
+				 * If the ring request list was changed in
+				 * between the point where the error request
+				 * list was created and dimensioned and this
+				 * point then just exit early to avoid crashes.
+				 *
+				 * We don't need to communicate that the
+				 * request list changed state during error
+				 * state capture and that the error state is
+				 * slightly incorrect as a consequence since we
+				 * are typically only interested in the request
+				 * list state at the point of error state
+				 * capture, not in any changes happening during
+				 * the capture.
+				 */
+				break;
+			}
+
 			erq = &error->ring[i].requests[count++];
 			erq->seqno = request->seqno;
 			erq->jiffies = request->emitted_jiffies;
@@ -1181,7 +1211,7 @@
 	if (IS_VALLEYVIEW(dev)) {
 		error->gtier[0] = I915_READ(GTIER);
 		error->ier = I915_READ(VLV_IER);
-		error->forcewake = I915_READ(FORCEWAKE_VLV);
+		error->forcewake = I915_READ_FW(FORCEWAKE_VLV);
 	}
 
 	if (IS_GEN7(dev))
@@ -1193,14 +1223,14 @@
 	}
 
 	if (IS_GEN6(dev)) {
-		error->forcewake = I915_READ(FORCEWAKE);
+		error->forcewake = I915_READ_FW(FORCEWAKE);
 		error->gab_ctl = I915_READ(GAB_CTL);
 		error->gfx_mode = I915_READ(GFX_MODE);
 	}
 
 	/* 2: Registers which belong to multiple generations */
 	if (INTEL_INFO(dev)->gen >= 7)
-		error->forcewake = I915_READ(FORCEWAKE_MT);
+		error->forcewake = I915_READ_FW(FORCEWAKE_MT);
 
 	if (INTEL_INFO(dev)->gen >= 6) {
 		error->derrmr = I915_READ(DERRMR);
diff --git a/drivers/gpu/drm/i915/i915_guc_reg.h b/drivers/gpu/drm/i915/i915_guc_reg.h
index c4cb1c0..685c799 100644
--- a/drivers/gpu/drm/i915/i915_guc_reg.h
+++ b/drivers/gpu/drm/i915/i915_guc_reg.h
@@ -26,7 +26,7 @@
 
 /* Definitions of GuC H/W registers, bits, etc */
 
-#define GUC_STATUS			0xc000
+#define GUC_STATUS			_MMIO(0xc000)
 #define   GS_BOOTROM_SHIFT		1
 #define   GS_BOOTROM_MASK		  (0x7F << GS_BOOTROM_SHIFT)
 #define   GS_BOOTROM_RSA_FAILED		  (0x50 << GS_BOOTROM_SHIFT)
@@ -39,40 +39,41 @@
 #define   GS_MIA_MASK			  (0x07 << GS_MIA_SHIFT)
 #define   GS_MIA_CORE_STATE		  (1 << GS_MIA_SHIFT)
 
-#define SOFT_SCRATCH(n)			(0xc180 + ((n) * 4))
+#define SOFT_SCRATCH(n)			_MMIO(0xc180 + (n) * 4)
 
-#define UOS_RSA_SCRATCH(i)		(0xc200 + (i) * 4)
-#define DMA_ADDR_0_LOW			0xc300
-#define DMA_ADDR_0_HIGH			0xc304
-#define DMA_ADDR_1_LOW			0xc308
-#define DMA_ADDR_1_HIGH			0xc30c
+#define UOS_RSA_SCRATCH(i)		_MMIO(0xc200 + (i) * 4)
+#define   UOS_RSA_SCRATCH_MAX_COUNT	  64
+#define DMA_ADDR_0_LOW			_MMIO(0xc300)
+#define DMA_ADDR_0_HIGH			_MMIO(0xc304)
+#define DMA_ADDR_1_LOW			_MMIO(0xc308)
+#define DMA_ADDR_1_HIGH			_MMIO(0xc30c)
 #define   DMA_ADDRESS_SPACE_WOPCM	  (7 << 16)
 #define   DMA_ADDRESS_SPACE_GTT		  (8 << 16)
-#define DMA_COPY_SIZE			0xc310
-#define DMA_CTRL			0xc314
+#define DMA_COPY_SIZE			_MMIO(0xc310)
+#define DMA_CTRL			_MMIO(0xc314)
 #define   UOS_MOVE			  (1<<4)
 #define   START_DMA			  (1<<0)
-#define DMA_GUC_WOPCM_OFFSET		0xc340
+#define DMA_GUC_WOPCM_OFFSET		_MMIO(0xc340)
 #define   GUC_WOPCM_OFFSET_VALUE	  0x80000	/* 512KB */
-#define GUC_MAX_IDLE_COUNT		0xC3E4
+#define GUC_MAX_IDLE_COUNT		_MMIO(0xC3E4)
 
-#define GUC_WOPCM_SIZE			0xc050
+#define GUC_WOPCM_SIZE			_MMIO(0xc050)
 #define   GUC_WOPCM_SIZE_VALUE  	  (0x80 << 12)	/* 512KB */
 
 /* GuC addresses below GUC_WOPCM_TOP don't map through the GTT */
 #define	GUC_WOPCM_TOP			(GUC_WOPCM_SIZE_VALUE)
 
-#define GEN8_GT_PM_CONFIG		0x138140
-#define GEN9LP_GT_PM_CONFIG		0x138140
-#define GEN9_GT_PM_CONFIG		0x13816c
+#define GEN8_GT_PM_CONFIG		_MMIO(0x138140)
+#define GEN9LP_GT_PM_CONFIG		_MMIO(0x138140)
+#define GEN9_GT_PM_CONFIG		_MMIO(0x13816c)
 #define   GT_DOORBELL_ENABLE		  (1<<0)
 
-#define GEN8_GTCR			0x4274
+#define GEN8_GTCR			_MMIO(0x4274)
 #define   GEN8_GTCR_INVALIDATE		  (1<<0)
 
-#define GUC_ARAT_C6DIS			0xA178
+#define GUC_ARAT_C6DIS			_MMIO(0xA178)
 
-#define GUC_SHIM_CONTROL		0xc064
+#define GUC_SHIM_CONTROL		_MMIO(0xc064)
 #define   GUC_DISABLE_SRAM_INIT_TO_ZEROES	(1<<0)
 #define   GUC_ENABLE_READ_CACHE_LOGIC		(1<<1)
 #define   GUC_ENABLE_MIA_CACHING		(1<<2)
@@ -89,21 +90,21 @@
 				 GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA	| \
 				 GUC_ENABLE_MIA_CLOCK_GATING)
 
-#define HOST2GUC_INTERRUPT		0xc4c8
+#define HOST2GUC_INTERRUPT		_MMIO(0xc4c8)
 #define   HOST2GUC_TRIGGER		  (1<<0)
 
 #define DRBMISC1			0x1984
 #define   DOORBELL_ENABLE		  (1<<0)
 
-#define GEN8_DRBREGL(x)			(0x1000 + (x) * 8)
+#define GEN8_DRBREGL(x)			_MMIO(0x1000 + (x) * 8)
 #define   GEN8_DRB_VALID		  (1<<0)
-#define GEN8_DRBREGU(x)			(GEN8_DRBREGL(x) + 4)
+#define GEN8_DRBREGU(x)			_MMIO(0x1000 + (x) * 8 + 4)
 
-#define DE_GUCRMR			0x44054
+#define DE_GUCRMR			_MMIO(0x44054)
 
-#define GUC_BCS_RCS_IER			0xC550
-#define GUC_VCS2_VCS1_IER		0xC554
-#define GUC_WD_VECS_IER			0xC558
-#define GUC_PM_P24C_IER			0xC55C
+#define GUC_BCS_RCS_IER			_MMIO(0xC550)
+#define GUC_VCS2_VCS1_IER		_MMIO(0xC554)
+#define GUC_WD_VECS_IER			_MMIO(0xC558)
+#define GUC_PM_P24C_IER			_MMIO(0xC55C)
 
 #endif
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 036b42b..0d23785b 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -27,7 +27,7 @@
 #include "intel_guc.h"
 
 /**
- * DOC: GuC Client
+ * DOC: GuC-based command submission
  *
  * i915_guc_client:
  * We use the term client to avoid confusion with contexts. A i915_guc_client is
@@ -86,7 +86,6 @@
 		return -EINVAL;
 
 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-	spin_lock(&dev_priv->guc.host2guc_lock);
 
 	dev_priv->guc.action_count += 1;
 	dev_priv->guc.action_cmd = data[0];
@@ -119,7 +118,6 @@
 	}
 	dev_priv->guc.action_status = status;
 
-	spin_unlock(&dev_priv->guc.host2guc_lock);
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 
 	return ret;
@@ -161,9 +159,9 @@
 	data[0] = HOST2GUC_ACTION_SAMPLE_FORCEWAKE;
 	/* WaRsDisableCoarsePowerGating:skl,bxt */
 	if (!intel_enable_rc6(dev_priv->dev) ||
-	    (IS_BROXTON(dev) && (INTEL_REVID(dev) < BXT_REVID_B0)) ||
-	    (IS_SKL_GT3(dev) && (INTEL_REVID(dev) <= SKL_REVID_E0)) ||
-	    (IS_SKL_GT4(dev) && (INTEL_REVID(dev) <= SKL_REVID_E0)))
+	    IS_BXT_REVID(dev, 0, BXT_REVID_A1) ||
+	    (IS_SKL_GT3(dev) && IS_SKL_REVID(dev, 0, SKL_REVID_E0)) ||
+	    (IS_SKL_GT4(dev) && IS_SKL_REVID(dev, 0, SKL_REVID_E0)))
 		data[1] = 0;
 	else
 		/* bit 0 and 1 are for Render and Media domain separately */
@@ -258,7 +256,7 @@
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
 	struct guc_doorbell_info *doorbell;
 	void *base;
-	int drbreg = GEN8_DRBREGL(client->doorbell_id);
+	i915_reg_t drbreg = GEN8_DRBREGL(client->doorbell_id);
 	int value;
 
 	base = kmap_atomic(i915_gem_object_get_page(client->client_obj, 0));
@@ -292,16 +290,12 @@
 	const uint32_t cacheline_size = cache_line_size();
 	uint32_t offset;
 
-	spin_lock(&guc->host2guc_lock);
-
 	/* Doorbell uses a single cache line within a page */
 	offset = offset_in_page(guc->db_cacheline);
 
 	/* Moving to next cache line to reduce contention */
 	guc->db_cacheline += cacheline_size;
 
-	spin_unlock(&guc->host2guc_lock);
-
 	DRM_DEBUG_DRIVER("selected doorbell cacheline 0x%x, next 0x%x, linesize %u\n",
 			offset, guc->db_cacheline, cacheline_size);
 
@@ -322,13 +316,11 @@
 	const uint16_t end = start + half;
 	uint16_t id;
 
-	spin_lock(&guc->host2guc_lock);
 	id = find_next_zero_bit(guc->doorbell_bitmap, end, start);
 	if (id == end)
 		id = GUC_INVALID_DOORBELL_ID;
 	else
 		bitmap_set(guc->doorbell_bitmap, id, 1);
-	spin_unlock(&guc->host2guc_lock);
 
 	DRM_DEBUG_DRIVER("assigned %s priority doorbell id 0x%x\n",
 			hi_pri ? "high" : "normal", id);
@@ -338,9 +330,7 @@
 
 static void release_doorbell(struct intel_guc *guc, uint16_t id)
 {
-	spin_lock(&guc->host2guc_lock);
 	bitmap_clear(guc->doorbell_bitmap, id, 1);
-	spin_unlock(&guc->host2guc_lock);
 }
 
 /*
@@ -487,16 +477,13 @@
 	struct guc_process_desc *desc;
 	void *base;
 	u32 size = sizeof(struct guc_wq_item);
-	int ret = 0, timeout_counter = 200;
+	int ret = -ETIMEDOUT, timeout_counter = 200;
 
 	base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0));
 	desc = base + gc->proc_desc_offset;
 
 	while (timeout_counter-- > 0) {
-		ret = wait_for_atomic(CIRC_SPACE(gc->wq_tail, desc->head,
-				gc->wq_size) >= size, 1);
-
-		if (!ret) {
+		if (CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size) >= size) {
 			*offset = gc->wq_tail;
 
 			/* advance the tail for next workqueue item */
@@ -505,7 +492,11 @@
 
 			/* this will break the loop */
 			timeout_counter = 0;
+			ret = 0;
 		}
+
+		if (timeout_counter)
+			usleep_range(1000, 2000);
 	};
 
 	kunmap_atomic(base);
@@ -588,8 +579,7 @@
 /**
  * i915_guc_submit() - Submit commands through GuC
  * @client:	the guc client where commands will go through
- * @ctx:	LRC where commands come from
- * @ring:	HW engine that will excute the commands
+ * @rq:		request associated with the commands
  *
  * Return:	0 if succeed
  */
@@ -598,15 +588,12 @@
 {
 	struct intel_guc *guc = client->guc;
 	enum intel_ring_id ring_id = rq->ring->id;
-	unsigned long flags;
 	int q_ret, b_ret;
 
 	/* Need this because of the deferred pin ctx and ring */
 	/* Shall we move this right after ring is pinned? */
 	lr_context_update(rq);
 
-	spin_lock_irqsave(&client->wq_lock, flags);
-
 	q_ret = guc_add_workqueue_item(client, rq);
 	if (q_ret == 0)
 		b_ret = guc_ring_doorbell(client);
@@ -621,12 +608,8 @@
 	} else {
 		client->retcode = 0;
 	}
-	spin_unlock_irqrestore(&client->wq_lock, flags);
-
-	spin_lock(&guc->host2guc_lock);
 	guc->submissions[ring_id] += 1;
 	guc->last_seqno[ring_id] = rq->seqno;
-	spin_unlock(&guc->host2guc_lock);
 
 	return q_ret;
 }
@@ -678,7 +661,7 @@
 /**
  * gem_release_guc_obj() - Release gem object allocated for GuC usage
  * @obj:	gem obj to be released
-  */
+ */
 static void gem_release_guc_obj(struct drm_i915_gem_object *obj)
 {
 	if (!obj)
@@ -731,7 +714,8 @@
  * 		The kernel client to replace ExecList submission is created with
  * 		NORMAL priority. Priority of a client for scheduler can be HIGH,
  * 		while a preemption context can use CRITICAL.
- * @ctx		the context to own the client (we use the default render context)
+ * @ctx:	the context that owns the client (we use the default render
+ * 		context)
  *
  * Return:	An i915_guc_client object if success.
  */
@@ -768,7 +752,6 @@
 	client->client_obj = obj;
 	client->wq_offset = GUC_DB_SIZE;
 	client->wq_size = GUC_WQ_SIZE;
-	spin_lock_init(&client->wq_lock);
 
 	client->doorbell_offset = select_doorbell_cacheline(guc);
 
@@ -871,8 +854,6 @@
 	if (!guc->ctx_pool_obj)
 		return -ENOMEM;
 
-	spin_lock_init(&dev_priv->guc.host2guc_lock);
-
 	ida_init(&guc->ctx_ids);
 
 	guc_create_log(guc);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 0d228f9..e88d692 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -139,7 +139,8 @@
 /*
  * We should clear IMR at preinstall/uninstall, and just check at postinstall.
  */
-static void gen5_assert_iir_is_zero(struct drm_i915_private *dev_priv, u32 reg)
+static void gen5_assert_iir_is_zero(struct drm_i915_private *dev_priv,
+				    i915_reg_t reg)
 {
 	u32 val = I915_READ(reg);
 
@@ -147,7 +148,7 @@
 		return;
 
 	WARN(1, "Interrupt register 0x%x is not zero: 0x%08x\n",
-	     reg, val);
+	     i915_mmio_reg_offset(reg), val);
 	I915_WRITE(reg, 0xffffffff);
 	POSTING_READ(reg);
 	I915_WRITE(reg, 0xffffffff);
@@ -214,9 +215,9 @@
  * @interrupt_mask: mask of interrupt bits to update
  * @enabled_irq_mask: mask of interrupt bits to enable
  */
-static void ilk_update_display_irq(struct drm_i915_private *dev_priv,
-				   uint32_t interrupt_mask,
-				   uint32_t enabled_irq_mask)
+void ilk_update_display_irq(struct drm_i915_private *dev_priv,
+			    uint32_t interrupt_mask,
+			    uint32_t enabled_irq_mask)
 {
 	uint32_t new_val;
 
@@ -238,18 +239,6 @@
 	}
 }
 
-void
-ironlake_enable_display_irq(struct drm_i915_private *dev_priv, u32 mask)
-{
-	ilk_update_display_irq(dev_priv, mask, mask);
-}
-
-void
-ironlake_disable_display_irq(struct drm_i915_private *dev_priv, u32 mask)
-{
-	ilk_update_display_irq(dev_priv, mask, 0);
-}
-
 /**
  * ilk_update_gt_irq - update GTIMR
  * @dev_priv: driver private
@@ -283,27 +272,27 @@
 	ilk_update_gt_irq(dev_priv, mask, 0);
 }
 
-static u32 gen6_pm_iir(struct drm_i915_private *dev_priv)
+static i915_reg_t gen6_pm_iir(struct drm_i915_private *dev_priv)
 {
 	return INTEL_INFO(dev_priv)->gen >= 8 ? GEN8_GT_IIR(2) : GEN6_PMIIR;
 }
 
-static u32 gen6_pm_imr(struct drm_i915_private *dev_priv)
+static i915_reg_t gen6_pm_imr(struct drm_i915_private *dev_priv)
 {
 	return INTEL_INFO(dev_priv)->gen >= 8 ? GEN8_GT_IMR(2) : GEN6_PMIMR;
 }
 
-static u32 gen6_pm_ier(struct drm_i915_private *dev_priv)
+static i915_reg_t gen6_pm_ier(struct drm_i915_private *dev_priv)
 {
 	return INTEL_INFO(dev_priv)->gen >= 8 ? GEN8_GT_IER(2) : GEN6_PMIER;
 }
 
 /**
-  * snb_update_pm_irq - update GEN6_PMIMR
-  * @dev_priv: driver private
-  * @interrupt_mask: mask of interrupt bits to update
-  * @enabled_irq_mask: mask of interrupt bits to enable
-  */
+ * snb_update_pm_irq - update GEN6_PMIMR
+ * @dev_priv: driver private
+ * @interrupt_mask: mask of interrupt bits to update
+ * @enabled_irq_mask: mask of interrupt bits to enable
+ */
 static void snb_update_pm_irq(struct drm_i915_private *dev_priv,
 			      uint32_t interrupt_mask,
 			      uint32_t enabled_irq_mask)
@@ -350,7 +339,7 @@
 void gen6_reset_rps_interrupts(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	uint32_t reg = gen6_pm_iir(dev_priv);
+	i915_reg_t reg = gen6_pm_iir(dev_priv);
 
 	spin_lock_irq(&dev_priv->irq_lock);
 	I915_WRITE(reg, dev_priv->pm_rps_events);
@@ -417,11 +406,11 @@
 }
 
 /**
-  * bdw_update_port_irq - update DE port interrupt
-  * @dev_priv: driver private
-  * @interrupt_mask: mask of interrupt bits to update
-  * @enabled_irq_mask: mask of interrupt bits to enable
-  */
+ * bdw_update_port_irq - update DE port interrupt
+ * @dev_priv: driver private
+ * @interrupt_mask: mask of interrupt bits to update
+ * @enabled_irq_mask: mask of interrupt bits to enable
+ */
 static void bdw_update_port_irq(struct drm_i915_private *dev_priv,
 				uint32_t interrupt_mask,
 				uint32_t enabled_irq_mask)
@@ -449,6 +438,38 @@
 }
 
 /**
+ * bdw_update_pipe_irq - update DE pipe interrupt
+ * @dev_priv: driver private
+ * @pipe: pipe whose interrupt to update
+ * @interrupt_mask: mask of interrupt bits to update
+ * @enabled_irq_mask: mask of interrupt bits to enable
+ */
+void bdw_update_pipe_irq(struct drm_i915_private *dev_priv,
+			 enum pipe pipe,
+			 uint32_t interrupt_mask,
+			 uint32_t enabled_irq_mask)
+{
+	uint32_t new_val;
+
+	assert_spin_locked(&dev_priv->irq_lock);
+
+	WARN_ON(enabled_irq_mask & ~interrupt_mask);
+
+	if (WARN_ON(!intel_irqs_enabled(dev_priv)))
+		return;
+
+	new_val = dev_priv->de_irq_mask[pipe];
+	new_val &= ~interrupt_mask;
+	new_val |= (~enabled_irq_mask & interrupt_mask);
+
+	if (new_val != dev_priv->de_irq_mask[pipe]) {
+		dev_priv->de_irq_mask[pipe] = new_val;
+		I915_WRITE(GEN8_DE_PIPE_IMR(pipe), dev_priv->de_irq_mask[pipe]);
+		POSTING_READ(GEN8_DE_PIPE_IMR(pipe));
+	}
+}
+
+/**
  * ibx_display_interrupt_update - update SDEIMR
  * @dev_priv: driver private
  * @interrupt_mask: mask of interrupt bits to update
@@ -477,7 +498,7 @@
 __i915_enable_pipestat(struct drm_i915_private *dev_priv, enum pipe pipe,
 		       u32 enable_mask, u32 status_mask)
 {
-	u32 reg = PIPESTAT(pipe);
+	i915_reg_t reg = PIPESTAT(pipe);
 	u32 pipestat = I915_READ(reg) & PIPESTAT_INT_ENABLE_MASK;
 
 	assert_spin_locked(&dev_priv->irq_lock);
@@ -504,7 +525,7 @@
 __i915_disable_pipestat(struct drm_i915_private *dev_priv, enum pipe pipe,
 		        u32 enable_mask, u32 status_mask)
 {
-	u32 reg = PIPESTAT(pipe);
+	i915_reg_t reg = PIPESTAT(pipe);
 	u32 pipestat = I915_READ(reg) & PIPESTAT_INT_ENABLE_MASK;
 
 	assert_spin_locked(&dev_priv->irq_lock);
@@ -665,8 +686,7 @@
 static u32 i915_get_vblank_counter(struct drm_device *dev, unsigned int pipe)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	unsigned long high_frame;
-	unsigned long low_frame;
+	i915_reg_t high_frame, low_frame;
 	u32 high1, high2, low, pixel, vbl_start, hsync_start, htotal;
 	struct intel_crtc *intel_crtc =
 		to_intel_crtc(dev_priv->pipe_to_crtc_mapping[pipe]);
@@ -717,9 +737,7 @@
 	return I915_READ(PIPE_FRMCOUNT_G4X(pipe));
 }
 
-/* raw reads, only for fast reads of display block, no need for forcewake etc. */
-#define __raw_i915_read32(dev_priv__, reg__) readl((dev_priv__)->regs + (reg__))
-
+/* I915_READ_FW, only for fast reads of display block, no need for forcewake etc. */
 static int __intel_get_crtc_scanline(struct intel_crtc *crtc)
 {
 	struct drm_device *dev = crtc->base.dev;
@@ -733,9 +751,9 @@
 		vtotal /= 2;
 
 	if (IS_GEN2(dev))
-		position = __raw_i915_read32(dev_priv, PIPEDSL(pipe)) & DSL_LINEMASK_GEN2;
+		position = I915_READ_FW(PIPEDSL(pipe)) & DSL_LINEMASK_GEN2;
 	else
-		position = __raw_i915_read32(dev_priv, PIPEDSL(pipe)) & DSL_LINEMASK_GEN3;
+		position = I915_READ_FW(PIPEDSL(pipe)) & DSL_LINEMASK_GEN3;
 
 	/*
 	 * On HSW, the DSL reg (0x70000) appears to return 0 if we
@@ -827,7 +845,7 @@
 		 * We can split this into vertical and horizontal
 		 * scanout position.
 		 */
-		position = (__raw_i915_read32(dev_priv, PIPEFRAMEPIXEL(pipe)) & PIPE_PIXEL_MASK) >> PIPE_PIXEL_SHIFT;
+		position = (I915_READ_FW(PIPEFRAMEPIXEL(pipe)) & PIPE_PIXEL_MASK) >> PIPE_PIXEL_SHIFT;
 
 		/* convert to pixel counts */
 		vbl_start *= htotal;
@@ -1188,7 +1206,7 @@
 	POSTING_READ(GEN7_MISCCPCTL);
 
 	while ((slice = ffs(dev_priv->l3_parity.which_slice)) != 0) {
-		u32 reg;
+		i915_reg_t reg;
 
 		slice--;
 		if (WARN_ON_ONCE(slice >= NUM_L3_SLICES(dev_priv->dev)))
@@ -1196,7 +1214,7 @@
 
 		dev_priv->l3_parity.which_slice &= ~(1<<slice);
 
-		reg = GEN7_L3CDERRST1 + (slice * 0x200);
+		reg = GEN7_L3CDERRST1(slice);
 
 		error_status = I915_READ(reg);
 		row = GEN7_PARITY_ERROR_ROW(error_status);
@@ -1290,70 +1308,69 @@
 		ivybridge_parity_error_irq_handler(dev, gt_iir);
 }
 
+static __always_inline void
+gen8_cs_irq_handler(struct intel_engine_cs *ring, u32 iir, int test_shift)
+{
+	if (iir & (GT_RENDER_USER_INTERRUPT << test_shift))
+		notify_ring(ring);
+	if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift))
+		intel_lrc_irq_handler(ring);
+}
+
 static irqreturn_t gen8_gt_irq_handler(struct drm_i915_private *dev_priv,
 				       u32 master_ctl)
 {
 	irqreturn_t ret = IRQ_NONE;
 
 	if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) {
-		u32 tmp = I915_READ_FW(GEN8_GT_IIR(0));
-		if (tmp) {
-			I915_WRITE_FW(GEN8_GT_IIR(0), tmp);
+		u32 iir = I915_READ_FW(GEN8_GT_IIR(0));
+		if (iir) {
+			I915_WRITE_FW(GEN8_GT_IIR(0), iir);
 			ret = IRQ_HANDLED;
 
-			if (tmp & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT))
-				intel_lrc_irq_handler(&dev_priv->ring[RCS]);
-			if (tmp & (GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT))
-				notify_ring(&dev_priv->ring[RCS]);
+			gen8_cs_irq_handler(&dev_priv->ring[RCS],
+					iir, GEN8_RCS_IRQ_SHIFT);
 
-			if (tmp & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT))
-				intel_lrc_irq_handler(&dev_priv->ring[BCS]);
-			if (tmp & (GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT))
-				notify_ring(&dev_priv->ring[BCS]);
+			gen8_cs_irq_handler(&dev_priv->ring[BCS],
+					iir, GEN8_BCS_IRQ_SHIFT);
 		} else
 			DRM_ERROR("The master control interrupt lied (GT0)!\n");
 	}
 
 	if (master_ctl & (GEN8_GT_VCS1_IRQ | GEN8_GT_VCS2_IRQ)) {
-		u32 tmp = I915_READ_FW(GEN8_GT_IIR(1));
-		if (tmp) {
-			I915_WRITE_FW(GEN8_GT_IIR(1), tmp);
+		u32 iir = I915_READ_FW(GEN8_GT_IIR(1));
+		if (iir) {
+			I915_WRITE_FW(GEN8_GT_IIR(1), iir);
 			ret = IRQ_HANDLED;
 
-			if (tmp & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT))
-				intel_lrc_irq_handler(&dev_priv->ring[VCS]);
-			if (tmp & (GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT))
-				notify_ring(&dev_priv->ring[VCS]);
+			gen8_cs_irq_handler(&dev_priv->ring[VCS],
+					iir, GEN8_VCS1_IRQ_SHIFT);
 
-			if (tmp & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS2_IRQ_SHIFT))
-				intel_lrc_irq_handler(&dev_priv->ring[VCS2]);
-			if (tmp & (GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT))
-				notify_ring(&dev_priv->ring[VCS2]);
+			gen8_cs_irq_handler(&dev_priv->ring[VCS2],
+					iir, GEN8_VCS2_IRQ_SHIFT);
 		} else
 			DRM_ERROR("The master control interrupt lied (GT1)!\n");
 	}
 
 	if (master_ctl & GEN8_GT_VECS_IRQ) {
-		u32 tmp = I915_READ_FW(GEN8_GT_IIR(3));
-		if (tmp) {
-			I915_WRITE_FW(GEN8_GT_IIR(3), tmp);
+		u32 iir = I915_READ_FW(GEN8_GT_IIR(3));
+		if (iir) {
+			I915_WRITE_FW(GEN8_GT_IIR(3), iir);
 			ret = IRQ_HANDLED;
 
-			if (tmp & (GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT))
-				intel_lrc_irq_handler(&dev_priv->ring[VECS]);
-			if (tmp & (GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT))
-				notify_ring(&dev_priv->ring[VECS]);
+			gen8_cs_irq_handler(&dev_priv->ring[VECS],
+					iir, GEN8_VECS_IRQ_SHIFT);
 		} else
 			DRM_ERROR("The master control interrupt lied (GT3)!\n");
 	}
 
 	if (master_ctl & GEN8_GT_PM_IRQ) {
-		u32 tmp = I915_READ_FW(GEN8_GT_IIR(2));
-		if (tmp & dev_priv->pm_rps_events) {
+		u32 iir = I915_READ_FW(GEN8_GT_IIR(2));
+		if (iir & dev_priv->pm_rps_events) {
 			I915_WRITE_FW(GEN8_GT_IIR(2),
-				      tmp & dev_priv->pm_rps_events);
+				      iir & dev_priv->pm_rps_events);
 			ret = IRQ_HANDLED;
-			gen6_rps_irq_handler(dev_priv, tmp);
+			gen6_rps_irq_handler(dev_priv, iir);
 		} else
 			DRM_ERROR("The master control interrupt lied (PM)!\n");
 	}
@@ -1625,7 +1642,7 @@
 
 	spin_lock(&dev_priv->irq_lock);
 	for_each_pipe(dev_priv, pipe) {
-		int reg;
+		i915_reg_t reg;
 		u32 mask, iir_bit = 0;
 
 		/*
@@ -1827,8 +1844,24 @@
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	u32 dig_hotplug_reg, pin_mask = 0, long_mask = 0;
 
+	/*
+	 * Somehow the PCH doesn't seem to really ack the interrupt to the CPU
+	 * unless we touch the hotplug register, even if hotplug_trigger is
+	 * zero. Not acking leads to "The master control interrupt lied (SDE)!"
+	 * errors.
+	 */
 	dig_hotplug_reg = I915_READ(PCH_PORT_HOTPLUG);
+	if (!hotplug_trigger) {
+		u32 mask = PORTA_HOTPLUG_STATUS_MASK |
+			PORTD_HOTPLUG_STATUS_MASK |
+			PORTC_HOTPLUG_STATUS_MASK |
+			PORTB_HOTPLUG_STATUS_MASK;
+		dig_hotplug_reg &= ~mask;
+	}
+
 	I915_WRITE(PCH_PORT_HOTPLUG, dig_hotplug_reg);
+	if (!hotplug_trigger)
+		return;
 
 	intel_get_hpd_pins(&pin_mask, &long_mask, hotplug_trigger,
 			   dig_hotplug_reg, hpd,
@@ -1843,8 +1876,7 @@
 	int pipe;
 	u32 hotplug_trigger = pch_iir & SDE_HOTPLUG_MASK;
 
-	if (hotplug_trigger)
-		ibx_hpd_irq_handler(dev, hotplug_trigger, hpd_ibx);
+	ibx_hpd_irq_handler(dev, hotplug_trigger, hpd_ibx);
 
 	if (pch_iir & SDE_AUDIO_POWER_MASK) {
 		int port = ffs((pch_iir & SDE_AUDIO_POWER_MASK) >>
@@ -1937,8 +1969,7 @@
 	int pipe;
 	u32 hotplug_trigger = pch_iir & SDE_HOTPLUG_MASK_CPT;
 
-	if (hotplug_trigger)
-		ibx_hpd_irq_handler(dev, hotplug_trigger, hpd_cpt);
+	ibx_hpd_irq_handler(dev, hotplug_trigger, hpd_cpt);
 
 	if (pch_iir & SDE_AUDIO_POWER_MASK_CPT) {
 		int port = ffs((pch_iir & SDE_AUDIO_POWER_MASK_CPT) >>
@@ -2644,7 +2675,7 @@
 						     DE_PIPE_VBLANK(pipe);
 
 	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
-	ironlake_enable_display_irq(dev_priv, bit);
+	ilk_enable_display_irq(dev_priv, bit);
 	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
 
 	return 0;
@@ -2669,10 +2700,9 @@
 	unsigned long irqflags;
 
 	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
-	dev_priv->de_irq_mask[pipe] &= ~GEN8_PIPE_VBLANK;
-	I915_WRITE(GEN8_DE_PIPE_IMR(pipe), dev_priv->de_irq_mask[pipe]);
-	POSTING_READ(GEN8_DE_PIPE_IMR(pipe));
+	bdw_enable_pipe_irq(dev_priv, pipe, GEN8_PIPE_VBLANK);
 	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
+
 	return 0;
 }
 
@@ -2699,7 +2729,7 @@
 						     DE_PIPE_VBLANK(pipe);
 
 	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
-	ironlake_disable_display_irq(dev_priv, bit);
+	ilk_disable_display_irq(dev_priv, bit);
 	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
 }
 
@@ -2720,9 +2750,7 @@
 	unsigned long irqflags;
 
 	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
-	dev_priv->de_irq_mask[pipe] |= GEN8_PIPE_VBLANK;
-	I915_WRITE(GEN8_DE_PIPE_IMR(pipe), dev_priv->de_irq_mask[pipe]);
-	POSTING_READ(GEN8_DE_PIPE_IMR(pipe));
+	bdw_disable_pipe_irq(dev_priv, pipe, GEN8_PIPE_VBLANK);
 	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
 }
 
@@ -3451,7 +3479,7 @@
 		 * setup is guaranteed to run in single-threaded context. But we
 		 * need it to make the assert_spin_locked happy. */
 		spin_lock_irq(&dev_priv->irq_lock);
-		ironlake_enable_display_irq(dev_priv, DE_PCU_EVENT);
+		ilk_enable_display_irq(dev_priv, DE_PCU_EVENT);
 		spin_unlock_irq(&dev_priv->irq_lock);
 	}
 
@@ -3869,7 +3897,7 @@
 			DRM_DEBUG("Command parser error, iir 0x%08x\n", iir);
 
 		for_each_pipe(dev_priv, pipe) {
-			int reg = PIPESTAT(pipe);
+			i915_reg_t reg = PIPESTAT(pipe);
 			pipe_stats[pipe] = I915_READ(reg);
 
 			/*
@@ -4050,7 +4078,7 @@
 			DRM_DEBUG("Command parser error, iir 0x%08x\n", iir);
 
 		for_each_pipe(dev_priv, pipe) {
-			int reg = PIPESTAT(pipe);
+			i915_reg_t reg = PIPESTAT(pipe);
 			pipe_stats[pipe] = I915_READ(reg);
 
 			/* Clear the PIPE*STAT regs before the IIR */
@@ -4272,7 +4300,7 @@
 			DRM_DEBUG("Command parser error, iir 0x%08x\n", iir);
 
 		for_each_pipe(dev_priv, pipe) {
-			int reg = PIPESTAT(pipe);
+			i915_reg_t reg = PIPESTAT(pipe);
 			pipe_stats[pipe] = I915_READ(reg);
 
 			/*
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 96bb238..835d609 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -32,6 +32,7 @@
 	.panel_use_ssc = -1,
 	.vbt_sdvo_panel_type = -1,
 	.enable_rc6 = -1,
+	.enable_dc = -1,
 	.enable_fbc = -1,
 	.enable_execlists = -1,
 	.enable_hangcheck = true,
@@ -40,6 +41,7 @@
 	.preliminary_hw_support = IS_ENABLED(CONFIG_DRM_I915_PRELIMINARY_HW_SUPPORT),
 	.disable_power_well = -1,
 	.enable_ips = 1,
+	.fastboot = 0,
 	.prefault_disable = 0,
 	.load_detect_test = 0,
 	.reset = true,
@@ -79,6 +81,11 @@
 	"For example, 3 would enable rc6 and deep rc6, and 7 would enable everything. "
 	"default: -1 (use per-chip default)");
 
+module_param_named_unsafe(enable_dc, i915.enable_dc, int, 0400);
+MODULE_PARM_DESC(enable_dc,
+	"Enable power-saving display C-states. "
+	"(-1=auto [default]; 0=disable; 1=up to DC5; 2=up to DC6)");
+
 module_param_named_unsafe(enable_fbc, i915.enable_fbc, int, 0600);
 MODULE_PARM_DESC(enable_fbc,
 	"Enable frame buffer compression for power savings "
@@ -111,7 +118,7 @@
 module_param_named_unsafe(enable_ppgtt, i915.enable_ppgtt, int, 0400);
 MODULE_PARM_DESC(enable_ppgtt,
 	"Override PPGTT usage. "
-	"(-1=auto [default], 0=disabled, 1=aliasing, 2=full)");
+	"(-1=auto [default], 0=disabled, 1=aliasing, 2=full, 3=full with extended address space)");
 
 module_param_named_unsafe(enable_execlists, i915.enable_execlists, int, 0400);
 MODULE_PARM_DESC(enable_execlists,
@@ -125,7 +132,7 @@
 MODULE_PARM_DESC(preliminary_hw_support,
 	"Enable preliminary hardware support.");
 
-module_param_named_unsafe(disable_power_well, i915.disable_power_well, int, 0600);
+module_param_named_unsafe(disable_power_well, i915.disable_power_well, int, 0400);
 MODULE_PARM_DESC(disable_power_well,
 	"Disable display power wells when possible "
 	"(-1=auto [default], 0=power wells always on, 1=power wells disabled when possible)");
@@ -133,6 +140,10 @@
 module_param_named_unsafe(enable_ips, i915.enable_ips, int, 0600);
 MODULE_PARM_DESC(enable_ips, "Enable IPS (default: true)");
 
+module_param_named(fastboot, i915.fastboot, bool, 0600);
+MODULE_PARM_DESC(fastboot,
+	"Try to skip unnecessary mode sets at boot time (default: false)");
+
 module_param_named_unsafe(prefault_disable, i915.prefault_disable, bool, 0600);
 MODULE_PARM_DESC(prefault_disable,
 	"Disable page prefaulting for pread/pwrite/reloc (default:false). "
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index bc7b8fa..206b213 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -25,14 +25,43 @@
 #ifndef _I915_REG_H_
 #define _I915_REG_H_
 
+typedef struct {
+	uint32_t reg;
+} i915_reg_t;
+
+#define _MMIO(r) ((const i915_reg_t){ .reg = (r) })
+
+#define INVALID_MMIO_REG _MMIO(0)
+
+static inline uint32_t i915_mmio_reg_offset(i915_reg_t reg)
+{
+	return reg.reg;
+}
+
+static inline bool i915_mmio_reg_equal(i915_reg_t a, i915_reg_t b)
+{
+	return i915_mmio_reg_offset(a) == i915_mmio_reg_offset(b);
+}
+
+static inline bool i915_mmio_reg_valid(i915_reg_t reg)
+{
+	return !i915_mmio_reg_equal(reg, INVALID_MMIO_REG);
+}
+
 #define _PIPE(pipe, a, b) ((a) + (pipe)*((b)-(a)))
+#define _MMIO_PIPE(pipe, a, b) _MMIO(_PIPE(pipe, a, b))
 #define _PLANE(plane, a, b) _PIPE(plane, a, b)
-#define _TRANSCODER(tran, a, b) ((a) + (tran)*((b)-(a)))
+#define _MMIO_PLANE(plane, a, b) _MMIO_PIPE(plane, a, b)
+#define _TRANS(tran, a, b) ((a) + (tran)*((b)-(a)))
+#define _MMIO_TRANS(tran, a, b) _MMIO(_TRANS(tran, a, b))
 #define _PORT(port, a, b) ((a) + (port)*((b)-(a)))
+#define _MMIO_PORT(port, a, b) _MMIO(_PORT(port, a, b))
 #define _PIPE3(pipe, a, b, c) ((pipe) == PIPE_A ? (a) : \
 			       (pipe) == PIPE_B ? (b) : (c))
+#define _MMIO_PIPE3(pipe, a, b, c) _MMIO(_PIPE3(pipe, a, b, c))
 #define _PORT3(port, a, b, c) ((port) == PORT_A ? (a) : \
 			       (port) == PORT_B ? (b) : (c))
+#define _MMIO_PORT3(pipe, a, b, c) _MMIO(_PORT3(pipe, a, b, c))
 
 #define _MASKED_FIELD(mask, value) ({					   \
 	if (__builtin_constant_p(mask))					   \
@@ -105,14 +134,14 @@
 #define  GRDOM_RESET_STATUS (1<<1)
 #define  GRDOM_RESET_ENABLE (1<<0)
 
-#define ILK_GDSR (MCHBAR_MIRROR_BASE + 0x2ca4)
+#define ILK_GDSR _MMIO(MCHBAR_MIRROR_BASE + 0x2ca4)
 #define  ILK_GRDOM_FULL		(0<<1)
 #define  ILK_GRDOM_RENDER	(1<<1)
 #define  ILK_GRDOM_MEDIA	(3<<1)
 #define  ILK_GRDOM_MASK		(3<<1)
 #define  ILK_GRDOM_RESET_ENABLE (1<<0)
 
-#define GEN6_MBCUNIT_SNPCR	0x900c /* for LLC config */
+#define GEN6_MBCUNIT_SNPCR	_MMIO(0x900c) /* for LLC config */
 #define   GEN6_MBC_SNPCR_SHIFT	21
 #define   GEN6_MBC_SNPCR_MASK	(3<<21)
 #define   GEN6_MBC_SNPCR_MAX	(0<<21)
@@ -120,31 +149,31 @@
 #define   GEN6_MBC_SNPCR_LOW	(2<<21)
 #define   GEN6_MBC_SNPCR_MIN	(3<<21) /* only 1/16th of the cache is shared */
 
-#define VLV_G3DCTL		0x9024
-#define VLV_GSCKGCTL		0x9028
+#define VLV_G3DCTL		_MMIO(0x9024)
+#define VLV_GSCKGCTL		_MMIO(0x9028)
 
-#define GEN6_MBCTL		0x0907c
+#define GEN6_MBCTL		_MMIO(0x0907c)
 #define   GEN6_MBCTL_ENABLE_BOOT_FETCH	(1 << 4)
 #define   GEN6_MBCTL_CTX_FETCH_NEEDED	(1 << 3)
 #define   GEN6_MBCTL_BME_UPDATE_ENABLE	(1 << 2)
 #define   GEN6_MBCTL_MAE_UPDATE_ENABLE	(1 << 1)
 #define   GEN6_MBCTL_BOOT_FETCH_MECH	(1 << 0)
 
-#define GEN6_GDRST	0x941c
+#define GEN6_GDRST	_MMIO(0x941c)
 #define  GEN6_GRDOM_FULL		(1 << 0)
 #define  GEN6_GRDOM_RENDER		(1 << 1)
 #define  GEN6_GRDOM_MEDIA		(1 << 2)
 #define  GEN6_GRDOM_BLT			(1 << 3)
 
-#define RING_PP_DIR_BASE(ring)		((ring)->mmio_base+0x228)
-#define RING_PP_DIR_BASE_READ(ring)	((ring)->mmio_base+0x518)
-#define RING_PP_DIR_DCLV(ring)		((ring)->mmio_base+0x220)
+#define RING_PP_DIR_BASE(ring)		_MMIO((ring)->mmio_base+0x228)
+#define RING_PP_DIR_BASE_READ(ring)	_MMIO((ring)->mmio_base+0x518)
+#define RING_PP_DIR_DCLV(ring)		_MMIO((ring)->mmio_base+0x220)
 #define   PP_DIR_DCLV_2G		0xffffffff
 
-#define GEN8_RING_PDP_UDW(ring, n)	((ring)->mmio_base+0x270 + ((n) * 8 + 4))
-#define GEN8_RING_PDP_LDW(ring, n)	((ring)->mmio_base+0x270 + (n) * 8)
+#define GEN8_RING_PDP_UDW(ring, n)	_MMIO((ring)->mmio_base+0x270 + (n) * 8 + 4)
+#define GEN8_RING_PDP_LDW(ring, n)	_MMIO((ring)->mmio_base+0x270 + (n) * 8)
 
-#define GEN8_R_PWR_CLK_STATE		0x20C8
+#define GEN8_R_PWR_CLK_STATE		_MMIO(0x20C8)
 #define   GEN8_RPCS_ENABLE		(1 << 31)
 #define   GEN8_RPCS_S_CNT_ENABLE	(1 << 18)
 #define   GEN8_RPCS_S_CNT_SHIFT		15
@@ -157,7 +186,7 @@
 #define   GEN8_RPCS_EU_MIN_SHIFT	0
 #define   GEN8_RPCS_EU_MIN_MASK		(0xf << GEN8_RPCS_EU_MIN_SHIFT)
 
-#define GAM_ECOCHK			0x4090
+#define GAM_ECOCHK			_MMIO(0x4090)
 #define   BDW_DISABLE_HDC_INVALIDATION	(1<<25)
 #define   ECOCHK_SNB_BIT		(1<<10)
 #define   ECOCHK_DIS_TLB		(1<<8)
@@ -170,15 +199,15 @@
 #define   ECOCHK_PPGTT_WT_HSW		(0x2<<3)
 #define   ECOCHK_PPGTT_WB_HSW		(0x3<<3)
 
-#define GAC_ECO_BITS			0x14090
+#define GAC_ECO_BITS			_MMIO(0x14090)
 #define   ECOBITS_SNB_BIT		(1<<13)
 #define   ECOBITS_PPGTT_CACHE64B	(3<<8)
 #define   ECOBITS_PPGTT_CACHE4B		(0<<8)
 
-#define GAB_CTL				0x24000
+#define GAB_CTL				_MMIO(0x24000)
 #define   GAB_CTL_CONT_AFTER_PAGEFAULT	(1<<8)
 
-#define GEN6_STOLEN_RESERVED		0x1082C0
+#define GEN6_STOLEN_RESERVED		_MMIO(0x1082C0)
 #define GEN6_STOLEN_RESERVED_ADDR_MASK	(0xFFF << 20)
 #define GEN7_STOLEN_RESERVED_ADDR_MASK	(0x3FFF << 18)
 #define GEN6_STOLEN_RESERVED_SIZE_MASK	(3 << 4)
@@ -200,6 +229,7 @@
 #define VGA_ST01_MDA 0x3ba
 #define VGA_ST01_CGA 0x3da
 
+#define _VGA_MSR_WRITE _MMIO(0x3c2)
 #define VGA_MSR_WRITE 0x3c2
 #define VGA_MSR_READ 0x3cc
 #define   VGA_MSR_MEM_EN (1<<1)
@@ -377,10 +407,12 @@
 #define MI_BATCH_BUFFER_START_GEN8	MI_INSTR(0x31, 1)
 #define   MI_BATCH_RESOURCE_STREAMER (1<<10)
 
-#define MI_PREDICATE_SRC0	(0x2400)
-#define MI_PREDICATE_SRC1	(0x2408)
+#define MI_PREDICATE_SRC0	_MMIO(0x2400)
+#define MI_PREDICATE_SRC0_UDW	_MMIO(0x2400 + 4)
+#define MI_PREDICATE_SRC1	_MMIO(0x2408)
+#define MI_PREDICATE_SRC1_UDW	_MMIO(0x2408 + 4)
 
-#define MI_PREDICATE_RESULT_2	(0x2214)
+#define MI_PREDICATE_RESULT_2	_MMIO(0x2214)
 #define  LOWER_SLICE_ENABLED	(1<<0)
 #define  LOWER_SLICE_DISABLED	(0<<0)
 
@@ -509,49 +541,61 @@
 /*
  * Registers used only by the command parser
  */
-#define BCS_SWCTRL 0x22200
+#define BCS_SWCTRL _MMIO(0x22200)
 
-#define GPGPU_THREADS_DISPATCHED        0x2290
-#define HS_INVOCATION_COUNT             0x2300
-#define DS_INVOCATION_COUNT             0x2308
-#define IA_VERTICES_COUNT               0x2310
-#define IA_PRIMITIVES_COUNT             0x2318
-#define VS_INVOCATION_COUNT             0x2320
-#define GS_INVOCATION_COUNT             0x2328
-#define GS_PRIMITIVES_COUNT             0x2330
-#define CL_INVOCATION_COUNT             0x2338
-#define CL_PRIMITIVES_COUNT             0x2340
-#define PS_INVOCATION_COUNT             0x2348
-#define PS_DEPTH_COUNT                  0x2350
+#define GPGPU_THREADS_DISPATCHED        _MMIO(0x2290)
+#define GPGPU_THREADS_DISPATCHED_UDW	_MMIO(0x2290 + 4)
+#define HS_INVOCATION_COUNT             _MMIO(0x2300)
+#define HS_INVOCATION_COUNT_UDW		_MMIO(0x2300 + 4)
+#define DS_INVOCATION_COUNT             _MMIO(0x2308)
+#define DS_INVOCATION_COUNT_UDW		_MMIO(0x2308 + 4)
+#define IA_VERTICES_COUNT               _MMIO(0x2310)
+#define IA_VERTICES_COUNT_UDW		_MMIO(0x2310 + 4)
+#define IA_PRIMITIVES_COUNT             _MMIO(0x2318)
+#define IA_PRIMITIVES_COUNT_UDW		_MMIO(0x2318 + 4)
+#define VS_INVOCATION_COUNT             _MMIO(0x2320)
+#define VS_INVOCATION_COUNT_UDW		_MMIO(0x2320 + 4)
+#define GS_INVOCATION_COUNT             _MMIO(0x2328)
+#define GS_INVOCATION_COUNT_UDW		_MMIO(0x2328 + 4)
+#define GS_PRIMITIVES_COUNT             _MMIO(0x2330)
+#define GS_PRIMITIVES_COUNT_UDW		_MMIO(0x2330 + 4)
+#define CL_INVOCATION_COUNT             _MMIO(0x2338)
+#define CL_INVOCATION_COUNT_UDW		_MMIO(0x2338 + 4)
+#define CL_PRIMITIVES_COUNT             _MMIO(0x2340)
+#define CL_PRIMITIVES_COUNT_UDW		_MMIO(0x2340 + 4)
+#define PS_INVOCATION_COUNT             _MMIO(0x2348)
+#define PS_INVOCATION_COUNT_UDW		_MMIO(0x2348 + 4)
+#define PS_DEPTH_COUNT                  _MMIO(0x2350)
+#define PS_DEPTH_COUNT_UDW		_MMIO(0x2350 + 4)
 
 /* There are the 4 64-bit counter registers, one for each stream output */
-#define GEN7_SO_NUM_PRIMS_WRITTEN(n) (0x5200 + (n) * 8)
+#define GEN7_SO_NUM_PRIMS_WRITTEN(n)		_MMIO(0x5200 + (n) * 8)
+#define GEN7_SO_NUM_PRIMS_WRITTEN_UDW(n)	_MMIO(0x5200 + (n) * 8 + 4)
 
-#define GEN7_SO_PRIM_STORAGE_NEEDED(n)  (0x5240 + (n) * 8)
+#define GEN7_SO_PRIM_STORAGE_NEEDED(n)		_MMIO(0x5240 + (n) * 8)
+#define GEN7_SO_PRIM_STORAGE_NEEDED_UDW(n)	_MMIO(0x5240 + (n) * 8 + 4)
 
-#define GEN7_3DPRIM_END_OFFSET          0x2420
-#define GEN7_3DPRIM_START_VERTEX        0x2430
-#define GEN7_3DPRIM_VERTEX_COUNT        0x2434
-#define GEN7_3DPRIM_INSTANCE_COUNT      0x2438
-#define GEN7_3DPRIM_START_INSTANCE      0x243C
-#define GEN7_3DPRIM_BASE_VERTEX         0x2440
+#define GEN7_3DPRIM_END_OFFSET          _MMIO(0x2420)
+#define GEN7_3DPRIM_START_VERTEX        _MMIO(0x2430)
+#define GEN7_3DPRIM_VERTEX_COUNT        _MMIO(0x2434)
+#define GEN7_3DPRIM_INSTANCE_COUNT      _MMIO(0x2438)
+#define GEN7_3DPRIM_START_INSTANCE      _MMIO(0x243C)
+#define GEN7_3DPRIM_BASE_VERTEX         _MMIO(0x2440)
 
-#define GEN7_GPGPU_DISPATCHDIMX         0x2500
-#define GEN7_GPGPU_DISPATCHDIMY         0x2504
-#define GEN7_GPGPU_DISPATCHDIMZ         0x2508
+#define GEN7_GPGPU_DISPATCHDIMX         _MMIO(0x2500)
+#define GEN7_GPGPU_DISPATCHDIMY         _MMIO(0x2504)
+#define GEN7_GPGPU_DISPATCHDIMZ         _MMIO(0x2508)
 
-#define OACONTROL 0x2360
+#define OACONTROL _MMIO(0x2360)
 
 #define _GEN7_PIPEA_DE_LOAD_SL	0x70068
 #define _GEN7_PIPEB_DE_LOAD_SL	0x71068
-#define GEN7_PIPE_DE_LOAD_SL(pipe) _PIPE(pipe, \
-					 _GEN7_PIPEA_DE_LOAD_SL, \
-					 _GEN7_PIPEB_DE_LOAD_SL)
+#define GEN7_PIPE_DE_LOAD_SL(pipe) _MMIO_PIPE(pipe, _GEN7_PIPEA_DE_LOAD_SL, _GEN7_PIPEB_DE_LOAD_SL)
 
 /*
  * Reset registers
  */
-#define DEBUG_RESET_I830		0x6070
+#define DEBUG_RESET_I830		_MMIO(0x6070)
 #define  DEBUG_RESET_FULL		(1<<7)
 #define  DEBUG_RESET_RENDER		(1<<8)
 #define  DEBUG_RESET_DISPLAY		(1<<9)
@@ -559,7 +603,7 @@
 /*
  * IOSF sideband
  */
-#define VLV_IOSF_DOORBELL_REQ			(VLV_DISPLAY_BASE + 0x2100)
+#define VLV_IOSF_DOORBELL_REQ			_MMIO(VLV_DISPLAY_BASE + 0x2100)
 #define   IOSF_DEVFN_SHIFT			24
 #define   IOSF_OPCODE_SHIFT			16
 #define   IOSF_PORT_SHIFT			8
@@ -576,8 +620,8 @@
 #define   IOSF_PORT_CCU				0xA9
 #define   IOSF_PORT_GPS_CORE			0x48
 #define   IOSF_PORT_FLISDSI			0x1B
-#define VLV_IOSF_DATA				(VLV_DISPLAY_BASE + 0x2104)
-#define VLV_IOSF_ADDR				(VLV_DISPLAY_BASE + 0x2108)
+#define VLV_IOSF_DATA				_MMIO(VLV_DISPLAY_BASE + 0x2104)
+#define VLV_IOSF_ADDR				_MMIO(VLV_DISPLAY_BASE + 0x2108)
 
 /* See configdb bunit SB addr map */
 #define BUNIT_REG_BISOC				0x11
@@ -609,6 +653,7 @@
 
 /* See the PUNIT HAS v0.8 for the below bits */
 enum punit_power_well {
+	/* These numbers are fixed and must match the position of the pw bits */
 	PUNIT_POWER_WELL_RENDER			= 0,
 	PUNIT_POWER_WELL_MEDIA			= 1,
 	PUNIT_POWER_WELL_DISP2D			= 3,
@@ -621,10 +666,12 @@
 	PUNIT_POWER_WELL_DPIO_RX1		= 11,
 	PUNIT_POWER_WELL_DPIO_CMN_D		= 12,
 
-	PUNIT_POWER_WELL_NUM,
+	/* Not actual bit groups. Used as IDs for lookup_power_well() */
+	PUNIT_POWER_WELL_ALWAYS_ON,
 };
 
 enum skl_disp_power_wells {
+	/* These numbers are fixed and must match the position of the pw bits */
 	SKL_DISP_PW_MISC_IO,
 	SKL_DISP_PW_DDI_A_E,
 	SKL_DISP_PW_DDI_B,
@@ -632,6 +679,10 @@
 	SKL_DISP_PW_DDI_D,
 	SKL_DISP_PW_1 = 14,
 	SKL_DISP_PW_2,
+
+	/* Not actual bit groups. Used as IDs for lookup_power_well() */
+	SKL_DISP_PW_ALWAYS_ON,
+	SKL_DISP_PW_DC_OFF,
 };
 
 #define SKL_POWER_WELL_STATE(pw) (1 << ((pw) * 2))
@@ -832,7 +883,7 @@
  */
 #define DPIO_DEVFN			0
 
-#define DPIO_CTL			(VLV_DISPLAY_BASE + 0x2110)
+#define DPIO_CTL			_MMIO(VLV_DISPLAY_BASE + 0x2110)
 #define  DPIO_MODSEL1			(1<<3) /* if ref clk b == 27 */
 #define  DPIO_MODSEL0			(1<<2) /* if ref clk a == 27 */
 #define  DPIO_SFR_BYPASS		(1<<1)
@@ -1185,9 +1236,9 @@
 #define   DPIO_UPAR_SHIFT		30
 
 /* BXT PHY registers */
-#define _BXT_PHY(phy, a, b)		_PIPE((phy), (a), (b))
+#define _BXT_PHY(phy, a, b)		_MMIO_PIPE((phy), (a), (b))
 
-#define BXT_P_CR_GT_DISP_PWRON		0x138090
+#define BXT_P_CR_GT_DISP_PWRON		_MMIO(0x138090)
 #define   GT_DISPLAY_POWER_ON(phy)	(1 << (phy))
 
 #define _PHY_CTL_FAMILY_EDP		0x64C80
@@ -1203,7 +1254,7 @@
 #define   PORT_PLL_ENABLE		(1 << 31)
 #define   PORT_PLL_LOCK			(1 << 30)
 #define   PORT_PLL_REF_SEL		(1 << 27)
-#define BXT_PORT_PLL_ENABLE(port)	_PORT(port, _PORT_PLL_A, _PORT_PLL_B)
+#define BXT_PORT_PLL_ENABLE(port)	_MMIO_PORT(port, _PORT_PLL_A, _PORT_PLL_B)
 
 #define _PORT_PLL_EBB_0_A		0x162034
 #define _PORT_PLL_EBB_0_B		0x6C034
@@ -1214,7 +1265,7 @@
 #define   PORT_PLL_P2_SHIFT		8
 #define   PORT_PLL_P2_MASK		(0x1f << PORT_PLL_P2_SHIFT)
 #define   PORT_PLL_P2(x)		((x)  << PORT_PLL_P2_SHIFT)
-#define BXT_PORT_PLL_EBB_0(port)	_PORT3(port, _PORT_PLL_EBB_0_A, \
+#define BXT_PORT_PLL_EBB_0(port)	_MMIO_PORT3(port, _PORT_PLL_EBB_0_A, \
 						_PORT_PLL_EBB_0_B,	\
 						_PORT_PLL_EBB_0_C)
 
@@ -1223,7 +1274,7 @@
 #define _PORT_PLL_EBB_4_C		0x6C344
 #define   PORT_PLL_10BIT_CLK_ENABLE	(1 << 13)
 #define   PORT_PLL_RECALIBRATE		(1 << 14)
-#define BXT_PORT_PLL_EBB_4(port)	_PORT3(port, _PORT_PLL_EBB_4_A, \
+#define BXT_PORT_PLL_EBB_4(port)	_MMIO_PORT3(port, _PORT_PLL_EBB_4_A, \
 						_PORT_PLL_EBB_4_B,	\
 						_PORT_PLL_EBB_4_C)
 
@@ -1259,7 +1310,7 @@
 #define _PORT_PLL_BASE(port)		_PORT3(port, _PORT_PLL_0_A,	\
 						_PORT_PLL_0_B,		\
 						_PORT_PLL_0_C)
-#define BXT_PORT_PLL(port, idx)		(_PORT_PLL_BASE(port) + (idx) * 4)
+#define BXT_PORT_PLL(port, idx)		_MMIO(_PORT_PLL_BASE(port) + (idx) * 4)
 
 /* BXT PHY common lane registers */
 #define _PORT_CL1CM_DW0_A		0x162000
@@ -1297,7 +1348,7 @@
 							_PORT_CL1CM_DW30_A)
 
 /* Defined for PHY0 only */
-#define BXT_PORT_CL2CM_DW6_BC		0x6C358
+#define BXT_PORT_CL2CM_DW6_BC		_MMIO(0x6C358)
 #define   DW6_OLDO_DYN_PWR_DOWN_EN	(1 << 28)
 
 /* BXT PHY Ref registers */
@@ -1337,10 +1388,10 @@
 #define _PORT_PCS_DW10_GRP_A		0x162C28
 #define _PORT_PCS_DW10_GRP_B		0x6CC28
 #define _PORT_PCS_DW10_GRP_C		0x6CE28
-#define BXT_PORT_PCS_DW10_LN01(port)	_PORT3(port, _PORT_PCS_DW10_LN01_A, \
+#define BXT_PORT_PCS_DW10_LN01(port)	_MMIO_PORT3(port, _PORT_PCS_DW10_LN01_A, \
 						     _PORT_PCS_DW10_LN01_B, \
 						     _PORT_PCS_DW10_LN01_C)
-#define BXT_PORT_PCS_DW10_GRP(port)	_PORT3(port, _PORT_PCS_DW10_GRP_A,  \
+#define BXT_PORT_PCS_DW10_GRP(port)	_MMIO_PORT3(port, _PORT_PCS_DW10_GRP_A,  \
 						     _PORT_PCS_DW10_GRP_B,  \
 						     _PORT_PCS_DW10_GRP_C)
 #define   TX2_SWING_CALC_INIT		(1 << 31)
@@ -1357,13 +1408,13 @@
 #define _PORT_PCS_DW12_GRP_C		0x6CE30
 #define   LANESTAGGER_STRAP_OVRD	(1 << 6)
 #define   LANE_STAGGER_MASK		0x1F
-#define BXT_PORT_PCS_DW12_LN01(port)	_PORT3(port, _PORT_PCS_DW12_LN01_A, \
+#define BXT_PORT_PCS_DW12_LN01(port)	_MMIO_PORT3(port, _PORT_PCS_DW12_LN01_A, \
 						     _PORT_PCS_DW12_LN01_B, \
 						     _PORT_PCS_DW12_LN01_C)
-#define BXT_PORT_PCS_DW12_LN23(port)	_PORT3(port, _PORT_PCS_DW12_LN23_A, \
+#define BXT_PORT_PCS_DW12_LN23(port)	_MMIO_PORT3(port, _PORT_PCS_DW12_LN23_A, \
 						     _PORT_PCS_DW12_LN23_B, \
 						     _PORT_PCS_DW12_LN23_C)
-#define BXT_PORT_PCS_DW12_GRP(port)	_PORT3(port, _PORT_PCS_DW12_GRP_A, \
+#define BXT_PORT_PCS_DW12_GRP(port)	_MMIO_PORT3(port, _PORT_PCS_DW12_GRP_A, \
 						     _PORT_PCS_DW12_GRP_B, \
 						     _PORT_PCS_DW12_GRP_C)
 
@@ -1377,10 +1428,10 @@
 #define _PORT_TX_DW2_GRP_A		0x162D08
 #define _PORT_TX_DW2_GRP_B		0x6CD08
 #define _PORT_TX_DW2_GRP_C		0x6CF08
-#define BXT_PORT_TX_DW2_GRP(port)	_PORT3(port, _PORT_TX_DW2_GRP_A,  \
+#define BXT_PORT_TX_DW2_GRP(port)	_MMIO_PORT3(port, _PORT_TX_DW2_GRP_A,  \
 						     _PORT_TX_DW2_GRP_B,  \
 						     _PORT_TX_DW2_GRP_C)
-#define BXT_PORT_TX_DW2_LN0(port)	_PORT3(port, _PORT_TX_DW2_LN0_A,  \
+#define BXT_PORT_TX_DW2_LN0(port)	_MMIO_PORT3(port, _PORT_TX_DW2_LN0_A,  \
 						     _PORT_TX_DW2_LN0_B,  \
 						     _PORT_TX_DW2_LN0_C)
 #define   MARGIN_000_SHIFT		16
@@ -1394,10 +1445,10 @@
 #define _PORT_TX_DW3_GRP_A		0x162D0C
 #define _PORT_TX_DW3_GRP_B		0x6CD0C
 #define _PORT_TX_DW3_GRP_C		0x6CF0C
-#define BXT_PORT_TX_DW3_GRP(port)	_PORT3(port, _PORT_TX_DW3_GRP_A,  \
+#define BXT_PORT_TX_DW3_GRP(port)	_MMIO_PORT3(port, _PORT_TX_DW3_GRP_A,  \
 						     _PORT_TX_DW3_GRP_B,  \
 						     _PORT_TX_DW3_GRP_C)
-#define BXT_PORT_TX_DW3_LN0(port)	_PORT3(port, _PORT_TX_DW3_LN0_A,  \
+#define BXT_PORT_TX_DW3_LN0(port)	_MMIO_PORT3(port, _PORT_TX_DW3_LN0_A,  \
 						     _PORT_TX_DW3_LN0_B,  \
 						     _PORT_TX_DW3_LN0_C)
 #define   SCALE_DCOMP_METHOD		(1 << 26)
@@ -1409,10 +1460,10 @@
 #define _PORT_TX_DW4_GRP_A		0x162D10
 #define _PORT_TX_DW4_GRP_B		0x6CD10
 #define _PORT_TX_DW4_GRP_C		0x6CF10
-#define BXT_PORT_TX_DW4_LN0(port)	_PORT3(port, _PORT_TX_DW4_LN0_A,  \
+#define BXT_PORT_TX_DW4_LN0(port)	_MMIO_PORT3(port, _PORT_TX_DW4_LN0_A,  \
 						     _PORT_TX_DW4_LN0_B,  \
 						     _PORT_TX_DW4_LN0_C)
-#define BXT_PORT_TX_DW4_GRP(port)	_PORT3(port, _PORT_TX_DW4_GRP_A,  \
+#define BXT_PORT_TX_DW4_GRP(port)	_MMIO_PORT3(port, _PORT_TX_DW4_GRP_A,  \
 						     _PORT_TX_DW4_GRP_B,  \
 						     _PORT_TX_DW4_GRP_C)
 #define   DEEMPH_SHIFT			24
@@ -1423,17 +1474,17 @@
 #define _PORT_TX_DW14_LN0_C		0x6C938
 #define   LATENCY_OPTIM_SHIFT		30
 #define   LATENCY_OPTIM			(1 << LATENCY_OPTIM_SHIFT)
-#define BXT_PORT_TX_DW14_LN(port, lane)	(_PORT3((port), _PORT_TX_DW14_LN0_A,   \
+#define BXT_PORT_TX_DW14_LN(port, lane)	_MMIO(_PORT3((port), _PORT_TX_DW14_LN0_A,   \
 							_PORT_TX_DW14_LN0_B,   \
 							_PORT_TX_DW14_LN0_C) + \
 					 _BXT_LANE_OFFSET(lane))
 
 /* UAIMI scratch pad register 1 */
-#define UAIMI_SPR1			0x4F074
+#define UAIMI_SPR1			_MMIO(0x4F074)
 /* SKL VccIO mask */
 #define SKL_VCCIO_MASK			0x1
 /* SKL balance leg register */
-#define DISPIO_CR_TX_BMU_CR0		0x6C00C
+#define DISPIO_CR_TX_BMU_CR0		_MMIO(0x6C00C)
 /* I_boost values */
 #define BALANCE_LEG_SHIFT(port)		(8+3*(port))
 #define BALANCE_LEG_MASK(port)		(7<<(8+3*(port)))
@@ -1450,7 +1501,7 @@
  * [0-15] @ 0x100000 gen6,vlv,chv
  * [0-31] @ 0x100000 gen7+
  */
-#define FENCE_REG(i)			(0x2000 + (((i) & 8) << 9) + ((i) & 7) * 4)
+#define FENCE_REG(i)			_MMIO(0x2000 + (((i) & 8) << 9) + ((i) & 7) * 4)
 #define   I830_FENCE_START_MASK		0x07f80000
 #define   I830_FENCE_TILING_Y_SHIFT	12
 #define   I830_FENCE_SIZE_BITS(size)	((ffs((size) >> 19) - 1) << 8)
@@ -1463,21 +1514,21 @@
 #define   I915_FENCE_START_MASK		0x0ff00000
 #define   I915_FENCE_SIZE_BITS(size)	((ffs((size) >> 20) - 1) << 8)
 
-#define FENCE_REG_965_LO(i)		(0x03000 + (i) * 8)
-#define FENCE_REG_965_HI(i)		(0x03000 + (i) * 8 + 4)
+#define FENCE_REG_965_LO(i)		_MMIO(0x03000 + (i) * 8)
+#define FENCE_REG_965_HI(i)		_MMIO(0x03000 + (i) * 8 + 4)
 #define   I965_FENCE_PITCH_SHIFT	2
 #define   I965_FENCE_TILING_Y_SHIFT	1
 #define   I965_FENCE_REG_VALID		(1<<0)
 #define   I965_FENCE_MAX_PITCH_VAL	0x0400
 
-#define FENCE_REG_GEN6_LO(i)	(0x100000 + (i) * 8)
-#define FENCE_REG_GEN6_HI(i)	(0x100000 + (i) * 8 + 4)
+#define FENCE_REG_GEN6_LO(i)		_MMIO(0x100000 + (i) * 8)
+#define FENCE_REG_GEN6_HI(i)		_MMIO(0x100000 + (i) * 8 + 4)
 #define   GEN6_FENCE_PITCH_SHIFT	32
 #define   GEN7_FENCE_MAX_PITCH_VAL	0x0800
 
 
 /* control register for cpu gtt access */
-#define TILECTL				0x101000
+#define TILECTL				_MMIO(0x101000)
 #define   TILECTL_SWZCTL			(1 << 0)
 #define   TILECTL_TLBPF			(1 << 1)
 #define   TILECTL_TLB_PREFETCH_DIS	(1 << 2)
@@ -1486,30 +1537,30 @@
 /*
  * Instruction and interrupt control regs
  */
-#define PGTBL_CTL	0x02020
+#define PGTBL_CTL	_MMIO(0x02020)
 #define   PGTBL_ADDRESS_LO_MASK	0xfffff000 /* bits [31:12] */
 #define   PGTBL_ADDRESS_HI_MASK	0x000000f0 /* bits [35:32] (gen4) */
-#define PGTBL_ER	0x02024
-#define PRB0_BASE (0x2030-0x30)
-#define PRB1_BASE (0x2040-0x30) /* 830,gen3 */
-#define PRB2_BASE (0x2050-0x30) /* gen3 */
-#define SRB0_BASE (0x2100-0x30) /* gen2 */
-#define SRB1_BASE (0x2110-0x30) /* gen2 */
-#define SRB2_BASE (0x2120-0x30) /* 830 */
-#define SRB3_BASE (0x2130-0x30) /* 830 */
+#define PGTBL_ER	_MMIO(0x02024)
+#define PRB0_BASE	(0x2030-0x30)
+#define PRB1_BASE	(0x2040-0x30) /* 830,gen3 */
+#define PRB2_BASE	(0x2050-0x30) /* gen3 */
+#define SRB0_BASE	(0x2100-0x30) /* gen2 */
+#define SRB1_BASE	(0x2110-0x30) /* gen2 */
+#define SRB2_BASE	(0x2120-0x30) /* 830 */
+#define SRB3_BASE	(0x2130-0x30) /* 830 */
 #define RENDER_RING_BASE	0x02000
 #define BSD_RING_BASE		0x04000
 #define GEN6_BSD_RING_BASE	0x12000
 #define GEN8_BSD2_RING_BASE	0x1c000
 #define VEBOX_RING_BASE		0x1a000
 #define BLT_RING_BASE		0x22000
-#define RING_TAIL(base)		((base)+0x30)
-#define RING_HEAD(base)		((base)+0x34)
-#define RING_START(base)	((base)+0x38)
-#define RING_CTL(base)		((base)+0x3c)
-#define RING_SYNC_0(base)	((base)+0x40)
-#define RING_SYNC_1(base)	((base)+0x44)
-#define RING_SYNC_2(base)	((base)+0x48)
+#define RING_TAIL(base)		_MMIO((base)+0x30)
+#define RING_HEAD(base)		_MMIO((base)+0x34)
+#define RING_START(base)	_MMIO((base)+0x38)
+#define RING_CTL(base)		_MMIO((base)+0x3c)
+#define RING_SYNC_0(base)	_MMIO((base)+0x40)
+#define RING_SYNC_1(base)	_MMIO((base)+0x44)
+#define RING_SYNC_2(base)	_MMIO((base)+0x48)
 #define GEN6_RVSYNC	(RING_SYNC_0(RENDER_RING_BASE))
 #define GEN6_RBSYNC	(RING_SYNC_1(RENDER_RING_BASE))
 #define GEN6_RVESYNC	(RING_SYNC_2(RENDER_RING_BASE))
@@ -1522,51 +1573,52 @@
 #define GEN6_VEBSYNC	(RING_SYNC_0(VEBOX_RING_BASE))
 #define GEN6_VERSYNC	(RING_SYNC_1(VEBOX_RING_BASE))
 #define GEN6_VEVSYNC	(RING_SYNC_2(VEBOX_RING_BASE))
-#define GEN6_NOSYNC 0
-#define RING_PSMI_CTL(base)	((base)+0x50)
-#define RING_MAX_IDLE(base)	((base)+0x54)
-#define RING_HWS_PGA(base)	((base)+0x80)
-#define RING_HWS_PGA_GEN6(base)	((base)+0x2080)
-#define RING_RESET_CTL(base)	((base)+0xd0)
+#define GEN6_NOSYNC	INVALID_MMIO_REG
+#define RING_PSMI_CTL(base)	_MMIO((base)+0x50)
+#define RING_MAX_IDLE(base)	_MMIO((base)+0x54)
+#define RING_HWS_PGA(base)	_MMIO((base)+0x80)
+#define RING_HWS_PGA_GEN6(base)	_MMIO((base)+0x2080)
+#define RING_RESET_CTL(base)	_MMIO((base)+0xd0)
 #define   RESET_CTL_REQUEST_RESET  (1 << 0)
 #define   RESET_CTL_READY_TO_RESET (1 << 1)
 
-#define HSW_GTT_CACHE_EN	0x4024
+#define HSW_GTT_CACHE_EN	_MMIO(0x4024)
 #define   GTT_CACHE_EN_ALL	0xF0007FFF
-#define GEN7_WR_WATERMARK	0x4028
-#define GEN7_GFX_PRIO_CTRL	0x402C
-#define ARB_MODE		0x4030
+#define GEN7_WR_WATERMARK	_MMIO(0x4028)
+#define GEN7_GFX_PRIO_CTRL	_MMIO(0x402C)
+#define ARB_MODE		_MMIO(0x4030)
 #define   ARB_MODE_SWIZZLE_SNB	(1<<4)
 #define   ARB_MODE_SWIZZLE_IVB	(1<<5)
-#define GEN7_GFX_PEND_TLB0	0x4034
-#define GEN7_GFX_PEND_TLB1	0x4038
+#define GEN7_GFX_PEND_TLB0	_MMIO(0x4034)
+#define GEN7_GFX_PEND_TLB1	_MMIO(0x4038)
 /* L3, CVS, ZTLB, RCC, CASC LRA min, max values */
-#define GEN7_LRA_LIMITS(i)	(0x403C + (i) * 4)
+#define GEN7_LRA_LIMITS(i)	_MMIO(0x403C + (i) * 4)
 #define GEN7_LRA_LIMITS_REG_NUM	13
-#define GEN7_MEDIA_MAX_REQ_COUNT	0x4070
-#define GEN7_GFX_MAX_REQ_COUNT		0x4074
+#define GEN7_MEDIA_MAX_REQ_COUNT	_MMIO(0x4070)
+#define GEN7_GFX_MAX_REQ_COUNT		_MMIO(0x4074)
 
-#define GAMTARBMODE		0x04a08
+#define GAMTARBMODE		_MMIO(0x04a08)
 #define   ARB_MODE_BWGTLB_DISABLE (1<<9)
 #define   ARB_MODE_SWIZZLE_BDW	(1<<1)
-#define RENDER_HWS_PGA_GEN7	(0x04080)
-#define RING_FAULT_REG(ring)	(0x4094 + 0x100*(ring)->id)
+#define RENDER_HWS_PGA_GEN7	_MMIO(0x04080)
+#define RING_FAULT_REG(ring)	_MMIO(0x4094 + 0x100*(ring)->id)
 #define   RING_FAULT_GTTSEL_MASK (1<<11)
 #define   RING_FAULT_SRCID(x)	(((x) >> 3) & 0xff)
 #define   RING_FAULT_FAULT_TYPE(x) (((x) >> 1) & 0x3)
 #define   RING_FAULT_VALID	(1<<0)
-#define DONE_REG		0x40b0
-#define GEN8_PRIVATE_PAT_LO	0x40e0
-#define GEN8_PRIVATE_PAT_HI	(0x40e0 + 4)
-#define BSD_HWS_PGA_GEN7	(0x04180)
-#define BLT_HWS_PGA_GEN7	(0x04280)
-#define VEBOX_HWS_PGA_GEN7	(0x04380)
-#define RING_ACTHD(base)	((base)+0x74)
-#define RING_ACTHD_UDW(base)	((base)+0x5c)
-#define RING_NOPID(base)	((base)+0x94)
-#define RING_IMR(base)		((base)+0xa8)
-#define RING_HWSTAM(base)	((base)+0x98)
-#define RING_TIMESTAMP(base)	((base)+0x358)
+#define DONE_REG		_MMIO(0x40b0)
+#define GEN8_PRIVATE_PAT_LO	_MMIO(0x40e0)
+#define GEN8_PRIVATE_PAT_HI	_MMIO(0x40e0 + 4)
+#define BSD_HWS_PGA_GEN7	_MMIO(0x04180)
+#define BLT_HWS_PGA_GEN7	_MMIO(0x04280)
+#define VEBOX_HWS_PGA_GEN7	_MMIO(0x04380)
+#define RING_ACTHD(base)	_MMIO((base)+0x74)
+#define RING_ACTHD_UDW(base)	_MMIO((base)+0x5c)
+#define RING_NOPID(base)	_MMIO((base)+0x94)
+#define RING_IMR(base)		_MMIO((base)+0xa8)
+#define RING_HWSTAM(base)	_MMIO((base)+0x98)
+#define RING_TIMESTAMP(base)		_MMIO((base)+0x358)
+#define RING_TIMESTAMP_UDW(base)	_MMIO((base)+0x358 + 4)
 #define   TAIL_ADDR		0x001FFFF8
 #define   HEAD_WRAP_COUNT	0xFFE00000
 #define   HEAD_WRAP_ONE		0x00200000
@@ -1583,57 +1635,65 @@
 #define   RING_WAIT		(1<<11) /* gen3+, PRBx_CTL */
 #define   RING_WAIT_SEMAPHORE	(1<<10) /* gen6+ */
 
-#define GEN7_TLB_RD_ADDR	0x4700
+#define GEN7_TLB_RD_ADDR	_MMIO(0x4700)
 
 #if 0
-#define PRB0_TAIL	0x02030
-#define PRB0_HEAD	0x02034
-#define PRB0_START	0x02038
-#define PRB0_CTL	0x0203c
-#define PRB1_TAIL	0x02040 /* 915+ only */
-#define PRB1_HEAD	0x02044 /* 915+ only */
-#define PRB1_START	0x02048 /* 915+ only */
-#define PRB1_CTL	0x0204c /* 915+ only */
+#define PRB0_TAIL	_MMIO(0x2030)
+#define PRB0_HEAD	_MMIO(0x2034)
+#define PRB0_START	_MMIO(0x2038)
+#define PRB0_CTL	_MMIO(0x203c)
+#define PRB1_TAIL	_MMIO(0x2040) /* 915+ only */
+#define PRB1_HEAD	_MMIO(0x2044) /* 915+ only */
+#define PRB1_START	_MMIO(0x2048) /* 915+ only */
+#define PRB1_CTL	_MMIO(0x204c) /* 915+ only */
 #endif
-#define IPEIR_I965	0x02064
-#define IPEHR_I965	0x02068
-#define GEN7_SC_INSTDONE	0x07100
-#define GEN7_SAMPLER_INSTDONE	0x0e160
-#define GEN7_ROW_INSTDONE	0x0e164
+#define IPEIR_I965	_MMIO(0x2064)
+#define IPEHR_I965	_MMIO(0x2068)
+#define GEN7_SC_INSTDONE	_MMIO(0x7100)
+#define GEN7_SAMPLER_INSTDONE	_MMIO(0xe160)
+#define GEN7_ROW_INSTDONE	_MMIO(0xe164)
 #define I915_NUM_INSTDONE_REG	4
-#define RING_IPEIR(base)	((base)+0x64)
-#define RING_IPEHR(base)	((base)+0x68)
+#define RING_IPEIR(base)	_MMIO((base)+0x64)
+#define RING_IPEHR(base)	_MMIO((base)+0x68)
 /*
  * On GEN4, only the render ring INSTDONE exists and has a different
  * layout than the GEN7+ version.
  * The GEN2 counterpart of this register is GEN2_INSTDONE.
  */
-#define RING_INSTDONE(base)	((base)+0x6c)
-#define RING_INSTPS(base)	((base)+0x70)
-#define RING_DMA_FADD(base)	((base)+0x78)
-#define RING_DMA_FADD_UDW(base)	((base)+0x60) /* gen8+ */
-#define RING_INSTPM(base)	((base)+0xc0)
-#define RING_MI_MODE(base)	((base)+0x9c)
-#define INSTPS		0x02070 /* 965+ only */
-#define GEN4_INSTDONE1	0x0207c /* 965+ only, aka INSTDONE_2 on SNB */
-#define ACTHD_I965	0x02074
-#define HWS_PGA		0x02080
+#define RING_INSTDONE(base)	_MMIO((base)+0x6c)
+#define RING_INSTPS(base)	_MMIO((base)+0x70)
+#define RING_DMA_FADD(base)	_MMIO((base)+0x78)
+#define RING_DMA_FADD_UDW(base)	_MMIO((base)+0x60) /* gen8+ */
+#define RING_INSTPM(base)	_MMIO((base)+0xc0)
+#define RING_MI_MODE(base)	_MMIO((base)+0x9c)
+#define INSTPS		_MMIO(0x2070) /* 965+ only */
+#define GEN4_INSTDONE1	_MMIO(0x207c) /* 965+ only, aka INSTDONE_2 on SNB */
+#define ACTHD_I965	_MMIO(0x2074)
+#define HWS_PGA		_MMIO(0x2080)
 #define HWS_ADDRESS_MASK	0xfffff000
 #define HWS_START_ADDRESS_SHIFT	4
-#define PWRCTXA		0x2088 /* 965GM+ only */
+#define PWRCTXA		_MMIO(0x2088) /* 965GM+ only */
 #define   PWRCTX_EN	(1<<0)
-#define IPEIR		0x02088
-#define IPEHR		0x0208c
-#define GEN2_INSTDONE	0x02090
-#define NOPID		0x02094
-#define HWSTAM		0x02098
-#define DMA_FADD_I8XX	0x020d0
-#define RING_BBSTATE(base)	((base)+0x110)
-#define RING_BBADDR(base)	((base)+0x140)
-#define RING_BBADDR_UDW(base)	((base)+0x168) /* gen8+ */
+#define IPEIR		_MMIO(0x2088)
+#define IPEHR		_MMIO(0x208c)
+#define GEN2_INSTDONE	_MMIO(0x2090)
+#define NOPID		_MMIO(0x2094)
+#define HWSTAM		_MMIO(0x2098)
+#define DMA_FADD_I8XX	_MMIO(0x20d0)
+#define RING_BBSTATE(base)	_MMIO((base)+0x110)
+#define   RING_BB_PPGTT		(1 << 5)
+#define RING_SBBADDR(base)	_MMIO((base)+0x114) /* hsw+ */
+#define RING_SBBSTATE(base)	_MMIO((base)+0x118) /* hsw+ */
+#define RING_SBBADDR_UDW(base)	_MMIO((base)+0x11c) /* gen8+ */
+#define RING_BBADDR(base)	_MMIO((base)+0x140)
+#define RING_BBADDR_UDW(base)	_MMIO((base)+0x168) /* gen8+ */
+#define RING_BB_PER_CTX_PTR(base)	_MMIO((base)+0x1c0) /* gen8+ */
+#define RING_INDIRECT_CTX(base)		_MMIO((base)+0x1c4) /* gen8+ */
+#define RING_INDIRECT_CTX_OFFSET(base)	_MMIO((base)+0x1c8) /* gen8+ */
+#define RING_CTX_TIMESTAMP(base)	_MMIO((base)+0x3a8) /* gen8+ */
 
-#define ERROR_GEN6	0x040a0
-#define GEN7_ERR_INT	0x44040
+#define ERROR_GEN6	_MMIO(0x40a0)
+#define GEN7_ERR_INT	_MMIO(0x44040)
 #define   ERR_INT_POISON		(1<<31)
 #define   ERR_INT_MMIO_UNCLAIMED	(1<<13)
 #define   ERR_INT_PIPE_CRC_DONE_C	(1<<8)
@@ -1645,13 +1705,13 @@
 #define   ERR_INT_FIFO_UNDERRUN_A	(1<<0)
 #define   ERR_INT_FIFO_UNDERRUN(pipe)	(1<<((pipe)*3))
 
-#define GEN8_FAULT_TLB_DATA0		0x04b10
-#define GEN8_FAULT_TLB_DATA1		0x04b14
+#define GEN8_FAULT_TLB_DATA0		_MMIO(0x4b10)
+#define GEN8_FAULT_TLB_DATA1		_MMIO(0x4b14)
 
-#define FPGA_DBG		0x42300
+#define FPGA_DBG		_MMIO(0x42300)
 #define   FPGA_DBG_RM_NOCLAIM	(1<<31)
 
-#define DERRMR		0x44050
+#define DERRMR		_MMIO(0x44050)
 /* Note that HBLANK events are reserved on bdw+ */
 #define   DERRMR_PIPEA_SCANLINE		(1<<0)
 #define   DERRMR_PIPEA_PRI_FLIP_DONE	(1<<1)
@@ -1675,29 +1735,29 @@
  * for various sorts of correct behavior.  The top 16 bits of each are
  * the enables for writing to the corresponding low bit.
  */
-#define _3D_CHICKEN	0x02084
+#define _3D_CHICKEN	_MMIO(0x2084)
 #define  _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB	(1 << 10)
-#define _3D_CHICKEN2	0x0208c
+#define _3D_CHICKEN2	_MMIO(0x208c)
 /* Disables pipelining of read flushes past the SF-WIZ interface.
  * Required on all Ironlake steppings according to the B-Spec, but the
  * particular danger of not doing so is not specified.
  */
 # define _3D_CHICKEN2_WM_READ_PIPELINED			(1 << 14)
-#define _3D_CHICKEN3	0x02090
+#define _3D_CHICKEN3	_MMIO(0x2090)
 #define  _3D_CHICKEN_SF_DISABLE_OBJEND_CULL		(1 << 10)
 #define  _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL		(1 << 5)
 #define  _3D_CHICKEN_SDE_LIMIT_FIFO_POLY_DEPTH(x)	((x)<<1) /* gen8+ */
 #define  _3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH	(1 << 1) /* gen6 */
 
-#define MI_MODE		0x0209c
+#define MI_MODE		_MMIO(0x209c)
 # define VS_TIMER_DISPATCH				(1 << 6)
 # define MI_FLUSH_ENABLE				(1 << 12)
 # define ASYNC_FLIP_PERF_DISABLE			(1 << 14)
 # define MODE_IDLE					(1 << 9)
 # define STOP_RING					(1 << 8)
 
-#define GEN6_GT_MODE	0x20d0
-#define GEN7_GT_MODE	0x7008
+#define GEN6_GT_MODE	_MMIO(0x20d0)
+#define GEN7_GT_MODE	_MMIO(0x7008)
 #define   GEN6_WIZ_HASHING(hi, lo)			(((hi) << 9) | ((lo) << 7))
 #define   GEN6_WIZ_HASHING_8x8				GEN6_WIZ_HASHING(0, 0)
 #define   GEN6_WIZ_HASHING_8x4				GEN6_WIZ_HASHING(0, 1)
@@ -1707,9 +1767,9 @@
 #define   GEN9_IZ_HASHING_MASK(slice)			(0x3 << ((slice) * 2))
 #define   GEN9_IZ_HASHING(slice, val)			((val) << ((slice) * 2))
 
-#define GFX_MODE	0x02520
-#define GFX_MODE_GEN7	0x0229c
-#define RING_MODE_GEN7(ring)	((ring)->mmio_base+0x29c)
+#define GFX_MODE	_MMIO(0x2520)
+#define GFX_MODE_GEN7	_MMIO(0x229c)
+#define RING_MODE_GEN7(ring)	_MMIO((ring)->mmio_base+0x29c)
 #define   GFX_RUN_LIST_ENABLE		(1<<15)
 #define   GFX_INTERRUPT_STEERING	(1<<14)
 #define   GFX_TLB_INVALIDATE_EXPLICIT	(1<<13)
@@ -1727,36 +1787,36 @@
 #define VLV_DISPLAY_BASE 0x180000
 #define VLV_MIPI_BASE VLV_DISPLAY_BASE
 
-#define VLV_GU_CTL0	(VLV_DISPLAY_BASE + 0x2030)
-#define VLV_GU_CTL1	(VLV_DISPLAY_BASE + 0x2034)
-#define SCPD0		0x0209c /* 915+ only */
-#define IER		0x020a0
-#define IIR		0x020a4
-#define IMR		0x020a8
-#define ISR		0x020ac
-#define VLV_GUNIT_CLOCK_GATE	(VLV_DISPLAY_BASE + 0x2060)
+#define VLV_GU_CTL0	_MMIO(VLV_DISPLAY_BASE + 0x2030)
+#define VLV_GU_CTL1	_MMIO(VLV_DISPLAY_BASE + 0x2034)
+#define SCPD0		_MMIO(0x209c) /* 915+ only */
+#define IER		_MMIO(0x20a0)
+#define IIR		_MMIO(0x20a4)
+#define IMR		_MMIO(0x20a8)
+#define ISR		_MMIO(0x20ac)
+#define VLV_GUNIT_CLOCK_GATE	_MMIO(VLV_DISPLAY_BASE + 0x2060)
 #define   GINT_DIS		(1<<22)
 #define   GCFG_DIS		(1<<8)
-#define VLV_GUNIT_CLOCK_GATE2	(VLV_DISPLAY_BASE + 0x2064)
-#define VLV_IIR_RW	(VLV_DISPLAY_BASE + 0x2084)
-#define VLV_IER		(VLV_DISPLAY_BASE + 0x20a0)
-#define VLV_IIR		(VLV_DISPLAY_BASE + 0x20a4)
-#define VLV_IMR		(VLV_DISPLAY_BASE + 0x20a8)
-#define VLV_ISR		(VLV_DISPLAY_BASE + 0x20ac)
-#define VLV_PCBR	(VLV_DISPLAY_BASE + 0x2120)
+#define VLV_GUNIT_CLOCK_GATE2	_MMIO(VLV_DISPLAY_BASE + 0x2064)
+#define VLV_IIR_RW	_MMIO(VLV_DISPLAY_BASE + 0x2084)
+#define VLV_IER		_MMIO(VLV_DISPLAY_BASE + 0x20a0)
+#define VLV_IIR		_MMIO(VLV_DISPLAY_BASE + 0x20a4)
+#define VLV_IMR		_MMIO(VLV_DISPLAY_BASE + 0x20a8)
+#define VLV_ISR		_MMIO(VLV_DISPLAY_BASE + 0x20ac)
+#define VLV_PCBR	_MMIO(VLV_DISPLAY_BASE + 0x2120)
 #define VLV_PCBR_ADDR_SHIFT	12
 
 #define   DISPLAY_PLANE_FLIP_PENDING(plane) (1<<(11-(plane))) /* A and B only */
-#define EIR		0x020b0
-#define EMR		0x020b4
-#define ESR		0x020b8
+#define EIR		_MMIO(0x20b0)
+#define EMR		_MMIO(0x20b4)
+#define ESR		_MMIO(0x20b8)
 #define   GM45_ERROR_PAGE_TABLE				(1<<5)
 #define   GM45_ERROR_MEM_PRIV				(1<<4)
 #define   I915_ERROR_PAGE_TABLE				(1<<4)
 #define   GM45_ERROR_CP_PRIV				(1<<3)
 #define   I915_ERROR_MEMORY_REFRESH			(1<<1)
 #define   I915_ERROR_INSTRUCTION			(1<<0)
-#define INSTPM	        0x020c0
+#define INSTPM	        _MMIO(0x20c0)
 #define   INSTPM_SELF_EN (1<<12) /* 915GM only */
 #define   INSTPM_AGPBUSY_INT_EN (1<<11) /* gen3: when disabled, pending interrupts
 					will not assert AGPBUSY# and will only
@@ -1764,14 +1824,14 @@
 #define   INSTPM_FORCE_ORDERING				(1<<7) /* GEN6+ */
 #define   INSTPM_TLB_INVALIDATE	(1<<9)
 #define   INSTPM_SYNC_FLUSH	(1<<5)
-#define ACTHD	        0x020c8
-#define MEM_MODE	0x020cc
+#define ACTHD	        _MMIO(0x20c8)
+#define MEM_MODE	_MMIO(0x20cc)
 #define   MEM_DISPLAY_B_TRICKLE_FEED_DISABLE (1<<3) /* 830 only */
 #define   MEM_DISPLAY_A_TRICKLE_FEED_DISABLE (1<<2) /* 830/845 only */
 #define   MEM_DISPLAY_TRICKLE_FEED_DISABLE (1<<2) /* 85x only */
-#define FW_BLC		0x020d8
-#define FW_BLC2		0x020dc
-#define FW_BLC_SELF	0x020e0 /* 915+ only */
+#define FW_BLC		_MMIO(0x20d8)
+#define FW_BLC2		_MMIO(0x20dc)
+#define FW_BLC_SELF	_MMIO(0x20e0) /* 915+ only */
 #define   FW_BLC_SELF_EN_MASK      (1<<31)
 #define   FW_BLC_SELF_FIFO_MASK    (1<<16) /* 945 only */
 #define   FW_BLC_SELF_EN           (1<<15) /* 945 only */
@@ -1779,7 +1839,7 @@
 #define MM_FIFO_WATERMARK   0x0001F000
 #define LM_BURST_LENGTH     0x00000700
 #define LM_FIFO_WATERMARK   0x0000001F
-#define MI_ARB_STATE	0x020e4 /* 915+ only */
+#define MI_ARB_STATE	_MMIO(0x20e4) /* 915+ only */
 
 /* Make render/texture TLB fetches lower priorty than associated data
  *   fetches. This is not turned on by default
@@ -1843,11 +1903,11 @@
 #define   MI_ARB_DISPLAY_PRIORITY_A_B		(0 << 0)	/* display A > display B */
 #define   MI_ARB_DISPLAY_PRIORITY_B_A		(1 << 0)	/* display B > display A */
 
-#define MI_STATE	0x020e4 /* gen2 only */
+#define MI_STATE	_MMIO(0x20e4) /* gen2 only */
 #define   MI_AGPBUSY_INT_EN			(1 << 1) /* 85x only */
 #define   MI_AGPBUSY_830_MODE			(1 << 0) /* 85x only */
 
-#define CACHE_MODE_0	0x02120 /* 915+ only */
+#define CACHE_MODE_0	_MMIO(0x2120) /* 915+ only */
 #define   CM0_PIPELINED_RENDER_FLUSH_DISABLE (1<<8)
 #define   CM0_IZ_OPT_DISABLE      (1<<6)
 #define   CM0_ZR_OPT_DISABLE      (1<<5)
@@ -1856,32 +1916,32 @@
 #define   CM0_COLOR_EVICT_DISABLE (1<<3)
 #define   CM0_DEPTH_WRITE_DISABLE (1<<1)
 #define   CM0_RC_OP_FLUSH_DISABLE (1<<0)
-#define GFX_FLSH_CNTL	0x02170 /* 915+ only */
-#define GFX_FLSH_CNTL_GEN6	0x101008
+#define GFX_FLSH_CNTL	_MMIO(0x2170) /* 915+ only */
+#define GFX_FLSH_CNTL_GEN6	_MMIO(0x101008)
 #define   GFX_FLSH_CNTL_EN	(1<<0)
-#define ECOSKPD		0x021d0
+#define ECOSKPD		_MMIO(0x21d0)
 #define   ECO_GATING_CX_ONLY	(1<<3)
 #define   ECO_FLIP_DONE		(1<<0)
 
-#define CACHE_MODE_0_GEN7	0x7000 /* IVB+ */
+#define CACHE_MODE_0_GEN7	_MMIO(0x7000) /* IVB+ */
 #define RC_OP_FLUSH_ENABLE (1<<0)
 #define   HIZ_RAW_STALL_OPT_DISABLE (1<<2)
-#define CACHE_MODE_1		0x7004 /* IVB+ */
+#define CACHE_MODE_1		_MMIO(0x7004) /* IVB+ */
 #define   PIXEL_SUBSPAN_COLLECT_OPT_DISABLE	(1<<6)
 #define   GEN8_4x4_STC_OPTIMIZATION_DISABLE	(1<<6)
 #define   GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE	(1<<1)
 
-#define GEN6_BLITTER_ECOSKPD	0x221d0
+#define GEN6_BLITTER_ECOSKPD	_MMIO(0x221d0)
 #define   GEN6_BLITTER_LOCK_SHIFT			16
 #define   GEN6_BLITTER_FBC_NOTIFY			(1<<3)
 
-#define GEN6_RC_SLEEP_PSMI_CONTROL	0x2050
+#define GEN6_RC_SLEEP_PSMI_CONTROL	_MMIO(0x2050)
 #define   GEN6_PSMI_SLEEP_MSG_DISABLE	(1 << 0)
 #define   GEN8_RC_SEMA_IDLE_MSG_DISABLE	(1 << 12)
 #define   GEN8_FF_DOP_CLOCK_GATE_DISABLE	(1<<10)
 
 /* Fuse readout registers for GT */
-#define CHV_FUSE_GT			(VLV_DISPLAY_BASE + 0x2168)
+#define CHV_FUSE_GT			_MMIO(VLV_DISPLAY_BASE + 0x2168)
 #define   CHV_FGT_DISABLE_SS0		(1 << 10)
 #define   CHV_FGT_DISABLE_SS1		(1 << 11)
 #define   CHV_FGT_EU_DIS_SS0_R0_SHIFT	16
@@ -1893,7 +1953,7 @@
 #define   CHV_FGT_EU_DIS_SS1_R1_SHIFT	28
 #define   CHV_FGT_EU_DIS_SS1_R1_MASK	(0xf << CHV_FGT_EU_DIS_SS1_R1_SHIFT)
 
-#define GEN8_FUSE2			0x9120
+#define GEN8_FUSE2			_MMIO(0x9120)
 #define   GEN8_F2_SS_DIS_SHIFT		21
 #define   GEN8_F2_SS_DIS_MASK		(0x7 << GEN8_F2_SS_DIS_SHIFT)
 #define   GEN8_F2_S_ENA_SHIFT		25
@@ -1902,22 +1962,22 @@
 #define   GEN9_F2_SS_DIS_SHIFT		20
 #define   GEN9_F2_SS_DIS_MASK		(0xf << GEN9_F2_SS_DIS_SHIFT)
 
-#define GEN8_EU_DISABLE0		0x9134
+#define GEN8_EU_DISABLE0		_MMIO(0x9134)
 #define   GEN8_EU_DIS0_S0_MASK		0xffffff
 #define   GEN8_EU_DIS0_S1_SHIFT		24
 #define   GEN8_EU_DIS0_S1_MASK		(0xff << GEN8_EU_DIS0_S1_SHIFT)
 
-#define GEN8_EU_DISABLE1		0x9138
+#define GEN8_EU_DISABLE1		_MMIO(0x9138)
 #define   GEN8_EU_DIS1_S1_MASK		0xffff
 #define   GEN8_EU_DIS1_S2_SHIFT		16
 #define   GEN8_EU_DIS1_S2_MASK		(0xffff << GEN8_EU_DIS1_S2_SHIFT)
 
-#define GEN8_EU_DISABLE2		0x913c
+#define GEN8_EU_DISABLE2		_MMIO(0x913c)
 #define   GEN8_EU_DIS2_S2_MASK		0xff
 
-#define GEN9_EU_DISABLE(slice)		(0x9134 + (slice)*0x4)
+#define GEN9_EU_DISABLE(slice)		_MMIO(0x9134 + (slice)*0x4)
 
-#define GEN6_BSD_SLEEP_PSMI_CONTROL	0x12050
+#define GEN6_BSD_SLEEP_PSMI_CONTROL	_MMIO(0x12050)
 #define   GEN6_BSD_SLEEP_MSG_DISABLE	(1 << 0)
 #define   GEN6_BSD_SLEEP_FLUSH_DISABLE	(1 << 2)
 #define   GEN6_BSD_SLEEP_INDICATOR	(1 << 3)
@@ -1995,9 +2055,9 @@
 #define I915_ASLE_INTERRUPT				(1<<0)
 #define I915_BSD_USER_INTERRUPT				(1<<25)
 
-#define GEN6_BSD_RNCID			0x12198
+#define GEN6_BSD_RNCID			_MMIO(0x12198)
 
-#define GEN7_FF_THREAD_MODE		0x20a0
+#define GEN7_FF_THREAD_MODE		_MMIO(0x20a0)
 #define   GEN7_FF_SCHED_MASK		0x0077070
 #define   GEN8_FF_DS_REF_CNT_FFME	(1 << 19)
 #define   GEN7_FF_TS_SCHED_HS1		(0x5<<16)
@@ -2018,9 +2078,9 @@
  * Framebuffer compression (915+ only)
  */
 
-#define FBC_CFB_BASE		0x03200 /* 4k page aligned */
-#define FBC_LL_BASE		0x03204 /* 4k page aligned */
-#define FBC_CONTROL		0x03208
+#define FBC_CFB_BASE		_MMIO(0x3200) /* 4k page aligned */
+#define FBC_LL_BASE		_MMIO(0x3204) /* 4k page aligned */
+#define FBC_CONTROL		_MMIO(0x3208)
 #define   FBC_CTL_EN		(1<<31)
 #define   FBC_CTL_PERIODIC	(1<<30)
 #define   FBC_CTL_INTERVAL_SHIFT (16)
@@ -2028,14 +2088,14 @@
 #define   FBC_CTL_C3_IDLE	(1<<13)
 #define   FBC_CTL_STRIDE_SHIFT	(5)
 #define   FBC_CTL_FENCENO_SHIFT	(0)
-#define FBC_COMMAND		0x0320c
+#define FBC_COMMAND		_MMIO(0x320c)
 #define   FBC_CMD_COMPRESS	(1<<0)
-#define FBC_STATUS		0x03210
+#define FBC_STATUS		_MMIO(0x3210)
 #define   FBC_STAT_COMPRESSING	(1<<31)
 #define   FBC_STAT_COMPRESSED	(1<<30)
 #define   FBC_STAT_MODIFIED	(1<<29)
 #define   FBC_STAT_CURRENT_LINE_SHIFT	(0)
-#define FBC_CONTROL2		0x03214
+#define FBC_CONTROL2		_MMIO(0x3214)
 #define   FBC_CTL_FENCE_DBL	(0<<4)
 #define   FBC_CTL_IDLE_IMM	(0<<2)
 #define   FBC_CTL_IDLE_FULL	(1<<2)
@@ -2043,17 +2103,17 @@
 #define   FBC_CTL_IDLE_DEBUG	(3<<2)
 #define   FBC_CTL_CPU_FENCE	(1<<1)
 #define   FBC_CTL_PLANE(plane)	((plane)<<0)
-#define FBC_FENCE_OFF		0x03218 /* BSpec typo has 321Bh */
-#define FBC_TAG(i)		(0x03300 + (i) * 4)
+#define FBC_FENCE_OFF		_MMIO(0x3218) /* BSpec typo has 321Bh */
+#define FBC_TAG(i)		_MMIO(0x3300 + (i) * 4)
 
-#define FBC_STATUS2		0x43214
+#define FBC_STATUS2		_MMIO(0x43214)
 #define  FBC_COMPRESSION_MASK	0x7ff
 
 #define FBC_LL_SIZE		(1536)
 
 /* Framebuffer compression for GM45+ */
-#define DPFC_CB_BASE		0x3200
-#define DPFC_CONTROL		0x3208
+#define DPFC_CB_BASE		_MMIO(0x3200)
+#define DPFC_CONTROL		_MMIO(0x3208)
 #define   DPFC_CTL_EN		(1<<31)
 #define   DPFC_CTL_PLANE(plane)	((plane)<<30)
 #define   IVB_DPFC_CTL_PLANE(plane)	((plane)<<29)
@@ -2064,37 +2124,37 @@
 #define   DPFC_CTL_LIMIT_1X	(0<<6)
 #define   DPFC_CTL_LIMIT_2X	(1<<6)
 #define   DPFC_CTL_LIMIT_4X	(2<<6)
-#define DPFC_RECOMP_CTL		0x320c
+#define DPFC_RECOMP_CTL		_MMIO(0x320c)
 #define   DPFC_RECOMP_STALL_EN	(1<<27)
 #define   DPFC_RECOMP_STALL_WM_SHIFT (16)
 #define   DPFC_RECOMP_STALL_WM_MASK (0x07ff0000)
 #define   DPFC_RECOMP_TIMER_COUNT_SHIFT (0)
 #define   DPFC_RECOMP_TIMER_COUNT_MASK (0x0000003f)
-#define DPFC_STATUS		0x3210
+#define DPFC_STATUS		_MMIO(0x3210)
 #define   DPFC_INVAL_SEG_SHIFT  (16)
 #define   DPFC_INVAL_SEG_MASK	(0x07ff0000)
 #define   DPFC_COMP_SEG_SHIFT	(0)
 #define   DPFC_COMP_SEG_MASK	(0x000003ff)
-#define DPFC_STATUS2		0x3214
-#define DPFC_FENCE_YOFF		0x3218
-#define DPFC_CHICKEN		0x3224
+#define DPFC_STATUS2		_MMIO(0x3214)
+#define DPFC_FENCE_YOFF		_MMIO(0x3218)
+#define DPFC_CHICKEN		_MMIO(0x3224)
 #define   DPFC_HT_MODIFY	(1<<31)
 
 /* Framebuffer compression for Ironlake */
-#define ILK_DPFC_CB_BASE	0x43200
-#define ILK_DPFC_CONTROL	0x43208
+#define ILK_DPFC_CB_BASE	_MMIO(0x43200)
+#define ILK_DPFC_CONTROL	_MMIO(0x43208)
 #define   FBC_CTL_FALSE_COLOR	(1<<10)
 /* The bit 28-8 is reserved */
 #define   DPFC_RESERVED		(0x1FFFFF00)
-#define ILK_DPFC_RECOMP_CTL	0x4320c
-#define ILK_DPFC_STATUS		0x43210
-#define ILK_DPFC_FENCE_YOFF	0x43218
-#define ILK_DPFC_CHICKEN	0x43224
-#define ILK_FBC_RT_BASE		0x2128
+#define ILK_DPFC_RECOMP_CTL	_MMIO(0x4320c)
+#define ILK_DPFC_STATUS		_MMIO(0x43210)
+#define ILK_DPFC_FENCE_YOFF	_MMIO(0x43218)
+#define ILK_DPFC_CHICKEN	_MMIO(0x43224)
+#define ILK_FBC_RT_BASE		_MMIO(0x2128)
 #define   ILK_FBC_RT_VALID	(1<<0)
 #define   SNB_FBC_FRONT_BUFFER	(1<<1)
 
-#define ILK_DISPLAY_CHICKEN1	0x42000
+#define ILK_DISPLAY_CHICKEN1	_MMIO(0x42000)
 #define   ILK_FBCQ_DIS		(1<<22)
 #define	  ILK_PABSTRETCH_DIS	(1<<21)
 
@@ -2104,31 +2164,31 @@
  *
  * The following two registers are of type GTTMMADR
  */
-#define SNB_DPFC_CTL_SA		0x100100
+#define SNB_DPFC_CTL_SA		_MMIO(0x100100)
 #define   SNB_CPU_FENCE_ENABLE	(1<<29)
-#define DPFC_CPU_FENCE_OFFSET	0x100104
+#define DPFC_CPU_FENCE_OFFSET	_MMIO(0x100104)
 
 /* Framebuffer compression for Ivybridge */
-#define IVB_FBC_RT_BASE			0x7020
+#define IVB_FBC_RT_BASE			_MMIO(0x7020)
 
-#define IPS_CTL		0x43408
+#define IPS_CTL		_MMIO(0x43408)
 #define   IPS_ENABLE	(1 << 31)
 
-#define MSG_FBC_REND_STATE	0x50380
+#define MSG_FBC_REND_STATE	_MMIO(0x50380)
 #define   FBC_REND_NUKE		(1<<2)
 #define   FBC_REND_CACHE_CLEAN	(1<<1)
 
 /*
  * GPIO regs
  */
-#define GPIOA			0x5010
-#define GPIOB			0x5014
-#define GPIOC			0x5018
-#define GPIOD			0x501c
-#define GPIOE			0x5020
-#define GPIOF			0x5024
-#define GPIOG			0x5028
-#define GPIOH			0x502c
+#define GPIOA			_MMIO(0x5010)
+#define GPIOB			_MMIO(0x5014)
+#define GPIOC			_MMIO(0x5018)
+#define GPIOD			_MMIO(0x501c)
+#define GPIOE			_MMIO(0x5020)
+#define GPIOF			_MMIO(0x5024)
+#define GPIOG			_MMIO(0x5028)
+#define GPIOH			_MMIO(0x502c)
 # define GPIO_CLOCK_DIR_MASK		(1 << 0)
 # define GPIO_CLOCK_DIR_IN		(0 << 1)
 # define GPIO_CLOCK_DIR_OUT		(1 << 1)
@@ -2144,7 +2204,7 @@
 # define GPIO_DATA_VAL_IN		(1 << 12)
 # define GPIO_DATA_PULLUP_DISABLE	(1 << 13)
 
-#define GMBUS0			(dev_priv->gpio_mmio_base + 0x5100) /* clock/port select */
+#define GMBUS0			_MMIO(dev_priv->gpio_mmio_base + 0x5100) /* clock/port select */
 #define   GMBUS_RATE_100KHZ	(0<<8)
 #define   GMBUS_RATE_50KHZ	(1<<8)
 #define   GMBUS_RATE_400KHZ	(2<<8) /* reserved on Pineview */
@@ -2163,7 +2223,7 @@
 #define   GMBUS_PIN_2_BXT	2
 #define   GMBUS_PIN_3_BXT	3
 #define   GMBUS_NUM_PINS	7 /* including 0 */
-#define GMBUS1			(dev_priv->gpio_mmio_base + 0x5104) /* command/status */
+#define GMBUS1			_MMIO(dev_priv->gpio_mmio_base + 0x5104) /* command/status */
 #define   GMBUS_SW_CLR_INT	(1<<31)
 #define   GMBUS_SW_RDY		(1<<30)
 #define   GMBUS_ENT		(1<<29) /* enable timeout */
@@ -2177,7 +2237,7 @@
 #define   GMBUS_SLAVE_ADDR_SHIFT 1
 #define   GMBUS_SLAVE_READ	(1<<0)
 #define   GMBUS_SLAVE_WRITE	(0<<0)
-#define GMBUS2			(dev_priv->gpio_mmio_base + 0x5108) /* status */
+#define GMBUS2			_MMIO(dev_priv->gpio_mmio_base + 0x5108) /* status */
 #define   GMBUS_INUSE		(1<<15)
 #define   GMBUS_HW_WAIT_PHASE	(1<<14)
 #define   GMBUS_STALL_TIMEOUT	(1<<13)
@@ -2185,14 +2245,14 @@
 #define   GMBUS_HW_RDY		(1<<11)
 #define   GMBUS_SATOER		(1<<10)
 #define   GMBUS_ACTIVE		(1<<9)
-#define GMBUS3			(dev_priv->gpio_mmio_base + 0x510c) /* data buffer bytes 3-0 */
-#define GMBUS4			(dev_priv->gpio_mmio_base + 0x5110) /* interrupt mask (Pineview+) */
+#define GMBUS3			_MMIO(dev_priv->gpio_mmio_base + 0x510c) /* data buffer bytes 3-0 */
+#define GMBUS4			_MMIO(dev_priv->gpio_mmio_base + 0x5110) /* interrupt mask (Pineview+) */
 #define   GMBUS_SLAVE_TIMEOUT_EN (1<<4)
 #define   GMBUS_NAK_EN		(1<<3)
 #define   GMBUS_IDLE_EN		(1<<2)
 #define   GMBUS_HW_WAIT_EN	(1<<1)
 #define   GMBUS_HW_RDY_EN	(1<<0)
-#define GMBUS5			(dev_priv->gpio_mmio_base + 0x5120) /* byte index */
+#define GMBUS5			_MMIO(dev_priv->gpio_mmio_base + 0x5120) /* byte index */
 #define   GMBUS_2BYTE_INDEX_EN	(1<<31)
 
 /*
@@ -2201,11 +2261,11 @@
 #define _DPLL_A (dev_priv->info.display_mmio_offset + 0x6014)
 #define _DPLL_B (dev_priv->info.display_mmio_offset + 0x6018)
 #define _CHV_DPLL_C (dev_priv->info.display_mmio_offset + 0x6030)
-#define DPLL(pipe) _PIPE3((pipe), _DPLL_A, _DPLL_B, _CHV_DPLL_C)
+#define DPLL(pipe) _MMIO_PIPE3((pipe), _DPLL_A, _DPLL_B, _CHV_DPLL_C)
 
-#define VGA0	0x6000
-#define VGA1	0x6004
-#define VGA_PD	0x6010
+#define VGA0	_MMIO(0x6000)
+#define VGA1	_MMIO(0x6004)
+#define VGA_PD	_MMIO(0x6010)
 #define   VGA0_PD_P2_DIV_4	(1 << 7)
 #define   VGA0_PD_P1_DIV_2	(1 << 5)
 #define   VGA0_PD_P1_SHIFT	0
@@ -2241,9 +2301,9 @@
 #define   DPLL_FPA01_P1_POST_DIV_MASK_I830	0x001f0000
 
 /* Additional CHV pll/phy registers */
-#define DPIO_PHY_STATUS			(VLV_DISPLAY_BASE + 0x6240)
+#define DPIO_PHY_STATUS			_MMIO(VLV_DISPLAY_BASE + 0x6240)
 #define   DPLL_PORTD_READY_MASK		(0xf)
-#define DISPLAY_PHY_CONTROL (VLV_DISPLAY_BASE + 0x60100)
+#define DISPLAY_PHY_CONTROL _MMIO(VLV_DISPLAY_BASE + 0x60100)
 #define   PHY_CH_POWER_DOWN_OVRD_EN(phy, ch)	(1 << (2*(phy)+(ch)+27))
 #define   PHY_LDO_DELAY_0NS			0x0
 #define   PHY_LDO_DELAY_200NS			0x1
@@ -2254,7 +2314,7 @@
 #define   PHY_CH_DEEP_PSR			0x7
 #define   PHY_CH_POWER_MODE(mode, phy, ch)	((mode) << (6*(phy)+3*(ch)+2))
 #define   PHY_COM_LANE_RESET_DEASSERT(phy)	(1 << (phy))
-#define DISPLAY_PHY_STATUS (VLV_DISPLAY_BASE + 0x60104)
+#define DISPLAY_PHY_STATUS _MMIO(VLV_DISPLAY_BASE + 0x60104)
 #define   PHY_POWERGOOD(phy)	(((phy) == DPIO_PHY0) ? (1<<31) : (1<<30))
 #define   PHY_STATUS_CMN_LDO(phy, ch)                   (1 << (6-(6*(phy)+3*(ch))))
 #define   PHY_STATUS_SPLINE_LDO(phy, ch, spline)        (1 << (8-(6*(phy)+3*(ch)+(spline))))
@@ -2300,7 +2360,7 @@
 #define _DPLL_A_MD (dev_priv->info.display_mmio_offset + 0x601c)
 #define _DPLL_B_MD (dev_priv->info.display_mmio_offset + 0x6020)
 #define _CHV_DPLL_C_MD (dev_priv->info.display_mmio_offset + 0x603c)
-#define DPLL_MD(pipe) _PIPE3((pipe), _DPLL_A_MD, _DPLL_B_MD, _CHV_DPLL_C_MD)
+#define DPLL_MD(pipe) _MMIO_PIPE3((pipe), _DPLL_A_MD, _DPLL_B_MD, _CHV_DPLL_C_MD)
 
 /*
  * UDI pixel divider, controlling how many pixels are stuffed into a packet.
@@ -2339,12 +2399,12 @@
 #define   DPLL_MD_VGA_UDI_MULTIPLIER_MASK	0x0000003f
 #define   DPLL_MD_VGA_UDI_MULTIPLIER_SHIFT	0
 
-#define _FPA0	0x06040
-#define _FPA1	0x06044
-#define _FPB0	0x06048
-#define _FPB1	0x0604c
-#define FP0(pipe) _PIPE(pipe, _FPA0, _FPB0)
-#define FP1(pipe) _PIPE(pipe, _FPA1, _FPB1)
+#define _FPA0	0x6040
+#define _FPA1	0x6044
+#define _FPB0	0x6048
+#define _FPB1	0x604c
+#define FP0(pipe) _MMIO_PIPE(pipe, _FPA0, _FPB0)
+#define FP1(pipe) _MMIO_PIPE(pipe, _FPA1, _FPB1)
 #define   FP_N_DIV_MASK		0x003f0000
 #define   FP_N_PINEVIEW_DIV_MASK	0x00ff0000
 #define   FP_N_DIV_SHIFT		16
@@ -2353,7 +2413,7 @@
 #define   FP_M2_DIV_MASK	0x0000003f
 #define   FP_M2_PINEVIEW_DIV_MASK	0x000000ff
 #define   FP_M2_DIV_SHIFT		 0
-#define DPLL_TEST	0x606c
+#define DPLL_TEST	_MMIO(0x606c)
 #define   DPLLB_TEST_SDVO_DIV_1		(0 << 22)
 #define   DPLLB_TEST_SDVO_DIV_2		(1 << 22)
 #define   DPLLB_TEST_SDVO_DIV_4		(2 << 22)
@@ -2364,12 +2424,12 @@
 #define   DPLLA_TEST_N_BYPASS		(1 << 3)
 #define   DPLLA_TEST_M_BYPASS		(1 << 2)
 #define   DPLLA_INPUT_BUFFER_ENABLE	(1 << 0)
-#define D_STATE		0x6104
+#define D_STATE		_MMIO(0x6104)
 #define  DSTATE_GFX_RESET_I830			(1<<6)
 #define  DSTATE_PLL_D3_OFF			(1<<3)
 #define  DSTATE_GFX_CLOCK_GATING		(1<<1)
 #define  DSTATE_DOT_CLOCK_GATING		(1<<0)
-#define DSPCLK_GATE_D	(dev_priv->info.display_mmio_offset + 0x6200)
+#define DSPCLK_GATE_D	_MMIO(dev_priv->info.display_mmio_offset + 0x6200)
 # define DPUNIT_B_CLOCK_GATE_DISABLE		(1 << 30) /* 965 */
 # define VSUNIT_CLOCK_GATE_DISABLE		(1 << 29) /* 965 */
 # define VRHUNIT_CLOCK_GATE_DISABLE		(1 << 28) /* 965 */
@@ -2408,7 +2468,7 @@
 # define ZVUNIT_CLOCK_GATE_DISABLE		(1 << 0) /* 830 */
 # define OVLUNIT_CLOCK_GATE_DISABLE		(1 << 0) /* 845,865 */
 
-#define RENCLK_GATE_D1		0x6204
+#define RENCLK_GATE_D1		_MMIO(0x6204)
 # define BLITTER_CLOCK_GATE_DISABLE		(1 << 13) /* 945GM only */
 # define MPEG_CLOCK_GATE_DISABLE		(1 << 12) /* 945GM only */
 # define PC_FE_CLOCK_GATE_DISABLE		(1 << 11)
@@ -2472,35 +2532,35 @@
 # define I965_FT_CLOCK_GATE_DISABLE		(1 << 1)
 # define I965_DM_CLOCK_GATE_DISABLE		(1 << 0)
 
-#define RENCLK_GATE_D2		0x6208
+#define RENCLK_GATE_D2		_MMIO(0x6208)
 #define VF_UNIT_CLOCK_GATE_DISABLE		(1 << 9)
 #define GS_UNIT_CLOCK_GATE_DISABLE		(1 << 7)
 #define CL_UNIT_CLOCK_GATE_DISABLE		(1 << 6)
 
-#define VDECCLK_GATE_D		0x620C		/* g4x only */
+#define VDECCLK_GATE_D		_MMIO(0x620C)		/* g4x only */
 #define  VCP_UNIT_CLOCK_GATE_DISABLE		(1 << 4)
 
-#define RAMCLK_GATE_D		0x6210		/* CRL only */
-#define DEUC			0x6214          /* CRL only */
+#define RAMCLK_GATE_D		_MMIO(0x6210)		/* CRL only */
+#define DEUC			_MMIO(0x6214)          /* CRL only */
 
-#define FW_BLC_SELF_VLV		(VLV_DISPLAY_BASE + 0x6500)
+#define FW_BLC_SELF_VLV		_MMIO(VLV_DISPLAY_BASE + 0x6500)
 #define  FW_CSPWRDWNEN		(1<<15)
 
-#define MI_ARB_VLV		(VLV_DISPLAY_BASE + 0x6504)
+#define MI_ARB_VLV		_MMIO(VLV_DISPLAY_BASE + 0x6504)
 
-#define CZCLK_CDCLK_FREQ_RATIO	(VLV_DISPLAY_BASE + 0x6508)
+#define CZCLK_CDCLK_FREQ_RATIO	_MMIO(VLV_DISPLAY_BASE + 0x6508)
 #define   CDCLK_FREQ_SHIFT	4
 #define   CDCLK_FREQ_MASK	(0x1f << CDCLK_FREQ_SHIFT)
 #define   CZCLK_FREQ_MASK	0xf
 
-#define GCI_CONTROL		(VLV_DISPLAY_BASE + 0x650C)
+#define GCI_CONTROL		_MMIO(VLV_DISPLAY_BASE + 0x650C)
 #define   PFI_CREDIT_63		(9 << 28)		/* chv only */
 #define   PFI_CREDIT_31		(8 << 28)		/* chv only */
 #define   PFI_CREDIT(x)		(((x) - 8) << 28)	/* 8-15 */
 #define   PFI_CREDIT_RESEND	(1 << 27)
 #define   VGA_FAST_MODE_DISABLE	(1 << 14)
 
-#define GMBUSFREQ_VLV		(VLV_DISPLAY_BASE + 0x6510)
+#define GMBUSFREQ_VLV		_MMIO(VLV_DISPLAY_BASE + 0x6510)
 
 /*
  * Palette regs
@@ -2508,8 +2568,8 @@
 #define PALETTE_A_OFFSET 0xa000
 #define PALETTE_B_OFFSET 0xa800
 #define CHV_PALETTE_C_OFFSET 0xc000
-#define PALETTE(pipe, i) (dev_priv->info.palette_offsets[pipe] + \
-			  dev_priv->info.display_mmio_offset + (i) * 4)
+#define PALETTE(pipe, i) _MMIO(dev_priv->info.palette_offsets[pipe] +	\
+			      dev_priv->info.display_mmio_offset + (i) * 4)
 
 /* MCH MMIO space */
 
@@ -2527,37 +2587,37 @@
 
 #define MCHBAR_MIRROR_BASE_SNB	0x140000
 
-#define CTG_STOLEN_RESERVED		(MCHBAR_MIRROR_BASE + 0x34)
-#define ELK_STOLEN_RESERVED		(MCHBAR_MIRROR_BASE + 0x48)
+#define CTG_STOLEN_RESERVED		_MMIO(MCHBAR_MIRROR_BASE + 0x34)
+#define ELK_STOLEN_RESERVED		_MMIO(MCHBAR_MIRROR_BASE + 0x48)
 #define G4X_STOLEN_RESERVED_ADDR1_MASK	(0xFFFF << 16)
 #define G4X_STOLEN_RESERVED_ADDR2_MASK	(0xFFF << 4)
 
 /* Memory controller frequency in MCHBAR for Haswell (possible SNB+) */
-#define DCLK (MCHBAR_MIRROR_BASE_SNB + 0x5e04)
+#define DCLK _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5e04)
 
 /* 915-945 and GM965 MCH register controlling DRAM channel access */
-#define DCC			0x10200
+#define DCC			_MMIO(MCHBAR_MIRROR_BASE + 0x200)
 #define DCC_ADDRESSING_MODE_SINGLE_CHANNEL		(0 << 0)
 #define DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC	(1 << 0)
 #define DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED	(2 << 0)
 #define DCC_ADDRESSING_MODE_MASK			(3 << 0)
 #define DCC_CHANNEL_XOR_DISABLE				(1 << 10)
 #define DCC_CHANNEL_XOR_BIT_17				(1 << 9)
-#define DCC2			0x10204
+#define DCC2			_MMIO(MCHBAR_MIRROR_BASE + 0x204)
 #define DCC2_MODIFIED_ENHANCED_DISABLE			(1 << 20)
 
 /* Pineview MCH register contains DDR3 setting */
-#define CSHRDDR3CTL            0x101a8
+#define CSHRDDR3CTL            _MMIO(MCHBAR_MIRROR_BASE + 0x1a8)
 #define CSHRDDR3CTL_DDR3       (1 << 2)
 
 /* 965 MCH register controlling DRAM channel configuration */
-#define C0DRB3			0x10206
-#define C1DRB3			0x10606
+#define C0DRB3			_MMIO(MCHBAR_MIRROR_BASE + 0x206)
+#define C1DRB3			_MMIO(MCHBAR_MIRROR_BASE + 0x606)
 
 /* snb MCH registers for reading the DRAM channel configuration */
-#define MAD_DIMM_C0			(MCHBAR_MIRROR_BASE_SNB + 0x5004)
-#define MAD_DIMM_C1			(MCHBAR_MIRROR_BASE_SNB + 0x5008)
-#define MAD_DIMM_C2			(MCHBAR_MIRROR_BASE_SNB + 0x500C)
+#define MAD_DIMM_C0			_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5004)
+#define MAD_DIMM_C1			_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5008)
+#define MAD_DIMM_C2			_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x500C)
 #define   MAD_DIMM_ECC_MASK		(0x3 << 24)
 #define   MAD_DIMM_ECC_OFF		(0x0 << 24)
 #define   MAD_DIMM_ECC_IO_ON_LOGIC_OFF	(0x1 << 24)
@@ -2577,14 +2637,14 @@
 #define   MAD_DIMM_A_SIZE_MASK		(0xff << MAD_DIMM_A_SIZE_SHIFT)
 
 /* snb MCH registers for priority tuning */
-#define MCH_SSKPD			(MCHBAR_MIRROR_BASE_SNB + 0x5d10)
+#define MCH_SSKPD			_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5d10)
 #define   MCH_SSKPD_WM0_MASK		0x3f
 #define   MCH_SSKPD_WM0_VAL		0xc
 
-#define MCH_SECP_NRG_STTS		(MCHBAR_MIRROR_BASE_SNB + 0x592c)
+#define MCH_SECP_NRG_STTS		_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x592c)
 
 /* Clocking configuration register */
-#define CLKCFG			0x10c00
+#define CLKCFG			_MMIO(MCHBAR_MIRROR_BASE + 0xc00)
 #define CLKCFG_FSB_400					(5 << 0)	/* hrawclk 100 */
 #define CLKCFG_FSB_533					(1 << 0)	/* hrawclk 133 */
 #define CLKCFG_FSB_667					(3 << 0)	/* hrawclk 166 */
@@ -2600,26 +2660,26 @@
 #define CLKCFG_MEM_800					(3 << 4)
 #define CLKCFG_MEM_MASK					(7 << 4)
 
-#define HPLLVCO                 (MCHBAR_MIRROR_BASE + 0xc38)
-#define HPLLVCO_MOBILE          (MCHBAR_MIRROR_BASE + 0xc0f)
+#define HPLLVCO                 _MMIO(MCHBAR_MIRROR_BASE + 0xc38)
+#define HPLLVCO_MOBILE          _MMIO(MCHBAR_MIRROR_BASE + 0xc0f)
 
-#define TSC1			0x11001
+#define TSC1			_MMIO(0x11001)
 #define   TSE			(1<<0)
-#define TR1			0x11006
-#define TSFS			0x11020
+#define TR1			_MMIO(0x11006)
+#define TSFS			_MMIO(0x11020)
 #define   TSFS_SLOPE_MASK	0x0000ff00
 #define   TSFS_SLOPE_SHIFT	8
 #define   TSFS_INTR_MASK	0x000000ff
 
-#define CRSTANDVID		0x11100
-#define PXVFREQ(i)		(0x11110 + (i) * 4) /* P[0-15]VIDFREQ (0x1114c) (Ironlake) */
+#define CRSTANDVID		_MMIO(0x11100)
+#define PXVFREQ(fstart)		_MMIO(0x11110 + (fstart) * 4)  /* P[0-15]VIDFREQ (0x1114c) (Ironlake) */
 #define   PXVFREQ_PX_MASK	0x7f000000
 #define   PXVFREQ_PX_SHIFT	24
-#define VIDFREQ_BASE		0x11110
-#define VIDFREQ1		0x11110 /* VIDFREQ1-4 (0x1111c) (Cantiga) */
-#define VIDFREQ2		0x11114
-#define VIDFREQ3		0x11118
-#define VIDFREQ4		0x1111c
+#define VIDFREQ_BASE		_MMIO(0x11110)
+#define VIDFREQ1		_MMIO(0x11110) /* VIDFREQ1-4 (0x1111c) (Cantiga) */
+#define VIDFREQ2		_MMIO(0x11114)
+#define VIDFREQ3		_MMIO(0x11118)
+#define VIDFREQ4		_MMIO(0x1111c)
 #define   VIDFREQ_P0_MASK	0x1f000000
 #define   VIDFREQ_P0_SHIFT	24
 #define   VIDFREQ_P0_CSCLK_MASK	0x00f00000
@@ -2631,8 +2691,8 @@
 #define   VIDFREQ_P1_CSCLK_MASK	0x000000f0
 #define   VIDFREQ_P1_CSCLK_SHIFT 4
 #define   VIDFREQ_P1_CRCLK_MASK	0x0000000f
-#define INTTOEXT_BASE_ILK	0x11300
-#define INTTOEXT_BASE		0x11120 /* INTTOEXT1-8 (0x1113c) */
+#define INTTOEXT_BASE_ILK	_MMIO(0x11300)
+#define INTTOEXT_BASE		_MMIO(0x11120) /* INTTOEXT1-8 (0x1113c) */
 #define   INTTOEXT_MAP3_SHIFT	24
 #define   INTTOEXT_MAP3_MASK	(0x1f << INTTOEXT_MAP3_SHIFT)
 #define   INTTOEXT_MAP2_SHIFT	16
@@ -2641,7 +2701,7 @@
 #define   INTTOEXT_MAP1_MASK	(0x1f << INTTOEXT_MAP1_SHIFT)
 #define   INTTOEXT_MAP0_SHIFT	0
 #define   INTTOEXT_MAP0_MASK	(0x1f << INTTOEXT_MAP0_SHIFT)
-#define MEMSWCTL		0x11170 /* Ironlake only */
+#define MEMSWCTL		_MMIO(0x11170) /* Ironlake only */
 #define   MEMCTL_CMD_MASK	0xe000
 #define   MEMCTL_CMD_SHIFT	13
 #define   MEMCTL_CMD_RCLK_OFF	0
@@ -2656,8 +2716,8 @@
 #define   MEMCTL_FREQ_SHIFT	8
 #define   MEMCTL_SFCAVM		(1<<7)
 #define   MEMCTL_TGT_VID_MASK	0x007f
-#define MEMIHYST		0x1117c
-#define MEMINTREN		0x11180 /* 16 bits */
+#define MEMIHYST		_MMIO(0x1117c)
+#define MEMINTREN		_MMIO(0x11180) /* 16 bits */
 #define   MEMINT_RSEXIT_EN	(1<<8)
 #define   MEMINT_CX_SUPR_EN	(1<<7)
 #define   MEMINT_CONT_BUSY_EN	(1<<6)
@@ -2667,7 +2727,7 @@
 #define   MEMINT_UP_EVAL_EN	(1<<2)
 #define   MEMINT_DOWN_EVAL_EN	(1<<1)
 #define   MEMINT_SW_CMD_EN	(1<<0)
-#define MEMINTRSTR		0x11182 /* 16 bits */
+#define MEMINTRSTR		_MMIO(0x11182) /* 16 bits */
 #define   MEM_RSEXIT_MASK	0xc000
 #define   MEM_RSEXIT_SHIFT	14
 #define   MEM_CONT_BUSY_MASK	0x3000
@@ -2687,7 +2747,7 @@
 #define   MEM_INT_STEER_CMR	1
 #define   MEM_INT_STEER_SMI	2
 #define   MEM_INT_STEER_SCI	3
-#define MEMINTRSTS		0x11184
+#define MEMINTRSTS		_MMIO(0x11184)
 #define   MEMINT_RSEXIT		(1<<7)
 #define   MEMINT_CONT_BUSY	(1<<6)
 #define   MEMINT_AVG_BUSY	(1<<5)
@@ -2696,7 +2756,7 @@
 #define   MEMINT_UP_EVAL	(1<<2)
 #define   MEMINT_DOWN_EVAL	(1<<1)
 #define   MEMINT_SW_CMD		(1<<0)
-#define MEMMODECTL		0x11190
+#define MEMMODECTL		_MMIO(0x11190)
 #define   MEMMODE_BOOST_EN	(1<<31)
 #define   MEMMODE_BOOST_FREQ_MASK 0x0f000000 /* jitter for boost, 0-15 */
 #define   MEMMODE_BOOST_FREQ_SHIFT 24
@@ -2713,8 +2773,8 @@
 #define   MEMMODE_FMAX_MASK	0x000000f0 /* max jitter, 0-15 */
 #define   MEMMODE_FMAX_SHIFT	4
 #define   MEMMODE_FMIN_MASK	0x0000000f /* min jitter, 0-15 */
-#define RCBMAXAVG		0x1119c
-#define MEMSWCTL2		0x1119e /* Cantiga only */
+#define RCBMAXAVG		_MMIO(0x1119c)
+#define MEMSWCTL2		_MMIO(0x1119e) /* Cantiga only */
 #define   SWMEMCMD_RENDER_OFF	(0 << 13)
 #define   SWMEMCMD_RENDER_ON	(1 << 13)
 #define   SWMEMCMD_SWFREQ	(2 << 13)
@@ -2726,11 +2786,11 @@
 #define   SWFREQ_MASK		0x0380 /* P0-7 */
 #define   SWFREQ_SHIFT		7
 #define   TARVID_MASK		0x001f
-#define MEMSTAT_CTG		0x111a0
-#define RCBMINAVG		0x111a0
-#define RCUPEI			0x111b0
-#define RCDNEI			0x111b4
-#define RSTDBYCTL		0x111b8
+#define MEMSTAT_CTG		_MMIO(0x111a0)
+#define RCBMINAVG		_MMIO(0x111a0)
+#define RCUPEI			_MMIO(0x111b0)
+#define RCDNEI			_MMIO(0x111b4)
+#define RSTDBYCTL		_MMIO(0x111b8)
 #define   RS1EN			(1<<31)
 #define   RS2EN			(1<<30)
 #define   RS3EN			(1<<29)
@@ -2774,10 +2834,10 @@
 #define   RS_CSTATE_C367_RS2	(3<<4)
 #define   REDSAVES		(1<<3) /* no context save if was idle during rs0 */
 #define   REDRESTORES		(1<<2) /* no restore if was idle during rs0 */
-#define VIDCTL			0x111c0
-#define VIDSTS			0x111c8
-#define VIDSTART		0x111cc /* 8 bits */
-#define MEMSTAT_ILK			0x111f8
+#define VIDCTL			_MMIO(0x111c0)
+#define VIDSTS			_MMIO(0x111c8)
+#define VIDSTART		_MMIO(0x111cc) /* 8 bits */
+#define MEMSTAT_ILK		_MMIO(0x111f8)
 #define   MEMSTAT_VID_MASK	0x7f00
 #define   MEMSTAT_VID_SHIFT	8
 #define   MEMSTAT_PSTATE_MASK	0x00f8
@@ -2788,55 +2848,55 @@
 #define   MEMSTAT_SRC_CTL_TRB	1
 #define   MEMSTAT_SRC_CTL_THM	2
 #define   MEMSTAT_SRC_CTL_STDBY 3
-#define RCPREVBSYTUPAVG		0x113b8
-#define RCPREVBSYTDNAVG		0x113bc
-#define PMMISC			0x11214
+#define RCPREVBSYTUPAVG		_MMIO(0x113b8)
+#define RCPREVBSYTDNAVG		_MMIO(0x113bc)
+#define PMMISC			_MMIO(0x11214)
 #define   MCPPCE_EN		(1<<0) /* enable PM_MSG from PCH->MPC */
-#define SDEW			0x1124c
-#define CSIEW0			0x11250
-#define CSIEW1			0x11254
-#define CSIEW2			0x11258
-#define PEW(i)			(0x1125c + (i) * 4) /* 5 registers */
-#define DEW(i)			(0x11270 + (i) * 4) /* 3 registers */
-#define MCHAFE			0x112c0
-#define CSIEC			0x112e0
-#define DMIEC			0x112e4
-#define DDREC			0x112e8
-#define PEG0EC			0x112ec
-#define PEG1EC			0x112f0
-#define GFXEC			0x112f4
-#define RPPREVBSYTUPAVG		0x113b8
-#define RPPREVBSYTDNAVG		0x113bc
-#define ECR			0x11600
+#define SDEW			_MMIO(0x1124c)
+#define CSIEW0			_MMIO(0x11250)
+#define CSIEW1			_MMIO(0x11254)
+#define CSIEW2			_MMIO(0x11258)
+#define PEW(i)			_MMIO(0x1125c + (i) * 4) /* 5 registers */
+#define DEW(i)			_MMIO(0x11270 + (i) * 4) /* 3 registers */
+#define MCHAFE			_MMIO(0x112c0)
+#define CSIEC			_MMIO(0x112e0)
+#define DMIEC			_MMIO(0x112e4)
+#define DDREC			_MMIO(0x112e8)
+#define PEG0EC			_MMIO(0x112ec)
+#define PEG1EC			_MMIO(0x112f0)
+#define GFXEC			_MMIO(0x112f4)
+#define RPPREVBSYTUPAVG		_MMIO(0x113b8)
+#define RPPREVBSYTDNAVG		_MMIO(0x113bc)
+#define ECR			_MMIO(0x11600)
 #define   ECR_GPFE		(1<<31)
 #define   ECR_IMONE		(1<<30)
 #define   ECR_CAP_MASK		0x0000001f /* Event range, 0-31 */
-#define OGW0			0x11608
-#define OGW1			0x1160c
-#define EG0			0x11610
-#define EG1			0x11614
-#define EG2			0x11618
-#define EG3			0x1161c
-#define EG4			0x11620
-#define EG5			0x11624
-#define EG6			0x11628
-#define EG7			0x1162c
-#define PXW(i)			(0x11664 + (i) * 4) /* 4 registers */
-#define PXWL(i)			(0x11680 + (i) * 4) /* 8 registers */
-#define LCFUSE02		0x116c0
+#define OGW0			_MMIO(0x11608)
+#define OGW1			_MMIO(0x1160c)
+#define EG0			_MMIO(0x11610)
+#define EG1			_MMIO(0x11614)
+#define EG2			_MMIO(0x11618)
+#define EG3			_MMIO(0x1161c)
+#define EG4			_MMIO(0x11620)
+#define EG5			_MMIO(0x11624)
+#define EG6			_MMIO(0x11628)
+#define EG7			_MMIO(0x1162c)
+#define PXW(i)			_MMIO(0x11664 + (i) * 4) /* 4 registers */
+#define PXWL(i)			_MMIO(0x11680 + (i) * 8) /* 8 registers */
+#define LCFUSE02		_MMIO(0x116c0)
 #define   LCFUSE_HIV_MASK	0x000000ff
-#define CSIPLL0			0x12c10
-#define DDRMPLL1		0X12c20
-#define PEG_BAND_GAP_DATA	0x14d68
+#define CSIPLL0			_MMIO(0x12c10)
+#define DDRMPLL1		_MMIO(0X12c20)
+#define PEG_BAND_GAP_DATA	_MMIO(0x14d68)
 
-#define GEN6_GT_THREAD_STATUS_REG 0x13805c
+#define GEN6_GT_THREAD_STATUS_REG _MMIO(0x13805c)
 #define GEN6_GT_THREAD_STATUS_CORE_MASK 0x7
 
-#define GEN6_GT_PERF_STATUS	(MCHBAR_MIRROR_BASE_SNB + 0x5948)
-#define BXT_GT_PERF_STATUS      (MCHBAR_MIRROR_BASE_SNB + 0x7070)
-#define GEN6_RP_STATE_LIMITS	(MCHBAR_MIRROR_BASE_SNB + 0x5994)
-#define GEN6_RP_STATE_CAP	(MCHBAR_MIRROR_BASE_SNB + 0x5998)
-#define BXT_RP_STATE_CAP        0x138170
+#define GEN6_GT_PERF_STATUS	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5948)
+#define BXT_GT_PERF_STATUS      _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x7070)
+#define GEN6_RP_STATE_LIMITS	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5994)
+#define GEN6_RP_STATE_CAP	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5998)
+#define BXT_RP_STATE_CAP        _MMIO(0x138170)
 
 #define INTERVAL_1_28_US(us)	(((us) * 100) >> 7)
 #define INTERVAL_1_33_US(us)	(((us) * 3)   >> 2)
@@ -2850,7 +2910,7 @@
 /*
  * Logical Context regs
  */
-#define CCID			0x2180
+#define CCID			_MMIO(0x2180)
 #define   CCID_EN		(1<<0)
 /*
  * Notes on SNB/IVB/VLV context size:
@@ -2865,7 +2925,7 @@
  * - GT1 size just indicates how much of render context
  *   doesn't need saving on GT1
  */
-#define CXT_SIZE		0x21a0
+#define CXT_SIZE		_MMIO(0x21a0)
 #define GEN6_CXT_POWER_SIZE(cxt_reg)	(((cxt_reg) >> 24) & 0x3f)
 #define GEN6_CXT_RING_SIZE(cxt_reg)	(((cxt_reg) >> 18) & 0x3f)
 #define GEN6_CXT_RENDER_SIZE(cxt_reg)	(((cxt_reg) >> 12) & 0x3f)
@@ -2874,7 +2934,7 @@
 #define GEN6_CXT_TOTAL_SIZE(cxt_reg)	(GEN6_CXT_RING_SIZE(cxt_reg) + \
 					GEN6_CXT_EXTENDED_SIZE(cxt_reg) + \
 					GEN6_CXT_PIPELINE_SIZE(cxt_reg))
-#define GEN7_CXT_SIZE		0x21a8
+#define GEN7_CXT_SIZE		_MMIO(0x21a8)
 #define GEN7_CXT_POWER_SIZE(ctx_reg)	(((ctx_reg) >> 25) & 0x7f)
 #define GEN7_CXT_RING_SIZE(ctx_reg)	(((ctx_reg) >> 22) & 0x7)
 #define GEN7_CXT_RENDER_SIZE(ctx_reg)	(((ctx_reg) >> 16) & 0x3f)
@@ -2894,23 +2954,30 @@
 /* Same as Haswell, but 72064 bytes now. */
 #define GEN8_CXT_TOTAL_SIZE		(18 * PAGE_SIZE)
 
-#define CHV_CLK_CTL1			0x101100
-#define VLV_CLK_CTL2			0x101104
+#define CHV_CLK_CTL1			_MMIO(0x101100)
+#define VLV_CLK_CTL2			_MMIO(0x101104)
 #define   CLK_CTL2_CZCOUNT_30NS_SHIFT	28
 
 /*
  * Overlay regs
  */
 
-#define OVADD			0x30000
-#define DOVSTA			0x30008
+#define OVADD			_MMIO(0x30000)
+#define DOVSTA			_MMIO(0x30008)
 #define OC_BUF			(0x3<<20)
-#define OGAMC5			0x30010
-#define OGAMC4			0x30014
-#define OGAMC3			0x30018
-#define OGAMC2			0x3001c
-#define OGAMC1			0x30020
-#define OGAMC0			0x30024
+#define OGAMC5			_MMIO(0x30010)
+#define OGAMC4			_MMIO(0x30014)
+#define OGAMC3			_MMIO(0x30018)
+#define OGAMC2			_MMIO(0x3001c)
+#define OGAMC1			_MMIO(0x30020)
+#define OGAMC0			_MMIO(0x30024)
+
+/*
+ * GEN9 clock gating regs
+ */
+#define GEN9_CLKGATE_DIS_0		_MMIO(0x46530)
+#define   PWM2_GATING_DIS		(1 << 14)
+#define   PWM1_GATING_DIS		(1 << 13)
 
 /*
  * Display engine regs
@@ -2970,28 +3037,18 @@
 #define _PIPE_CRC_RES_4_B_IVB		0x61070
 #define _PIPE_CRC_RES_5_B_IVB		0x61074
 
-#define PIPE_CRC_CTL(pipe) _TRANSCODER2(pipe, _PIPE_CRC_CTL_A)
-#define PIPE_CRC_RES_1_IVB(pipe)	\
-	_TRANSCODER2(pipe, _PIPE_CRC_RES_1_A_IVB)
-#define PIPE_CRC_RES_2_IVB(pipe)	\
-	_TRANSCODER2(pipe, _PIPE_CRC_RES_2_A_IVB)
-#define PIPE_CRC_RES_3_IVB(pipe)	\
-	_TRANSCODER2(pipe, _PIPE_CRC_RES_3_A_IVB)
-#define PIPE_CRC_RES_4_IVB(pipe)	\
-	_TRANSCODER2(pipe, _PIPE_CRC_RES_4_A_IVB)
-#define PIPE_CRC_RES_5_IVB(pipe)	\
-	_TRANSCODER2(pipe, _PIPE_CRC_RES_5_A_IVB)
+#define PIPE_CRC_CTL(pipe)		_MMIO_TRANS2(pipe, _PIPE_CRC_CTL_A)
+#define PIPE_CRC_RES_1_IVB(pipe)	_MMIO_TRANS2(pipe, _PIPE_CRC_RES_1_A_IVB)
+#define PIPE_CRC_RES_2_IVB(pipe)	_MMIO_TRANS2(pipe, _PIPE_CRC_RES_2_A_IVB)
+#define PIPE_CRC_RES_3_IVB(pipe)	_MMIO_TRANS2(pipe, _PIPE_CRC_RES_3_A_IVB)
+#define PIPE_CRC_RES_4_IVB(pipe)	_MMIO_TRANS2(pipe, _PIPE_CRC_RES_4_A_IVB)
+#define PIPE_CRC_RES_5_IVB(pipe)	_MMIO_TRANS2(pipe, _PIPE_CRC_RES_5_A_IVB)
 
-#define PIPE_CRC_RES_RED(pipe) \
-	_TRANSCODER2(pipe, _PIPE_CRC_RES_RED_A)
-#define PIPE_CRC_RES_GREEN(pipe) \
-	_TRANSCODER2(pipe, _PIPE_CRC_RES_GREEN_A)
-#define PIPE_CRC_RES_BLUE(pipe) \
-	_TRANSCODER2(pipe, _PIPE_CRC_RES_BLUE_A)
-#define PIPE_CRC_RES_RES1_I915(pipe) \
-	_TRANSCODER2(pipe, _PIPE_CRC_RES_RES1_A_I915)
-#define PIPE_CRC_RES_RES2_G4X(pipe) \
-	_TRANSCODER2(pipe, _PIPE_CRC_RES_RES2_A_G4X)
+#define PIPE_CRC_RES_RED(pipe)		_MMIO_TRANS2(pipe, _PIPE_CRC_RES_RED_A)
+#define PIPE_CRC_RES_GREEN(pipe)	_MMIO_TRANS2(pipe, _PIPE_CRC_RES_GREEN_A)
+#define PIPE_CRC_RES_BLUE(pipe)		_MMIO_TRANS2(pipe, _PIPE_CRC_RES_BLUE_A)
+#define PIPE_CRC_RES_RES1_I915(pipe)	_MMIO_TRANS2(pipe, _PIPE_CRC_RES_RES1_A_I915)
+#define PIPE_CRC_RES_RES2_G4X(pipe)	_MMIO_TRANS2(pipe, _PIPE_CRC_RES_RES2_A_G4X)
 
 /* Pipe A timing regs */
 #define _HTOTAL_A	0x60000
@@ -3023,20 +3080,20 @@
 #define CHV_TRANSCODER_C_OFFSET 0x63000
 #define TRANSCODER_EDP_OFFSET 0x6f000
 
-#define _TRANSCODER2(pipe, reg) (dev_priv->info.trans_offsets[(pipe)] - \
+#define _MMIO_TRANS2(pipe, reg) _MMIO(dev_priv->info.trans_offsets[(pipe)] - \
 	dev_priv->info.trans_offsets[TRANSCODER_A] + (reg) + \
 	dev_priv->info.display_mmio_offset)
 
-#define HTOTAL(trans) _TRANSCODER2(trans, _HTOTAL_A)
-#define HBLANK(trans) _TRANSCODER2(trans, _HBLANK_A)
-#define HSYNC(trans) _TRANSCODER2(trans, _HSYNC_A)
-#define VTOTAL(trans) _TRANSCODER2(trans, _VTOTAL_A)
-#define VBLANK(trans) _TRANSCODER2(trans, _VBLANK_A)
-#define VSYNC(trans) _TRANSCODER2(trans, _VSYNC_A)
-#define BCLRPAT(trans) _TRANSCODER2(trans, _BCLRPAT_A)
-#define VSYNCSHIFT(trans) _TRANSCODER2(trans, _VSYNCSHIFT_A)
-#define PIPESRC(trans) _TRANSCODER2(trans, _PIPEASRC)
-#define PIPE_MULT(trans) _TRANSCODER2(trans, _PIPE_MULT_A)
+#define HTOTAL(trans)		_MMIO_TRANS2(trans, _HTOTAL_A)
+#define HBLANK(trans)		_MMIO_TRANS2(trans, _HBLANK_A)
+#define HSYNC(trans)		_MMIO_TRANS2(trans, _HSYNC_A)
+#define VTOTAL(trans)		_MMIO_TRANS2(trans, _VTOTAL_A)
+#define VBLANK(trans)		_MMIO_TRANS2(trans, _VBLANK_A)
+#define VSYNC(trans)		_MMIO_TRANS2(trans, _VSYNC_A)
+#define BCLRPAT(trans)		_MMIO_TRANS2(trans, _BCLRPAT_A)
+#define VSYNCSHIFT(trans)	_MMIO_TRANS2(trans, _VSYNCSHIFT_A)
+#define PIPESRC(trans)		_MMIO_TRANS2(trans, _PIPEASRC)
+#define PIPE_MULT(trans)	_MMIO_TRANS2(trans, _PIPE_MULT_A)
 
 /* VLV eDP PSR registers */
 #define _PSRCTLA				(VLV_DISPLAY_BASE + 0x60090)
@@ -3052,14 +3109,14 @@
 #define  VLV_EDP_PSR_DBL_FRAME			(1<<10)
 #define  VLV_EDP_PSR_FRAME_COUNT_MASK		(0xff<<16)
 #define  VLV_EDP_PSR_IDLE_FRAME_SHIFT		16
-#define VLV_PSRCTL(pipe) _PIPE(pipe, _PSRCTLA, _PSRCTLB)
+#define VLV_PSRCTL(pipe)	_MMIO_PIPE(pipe, _PSRCTLA, _PSRCTLB)
 
 #define _VSCSDPA			(VLV_DISPLAY_BASE + 0x600a0)
 #define _VSCSDPB			(VLV_DISPLAY_BASE + 0x610a0)
 #define  VLV_EDP_PSR_SDP_FREQ_MASK	(3<<30)
 #define  VLV_EDP_PSR_SDP_FREQ_ONCE	(1<<31)
 #define  VLV_EDP_PSR_SDP_FREQ_EVFRAME	(1<<30)
-#define VLV_VSCSDP(pipe)	_PIPE(pipe, _VSCSDPA, _VSCSDPB)
+#define VLV_VSCSDP(pipe)	_MMIO_PIPE(pipe, _VSCSDPA, _VSCSDPB)
 
 #define _PSRSTATA			(VLV_DISPLAY_BASE + 0x60094)
 #define _PSRSTATB			(VLV_DISPLAY_BASE + 0x61094)
@@ -3072,11 +3129,12 @@
 #define  VLV_EDP_PSR_ACTIVE_SF_UPDATE	(4<<0)
 #define  VLV_EDP_PSR_EXIT		(5<<0)
 #define  VLV_EDP_PSR_IN_TRANS		(1<<7)
-#define VLV_PSRSTAT(pipe) _PIPE(pipe, _PSRSTATA, _PSRSTATB)
+#define VLV_PSRSTAT(pipe)	_MMIO_PIPE(pipe, _PSRSTATA, _PSRSTATB)
 
 /* HSW+ eDP PSR registers */
-#define EDP_PSR_BASE(dev)                       (IS_HASWELL(dev) ? 0x64800 : 0x6f800)
-#define EDP_PSR_CTL(dev)			(EDP_PSR_BASE(dev) + 0)
+#define HSW_EDP_PSR_BASE	0x64800
+#define BDW_EDP_PSR_BASE	0x6f800
+#define EDP_PSR_CTL				_MMIO(dev_priv->psr_mmio_base + 0)
 #define   EDP_PSR_ENABLE			(1<<31)
 #define   BDW_PSR_SINGLE_FRAME			(1<<30)
 #define   EDP_PSR_LINK_STANDBY			(1<<27)
@@ -3099,14 +3157,10 @@
 #define   EDP_PSR_TP1_TIME_0us			(3<<4)
 #define   EDP_PSR_IDLE_FRAME_SHIFT		0
 
-#define EDP_PSR_AUX_CTL(dev)			(EDP_PSR_BASE(dev) + 0x10)
-#define EDP_PSR_AUX_DATA1(dev)			(EDP_PSR_BASE(dev) + 0x14)
-#define EDP_PSR_AUX_DATA2(dev)			(EDP_PSR_BASE(dev) + 0x18)
-#define EDP_PSR_AUX_DATA3(dev)			(EDP_PSR_BASE(dev) + 0x1c)
-#define EDP_PSR_AUX_DATA4(dev)			(EDP_PSR_BASE(dev) + 0x20)
-#define EDP_PSR_AUX_DATA5(dev)			(EDP_PSR_BASE(dev) + 0x24)
+#define EDP_PSR_AUX_CTL				_MMIO(dev_priv->psr_mmio_base + 0x10)
+#define EDP_PSR_AUX_DATA(i)			_MMIO(dev_priv->psr_mmio_base + 0x14 + (i) * 4) /* 5 registers */
 
-#define EDP_PSR_STATUS_CTL(dev)			(EDP_PSR_BASE(dev) + 0x40)
+#define EDP_PSR_STATUS_CTL			_MMIO(dev_priv->psr_mmio_base + 0x40)
 #define   EDP_PSR_STATUS_STATE_MASK		(7<<29)
 #define   EDP_PSR_STATUS_STATE_IDLE		(0<<29)
 #define   EDP_PSR_STATUS_STATE_SRDONACK		(1<<29)
@@ -3130,15 +3184,15 @@
 #define   EDP_PSR_STATUS_SENDING_TP1		(1<<4)
 #define   EDP_PSR_STATUS_IDLE_MASK		0xf
 
-#define EDP_PSR_PERF_CNT(dev)		(EDP_PSR_BASE(dev) + 0x44)
+#define EDP_PSR_PERF_CNT		_MMIO(dev_priv->psr_mmio_base + 0x44)
 #define   EDP_PSR_PERF_CNT_MASK		0xffffff
 
-#define EDP_PSR_DEBUG_CTL(dev)		(EDP_PSR_BASE(dev) + 0x60)
+#define EDP_PSR_DEBUG_CTL		_MMIO(dev_priv->psr_mmio_base + 0x60)
 #define   EDP_PSR_DEBUG_MASK_LPSP	(1<<27)
 #define   EDP_PSR_DEBUG_MASK_MEMUP	(1<<26)
 #define   EDP_PSR_DEBUG_MASK_HPD	(1<<25)
 
-#define EDP_PSR2_CTL			0x6f900
+#define EDP_PSR2_CTL			_MMIO(0x6f900)
 #define   EDP_PSR2_ENABLE		(1<<31)
 #define   EDP_SU_TRACK_ENABLE		(1<<30)
 #define   EDP_MAX_SU_DISABLE_TIME(t)	((t)<<20)
@@ -3153,9 +3207,9 @@
 #define   EDP_PSR2_IDLE_MASK		0xf
 
 /* VGA port control */
-#define ADPA			0x61100
-#define PCH_ADPA                0xe1100
-#define VLV_ADPA		(VLV_DISPLAY_BASE + ADPA)
+#define ADPA			_MMIO(0x61100)
+#define PCH_ADPA                _MMIO(0xe1100)
+#define VLV_ADPA		_MMIO(VLV_DISPLAY_BASE + 0x61100)
 
 #define   ADPA_DAC_ENABLE	(1<<31)
 #define   ADPA_DAC_DISABLE	0
@@ -3201,7 +3255,7 @@
 
 
 /* Hotplug control (945+ only) */
-#define PORT_HOTPLUG_EN		(dev_priv->info.display_mmio_offset + 0x61110)
+#define PORT_HOTPLUG_EN		_MMIO(dev_priv->info.display_mmio_offset + 0x61110)
 #define   PORTB_HOTPLUG_INT_EN			(1 << 29)
 #define   PORTC_HOTPLUG_INT_EN			(1 << 28)
 #define   PORTD_HOTPLUG_INT_EN			(1 << 27)
@@ -3231,7 +3285,7 @@
 #define CRT_HOTPLUG_DETECT_VOLTAGE_325MV	(0 << 2)
 #define CRT_HOTPLUG_DETECT_VOLTAGE_475MV	(1 << 2)
 
-#define PORT_HOTPLUG_STAT	(dev_priv->info.display_mmio_offset + 0x61114)
+#define PORT_HOTPLUG_STAT	_MMIO(dev_priv->info.display_mmio_offset + 0x61114)
 /*
  * HDMI/DP bits are gen4+
  *
@@ -3296,21 +3350,23 @@
 
 /* SDVO and HDMI port control.
  * The same register may be used for SDVO or HDMI */
-#define GEN3_SDVOB	0x61140
-#define GEN3_SDVOC	0x61160
+#define _GEN3_SDVOB	0x61140
+#define _GEN3_SDVOC	0x61160
+#define GEN3_SDVOB	_MMIO(_GEN3_SDVOB)
+#define GEN3_SDVOC	_MMIO(_GEN3_SDVOC)
 #define GEN4_HDMIB	GEN3_SDVOB
 #define GEN4_HDMIC	GEN3_SDVOC
-#define VLV_HDMIB	(VLV_DISPLAY_BASE + GEN4_HDMIB)
-#define VLV_HDMIC	(VLV_DISPLAY_BASE + GEN4_HDMIC)
-#define CHV_HDMID	(VLV_DISPLAY_BASE + 0x6116C)
-#define PCH_SDVOB	0xe1140
+#define VLV_HDMIB	_MMIO(VLV_DISPLAY_BASE + 0x61140)
+#define VLV_HDMIC	_MMIO(VLV_DISPLAY_BASE + 0x61160)
+#define CHV_HDMID	_MMIO(VLV_DISPLAY_BASE + 0x6116C)
+#define PCH_SDVOB	_MMIO(0xe1140)
 #define PCH_HDMIB	PCH_SDVOB
-#define PCH_HDMIC	0xe1150
-#define PCH_HDMID	0xe1160
+#define PCH_HDMIC	_MMIO(0xe1150)
+#define PCH_HDMID	_MMIO(0xe1160)
 
-#define PORT_DFT_I9XX				0x61150
+#define PORT_DFT_I9XX				_MMIO(0x61150)
 #define   DC_BALANCE_RESET			(1 << 25)
-#define PORT_DFT2_G4X		(dev_priv->info.display_mmio_offset + 0x61154)
+#define PORT_DFT2_G4X		_MMIO(dev_priv->info.display_mmio_offset + 0x61154)
 #define   DC_BALANCE_RESET_VLV			(1 << 31)
 #define   PIPE_SCRAMBLE_RESET_MASK		((1 << 14) | (0x3 << 0))
 #define   PIPE_C_SCRAMBLE_RESET			(1 << 14) /* chv */
@@ -3370,9 +3426,12 @@
 
 
 /* DVO port control */
-#define DVOA			0x61120
-#define DVOB			0x61140
-#define DVOC			0x61160
+#define _DVOA			0x61120
+#define DVOA			_MMIO(_DVOA)
+#define _DVOB			0x61140
+#define DVOB			_MMIO(_DVOB)
+#define _DVOC			0x61160
+#define DVOC			_MMIO(_DVOC)
 #define   DVO_ENABLE			(1 << 31)
 #define   DVO_PIPE_B_SELECT		(1 << 30)
 #define   DVO_PIPE_STALL_UNUSED		(0 << 28)
@@ -3397,14 +3456,14 @@
 #define   DVO_OUTPUT_CSTATE_PIXELS	(1 << 1)	/* SDG only */
 #define   DVO_OUTPUT_SOURCE_SIZE_PIXELS	(1 << 0)	/* SDG only */
 #define   DVO_PRESERVE_MASK		(0x7<<24)
-#define DVOA_SRCDIM		0x61124
-#define DVOB_SRCDIM		0x61144
-#define DVOC_SRCDIM		0x61164
+#define DVOA_SRCDIM		_MMIO(0x61124)
+#define DVOB_SRCDIM		_MMIO(0x61144)
+#define DVOC_SRCDIM		_MMIO(0x61164)
 #define   DVO_SRCDIM_HORIZONTAL_SHIFT	12
 #define   DVO_SRCDIM_VERTICAL_SHIFT	0
 
 /* LVDS port control */
-#define LVDS			0x61180
+#define LVDS			_MMIO(0x61180)
 /*
  * Enables the LVDS port.  This bit must be set before DPLLs are enabled, as
  * the DPLL semantics change when the LVDS is assigned to that pipe.
@@ -3454,13 +3513,13 @@
 #define   LVDS_B0B3_POWER_UP		(3 << 2)
 
 /* Video Data Island Packet control */
-#define VIDEO_DIP_DATA		0x61178
+#define VIDEO_DIP_DATA		_MMIO(0x61178)
 /* Read the description of VIDEO_DIP_DATA (before Haswell) or VIDEO_DIP_ECC
  * (Haswell and newer) to see which VIDEO_DIP_DATA byte corresponds to each byte
  * of the infoframe structure specified by CEA-861. */
 #define   VIDEO_DIP_DATA_SIZE	32
 #define   VIDEO_DIP_VSC_DATA_SIZE	36
-#define VIDEO_DIP_CTL		0x61170
+#define VIDEO_DIP_CTL		_MMIO(0x61170)
 /* Pre HSW: */
 #define   VIDEO_DIP_ENABLE		(1 << 31)
 #define   VIDEO_DIP_PORT(port)		((port) << 29)
@@ -3487,7 +3546,7 @@
 #define   VIDEO_DIP_ENABLE_SPD_HSW	(1 << 0)
 
 /* Panel power sequencing */
-#define PP_STATUS	0x61200
+#define PP_STATUS	_MMIO(0x61200)
 #define   PP_ON		(1 << 31)
 /*
  * Indicates that all dependencies of the panel are on:
@@ -3513,14 +3572,14 @@
 #define   PP_SEQUENCE_STATE_ON_S1_2	(0xa << 0)
 #define   PP_SEQUENCE_STATE_ON_S1_3	(0xb << 0)
 #define   PP_SEQUENCE_STATE_RESET	(0xf << 0)
-#define PP_CONTROL	0x61204
+#define PP_CONTROL	_MMIO(0x61204)
 #define   POWER_TARGET_ON	(1 << 0)
-#define PP_ON_DELAYS	0x61208
-#define PP_OFF_DELAYS	0x6120c
-#define PP_DIVISOR	0x61210
+#define PP_ON_DELAYS	_MMIO(0x61208)
+#define PP_OFF_DELAYS	_MMIO(0x6120c)
+#define PP_DIVISOR	_MMIO(0x61210)
 
 /* Panel fitting */
-#define PFIT_CONTROL	(dev_priv->info.display_mmio_offset + 0x61230)
+#define PFIT_CONTROL	_MMIO(dev_priv->info.display_mmio_offset + 0x61230)
 #define   PFIT_ENABLE		(1 << 31)
 #define   PFIT_PIPE_MASK	(3 << 29)
 #define   PFIT_PIPE_SHIFT	29
@@ -3538,7 +3597,7 @@
 #define   PFIT_SCALING_PROGRAMMED (1 << 26)
 #define   PFIT_SCALING_PILLAR	(2 << 26)
 #define   PFIT_SCALING_LETTER	(3 << 26)
-#define PFIT_PGM_RATIOS	(dev_priv->info.display_mmio_offset + 0x61234)
+#define PFIT_PGM_RATIOS _MMIO(dev_priv->info.display_mmio_offset + 0x61234)
 /* Pre-965 */
 #define		PFIT_VERT_SCALE_SHIFT		20
 #define		PFIT_VERT_SCALE_MASK		0xfff00000
@@ -3550,25 +3609,25 @@
 #define		PFIT_HORIZ_SCALE_SHIFT_965	0
 #define		PFIT_HORIZ_SCALE_MASK_965	0x00001fff
 
-#define PFIT_AUTO_RATIOS (dev_priv->info.display_mmio_offset + 0x61238)
+#define PFIT_AUTO_RATIOS _MMIO(dev_priv->info.display_mmio_offset + 0x61238)
 
 #define _VLV_BLC_PWM_CTL2_A (dev_priv->info.display_mmio_offset + 0x61250)
 #define _VLV_BLC_PWM_CTL2_B (dev_priv->info.display_mmio_offset + 0x61350)
-#define VLV_BLC_PWM_CTL2(pipe) _PIPE(pipe, _VLV_BLC_PWM_CTL2_A, \
-				     _VLV_BLC_PWM_CTL2_B)
+#define VLV_BLC_PWM_CTL2(pipe) _MMIO_PIPE(pipe, _VLV_BLC_PWM_CTL2_A, \
+					 _VLV_BLC_PWM_CTL2_B)
 
 #define _VLV_BLC_PWM_CTL_A (dev_priv->info.display_mmio_offset + 0x61254)
 #define _VLV_BLC_PWM_CTL_B (dev_priv->info.display_mmio_offset + 0x61354)
-#define VLV_BLC_PWM_CTL(pipe) _PIPE(pipe, _VLV_BLC_PWM_CTL_A, \
-				    _VLV_BLC_PWM_CTL_B)
+#define VLV_BLC_PWM_CTL(pipe) _MMIO_PIPE(pipe, _VLV_BLC_PWM_CTL_A, \
+					_VLV_BLC_PWM_CTL_B)
 
 #define _VLV_BLC_HIST_CTL_A (dev_priv->info.display_mmio_offset + 0x61260)
 #define _VLV_BLC_HIST_CTL_B (dev_priv->info.display_mmio_offset + 0x61360)
-#define VLV_BLC_HIST_CTL(pipe) _PIPE(pipe, _VLV_BLC_HIST_CTL_A, \
-				     _VLV_BLC_HIST_CTL_B)
+#define VLV_BLC_HIST_CTL(pipe) _MMIO_PIPE(pipe, _VLV_BLC_HIST_CTL_A, \
+					 _VLV_BLC_HIST_CTL_B)
 
 /* Backlight control */
-#define BLC_PWM_CTL2	(dev_priv->info.display_mmio_offset + 0x61250) /* 965+ only */
+#define BLC_PWM_CTL2	_MMIO(dev_priv->info.display_mmio_offset + 0x61250) /* 965+ only */
 #define   BLM_PWM_ENABLE		(1 << 31)
 #define   BLM_COMBINATION_MODE		(1 << 30) /* gen4 only */
 #define   BLM_PIPE_SELECT		(1 << 29)
@@ -3591,7 +3650,7 @@
 #define   BLM_PHASE_IN_COUNT_MASK	(0xff << 8)
 #define   BLM_PHASE_IN_INCR_SHIFT	(0)
 #define   BLM_PHASE_IN_INCR_MASK	(0xff << 0)
-#define BLC_PWM_CTL	(dev_priv->info.display_mmio_offset + 0x61254)
+#define BLC_PWM_CTL	_MMIO(dev_priv->info.display_mmio_offset + 0x61254)
 /*
  * This is the most significant 15 bits of the number of backlight cycles in a
  * complete cycle of the modulated backlight control.
@@ -3613,25 +3672,25 @@
 #define   BACKLIGHT_DUTY_CYCLE_MASK_PNV		(0xfffe)
 #define   BLM_POLARITY_PNV			(1 << 0) /* pnv only */
 
-#define BLC_HIST_CTL	(dev_priv->info.display_mmio_offset + 0x61260)
+#define BLC_HIST_CTL	_MMIO(dev_priv->info.display_mmio_offset + 0x61260)
 #define  BLM_HISTOGRAM_ENABLE			(1 << 31)
 
 /* New registers for PCH-split platforms. Safe where new bits show up, the
  * register layout machtes with gen4 BLC_PWM_CTL[12]. */
-#define BLC_PWM_CPU_CTL2	0x48250
-#define BLC_PWM_CPU_CTL		0x48254
+#define BLC_PWM_CPU_CTL2	_MMIO(0x48250)
+#define BLC_PWM_CPU_CTL		_MMIO(0x48254)
 
-#define HSW_BLC_PWM2_CTL	0x48350
+#define HSW_BLC_PWM2_CTL	_MMIO(0x48350)
 
 /* PCH CTL1 is totally different, all but the below bits are reserved. CTL2 is
  * like the normal CTL from gen4 and earlier. Hooray for confusing naming. */
-#define BLC_PWM_PCH_CTL1	0xc8250
+#define BLC_PWM_PCH_CTL1	_MMIO(0xc8250)
 #define   BLM_PCH_PWM_ENABLE			(1 << 31)
 #define   BLM_PCH_OVERRIDE_ENABLE		(1 << 30)
 #define   BLM_PCH_POLARITY			(1 << 29)
-#define BLC_PWM_PCH_CTL2	0xc8254
+#define BLC_PWM_PCH_CTL2	_MMIO(0xc8254)
 
-#define UTIL_PIN_CTL		0x48400
+#define UTIL_PIN_CTL		_MMIO(0x48400)
 #define   UTIL_PIN_ENABLE	(1 << 31)
 
 #define   UTIL_PIN_PIPE(x)     ((x) << 29)
@@ -3651,18 +3710,18 @@
 #define _BXT_BLC_PWM_FREQ2			0xC8354
 #define _BXT_BLC_PWM_DUTY2			0xC8358
 
-#define BXT_BLC_PWM_CTL(controller)    _PIPE(controller, \
+#define BXT_BLC_PWM_CTL(controller)    _MMIO_PIPE(controller,		\
 					_BXT_BLC_PWM_CTL1, _BXT_BLC_PWM_CTL2)
-#define BXT_BLC_PWM_FREQ(controller)   _PIPE(controller, \
+#define BXT_BLC_PWM_FREQ(controller)   _MMIO_PIPE(controller, \
 					_BXT_BLC_PWM_FREQ1, _BXT_BLC_PWM_FREQ2)
-#define BXT_BLC_PWM_DUTY(controller)   _PIPE(controller, \
+#define BXT_BLC_PWM_DUTY(controller)   _MMIO_PIPE(controller, \
 					_BXT_BLC_PWM_DUTY1, _BXT_BLC_PWM_DUTY2)
 
-#define PCH_GTC_CTL		0xe7000
+#define PCH_GTC_CTL		_MMIO(0xe7000)
 #define   PCH_GTC_ENABLE	(1 << 31)
 
 /* TV port control */
-#define TV_CTL			0x68000
+#define TV_CTL			_MMIO(0x68000)
 /* Enables the TV encoder */
 # define TV_ENC_ENABLE			(1 << 31)
 /* Sources the TV encoder input from pipe B instead of A. */
@@ -3729,7 +3788,7 @@
 # define TV_TEST_MODE_MONITOR_DETECT	(7 << 0)
 # define TV_TEST_MODE_MASK		(7 << 0)
 
-#define TV_DAC			0x68004
+#define TV_DAC			_MMIO(0x68004)
 # define TV_DAC_SAVE		0x00ffff00
 /*
  * Reports that DAC state change logic has reported change (RO).
@@ -3780,13 +3839,13 @@
  * where 2-bit exponents are unsigned n, and 3-bit exponents are signed n with
  * -1 (0x3) being the only legal negative value.
  */
-#define TV_CSC_Y		0x68010
+#define TV_CSC_Y		_MMIO(0x68010)
 # define TV_RY_MASK			0x07ff0000
 # define TV_RY_SHIFT			16
 # define TV_GY_MASK			0x00000fff
 # define TV_GY_SHIFT			0
 
-#define TV_CSC_Y2		0x68014
+#define TV_CSC_Y2		_MMIO(0x68014)
 # define TV_BY_MASK			0x07ff0000
 # define TV_BY_SHIFT			16
 /*
@@ -3797,13 +3856,13 @@
 # define TV_AY_MASK			0x000003ff
 # define TV_AY_SHIFT			0
 
-#define TV_CSC_U		0x68018
+#define TV_CSC_U		_MMIO(0x68018)
 # define TV_RU_MASK			0x07ff0000
 # define TV_RU_SHIFT			16
 # define TV_GU_MASK			0x000007ff
 # define TV_GU_SHIFT			0
 
-#define TV_CSC_U2		0x6801c
+#define TV_CSC_U2		_MMIO(0x6801c)
 # define TV_BU_MASK			0x07ff0000
 # define TV_BU_SHIFT			16
 /*
@@ -3814,13 +3873,13 @@
 # define TV_AU_MASK			0x000003ff
 # define TV_AU_SHIFT			0
 
-#define TV_CSC_V		0x68020
+#define TV_CSC_V		_MMIO(0x68020)
 # define TV_RV_MASK			0x0fff0000
 # define TV_RV_SHIFT			16
 # define TV_GV_MASK			0x000007ff
 # define TV_GV_SHIFT			0
 
-#define TV_CSC_V2		0x68024
+#define TV_CSC_V2		_MMIO(0x68024)
 # define TV_BV_MASK			0x07ff0000
 # define TV_BV_SHIFT			16
 /*
@@ -3831,7 +3890,7 @@
 # define TV_AV_MASK			0x000007ff
 # define TV_AV_SHIFT			0
 
-#define TV_CLR_KNOBS		0x68028
+#define TV_CLR_KNOBS		_MMIO(0x68028)
 /* 2s-complement brightness adjustment */
 # define TV_BRIGHTNESS_MASK		0xff000000
 # define TV_BRIGHTNESS_SHIFT		24
@@ -3845,7 +3904,7 @@
 # define TV_HUE_MASK			0x000000ff
 # define TV_HUE_SHIFT			0
 
-#define TV_CLR_LEVEL		0x6802c
+#define TV_CLR_LEVEL		_MMIO(0x6802c)
 /* Controls the DAC level for black */
 # define TV_BLACK_LEVEL_MASK		0x01ff0000
 # define TV_BLACK_LEVEL_SHIFT		16
@@ -3853,7 +3912,7 @@
 # define TV_BLANK_LEVEL_MASK		0x000001ff
 # define TV_BLANK_LEVEL_SHIFT		0
 
-#define TV_H_CTL_1		0x68030
+#define TV_H_CTL_1		_MMIO(0x68030)
 /* Number of pixels in the hsync. */
 # define TV_HSYNC_END_MASK		0x1fff0000
 # define TV_HSYNC_END_SHIFT		16
@@ -3861,7 +3920,7 @@
 # define TV_HTOTAL_MASK			0x00001fff
 # define TV_HTOTAL_SHIFT		0
 
-#define TV_H_CTL_2		0x68034
+#define TV_H_CTL_2		_MMIO(0x68034)
 /* Enables the colorburst (needed for non-component color) */
 # define TV_BURST_ENA			(1 << 31)
 /* Offset of the colorburst from the start of hsync, in pixels minus one. */
@@ -3871,7 +3930,7 @@
 # define TV_HBURST_LEN_SHIFT		0
 # define TV_HBURST_LEN_MASK		0x0001fff
 
-#define TV_H_CTL_3		0x68038
+#define TV_H_CTL_3		_MMIO(0x68038)
 /* End of hblank, measured in pixels minus one from start of hsync */
 # define TV_HBLANK_END_SHIFT		16
 # define TV_HBLANK_END_MASK		0x1fff0000
@@ -3879,7 +3938,7 @@
 # define TV_HBLANK_START_SHIFT		0
 # define TV_HBLANK_START_MASK		0x0001fff
 
-#define TV_V_CTL_1		0x6803c
+#define TV_V_CTL_1		_MMIO(0x6803c)
 /* XXX */
 # define TV_NBR_END_SHIFT		16
 # define TV_NBR_END_MASK		0x07ff0000
@@ -3890,7 +3949,7 @@
 # define TV_VI_END_F2_SHIFT		0
 # define TV_VI_END_F2_MASK		0x0000003f
 
-#define TV_V_CTL_2		0x68040
+#define TV_V_CTL_2		_MMIO(0x68040)
 /* Length of vsync, in half lines */
 # define TV_VSYNC_LEN_MASK		0x07ff0000
 # define TV_VSYNC_LEN_SHIFT		16
@@ -3906,7 +3965,7 @@
 # define TV_VSYNC_START_F2_MASK		0x0000007f
 # define TV_VSYNC_START_F2_SHIFT	0
 
-#define TV_V_CTL_3		0x68044
+#define TV_V_CTL_3		_MMIO(0x68044)
 /* Enables generation of the equalization signal */
 # define TV_EQUAL_ENA			(1 << 31)
 /* Length of vsync, in half lines */
@@ -3924,7 +3983,7 @@
 # define TV_VEQ_START_F2_MASK		0x000007f
 # define TV_VEQ_START_F2_SHIFT		0
 
-#define TV_V_CTL_4		0x68048
+#define TV_V_CTL_4		_MMIO(0x68048)
 /*
  * Offset to start of vertical colorburst, measured in one less than the
  * number of lines from vertical start.
@@ -3938,7 +3997,7 @@
 # define TV_VBURST_END_F1_MASK		0x000000ff
 # define TV_VBURST_END_F1_SHIFT		0
 
-#define TV_V_CTL_5		0x6804c
+#define TV_V_CTL_5		_MMIO(0x6804c)
 /*
  * Offset to start of vertical colorburst, measured in one less than the
  * number of lines from vertical start.
@@ -3952,7 +4011,7 @@
 # define TV_VBURST_END_F2_MASK		0x000000ff
 # define TV_VBURST_END_F2_SHIFT		0
 
-#define TV_V_CTL_6		0x68050
+#define TV_V_CTL_6		_MMIO(0x68050)
 /*
  * Offset to start of vertical colorburst, measured in one less than the
  * number of lines from vertical start.
@@ -3966,7 +4025,7 @@
 # define TV_VBURST_END_F3_MASK		0x000000ff
 # define TV_VBURST_END_F3_SHIFT		0
 
-#define TV_V_CTL_7		0x68054
+#define TV_V_CTL_7		_MMIO(0x68054)
 /*
  * Offset to start of vertical colorburst, measured in one less than the
  * number of lines from vertical start.
@@ -3980,7 +4039,7 @@
 # define TV_VBURST_END_F4_MASK		0x000000ff
 # define TV_VBURST_END_F4_SHIFT		0
 
-#define TV_SC_CTL_1		0x68060
+#define TV_SC_CTL_1		_MMIO(0x68060)
 /* Turns on the first subcarrier phase generation DDA */
 # define TV_SC_DDA1_EN			(1 << 31)
 /* Turns on the first subcarrier phase generation DDA */
@@ -4002,7 +4061,7 @@
 # define TV_SCDDA1_INC_MASK		0x00000fff
 # define TV_SCDDA1_INC_SHIFT		0
 
-#define TV_SC_CTL_2		0x68064
+#define TV_SC_CTL_2		_MMIO(0x68064)
 /* Sets the rollover for the second subcarrier phase generation DDA */
 # define TV_SCDDA2_SIZE_MASK		0x7fff0000
 # define TV_SCDDA2_SIZE_SHIFT		16
@@ -4010,7 +4069,7 @@
 # define TV_SCDDA2_INC_MASK		0x00007fff
 # define TV_SCDDA2_INC_SHIFT		0
 
-#define TV_SC_CTL_3		0x68068
+#define TV_SC_CTL_3		_MMIO(0x68068)
 /* Sets the rollover for the third subcarrier phase generation DDA */
 # define TV_SCDDA3_SIZE_MASK		0x7fff0000
 # define TV_SCDDA3_SIZE_SHIFT		16
@@ -4018,7 +4077,7 @@
 # define TV_SCDDA3_INC_MASK		0x00007fff
 # define TV_SCDDA3_INC_SHIFT		0
 
-#define TV_WIN_POS		0x68070
+#define TV_WIN_POS		_MMIO(0x68070)
 /* X coordinate of the display from the start of horizontal active */
 # define TV_XPOS_MASK			0x1fff0000
 # define TV_XPOS_SHIFT			16
@@ -4026,7 +4085,7 @@
 # define TV_YPOS_MASK			0x00000fff
 # define TV_YPOS_SHIFT			0
 
-#define TV_WIN_SIZE		0x68074
+#define TV_WIN_SIZE		_MMIO(0x68074)
 /* Horizontal size of the display window, measured in pixels*/
 # define TV_XSIZE_MASK			0x1fff0000
 # define TV_XSIZE_SHIFT			16
@@ -4038,7 +4097,7 @@
 # define TV_YSIZE_MASK			0x00000fff
 # define TV_YSIZE_SHIFT			0
 
-#define TV_FILTER_CTL_1		0x68080
+#define TV_FILTER_CTL_1		_MMIO(0x68080)
 /*
  * Enables automatic scaling calculation.
  *
@@ -4071,7 +4130,7 @@
 # define TV_HSCALE_FRAC_MASK		0x00003fff
 # define TV_HSCALE_FRAC_SHIFT		0
 
-#define TV_FILTER_CTL_2		0x68084
+#define TV_FILTER_CTL_2		_MMIO(0x68084)
 /*
  * Sets the integer part of the 3.15 fixed-point vertical scaling factor.
  *
@@ -4087,7 +4146,7 @@
 # define TV_VSCALE_FRAC_MASK		0x00007fff
 # define TV_VSCALE_FRAC_SHIFT		0
 
-#define TV_FILTER_CTL_3		0x68088
+#define TV_FILTER_CTL_3		_MMIO(0x68088)
 /*
  * Sets the integer part of the 3.15 fixed-point vertical scaling factor.
  *
@@ -4107,7 +4166,7 @@
 # define TV_VSCALE_IP_FRAC_MASK		0x00007fff
 # define TV_VSCALE_IP_FRAC_SHIFT		0
 
-#define TV_CC_CONTROL		0x68090
+#define TV_CC_CONTROL		_MMIO(0x68090)
 # define TV_CC_ENABLE			(1 << 31)
 /*
  * Specifies which field to send the CC data in.
@@ -4123,7 +4182,7 @@
 # define TV_CC_LINE_MASK		0x0000003f
 # define TV_CC_LINE_SHIFT		0
 
-#define TV_CC_DATA		0x68094
+#define TV_CC_DATA		_MMIO(0x68094)
 # define TV_CC_RDY			(1 << 31)
 /* Second word of CC data to be transmitted. */
 # define TV_CC_DATA_2_MASK		0x007f0000
@@ -4132,20 +4191,20 @@
 # define TV_CC_DATA_1_MASK		0x0000007f
 # define TV_CC_DATA_1_SHIFT		0
 
-#define TV_H_LUMA(i)		(0x68100 + (i) * 4) /* 60 registers */
-#define TV_H_CHROMA(i)		(0x68200 + (i) * 4) /* 60 registers */
-#define TV_V_LUMA(i)		(0x68300 + (i) * 4) /* 43 registers */
-#define TV_V_CHROMA(i)		(0x68400 + (i) * 4) /* 43 registers */
+#define TV_H_LUMA(i)		_MMIO(0x68100 + (i) * 4) /* 60 registers */
+#define TV_H_CHROMA(i)		_MMIO(0x68200 + (i) * 4) /* 60 registers */
+#define TV_V_LUMA(i)		_MMIO(0x68300 + (i) * 4) /* 43 registers */
+#define TV_V_CHROMA(i)		_MMIO(0x68400 + (i) * 4) /* 43 registers */
 
 /* Display Port */
-#define DP_A				0x64000 /* eDP */
-#define DP_B				0x64100
-#define DP_C				0x64200
-#define DP_D				0x64300
+#define DP_A			_MMIO(0x64000) /* eDP */
+#define DP_B			_MMIO(0x64100)
+#define DP_C			_MMIO(0x64200)
+#define DP_D			_MMIO(0x64300)
 
-#define VLV_DP_B			(VLV_DISPLAY_BASE + DP_B)
-#define VLV_DP_C			(VLV_DISPLAY_BASE + DP_C)
-#define CHV_DP_D			(VLV_DISPLAY_BASE + DP_D)
+#define VLV_DP_B		_MMIO(VLV_DISPLAY_BASE + 0x64100)
+#define VLV_DP_C		_MMIO(VLV_DISPLAY_BASE + 0x64200)
+#define CHV_DP_D		_MMIO(VLV_DISPLAY_BASE + 0x64300)
 
 #define   DP_PORT_EN			(1 << 31)
 #define   DP_PIPEB_SELECT		(1 << 30)
@@ -4199,7 +4258,7 @@
 
 /* eDP */
 #define   DP_PLL_FREQ_270MHZ		(0 << 16)
-#define   DP_PLL_FREQ_160MHZ		(1 << 16)
+#define   DP_PLL_FREQ_162MHZ		(1 << 16)
 #define   DP_PLL_FREQ_MASK		(3 << 16)
 
 /* locked once port is enabled */
@@ -4232,33 +4291,36 @@
  * is 20 bytes in each direction, hence the 5 fixed
  * data registers
  */
-#define DPA_AUX_CH_CTL			0x64010
-#define DPA_AUX_CH_DATA1		0x64014
-#define DPA_AUX_CH_DATA2		0x64018
-#define DPA_AUX_CH_DATA3		0x6401c
-#define DPA_AUX_CH_DATA4		0x64020
-#define DPA_AUX_CH_DATA5		0x64024
+#define _DPA_AUX_CH_CTL		(dev_priv->info.display_mmio_offset + 0x64010)
+#define _DPA_AUX_CH_DATA1	(dev_priv->info.display_mmio_offset + 0x64014)
+#define _DPA_AUX_CH_DATA2	(dev_priv->info.display_mmio_offset + 0x64018)
+#define _DPA_AUX_CH_DATA3	(dev_priv->info.display_mmio_offset + 0x6401c)
+#define _DPA_AUX_CH_DATA4	(dev_priv->info.display_mmio_offset + 0x64020)
+#define _DPA_AUX_CH_DATA5	(dev_priv->info.display_mmio_offset + 0x64024)
 
-#define DPB_AUX_CH_CTL			0x64110
-#define DPB_AUX_CH_DATA1		0x64114
-#define DPB_AUX_CH_DATA2		0x64118
-#define DPB_AUX_CH_DATA3		0x6411c
-#define DPB_AUX_CH_DATA4		0x64120
-#define DPB_AUX_CH_DATA5		0x64124
+#define _DPB_AUX_CH_CTL		(dev_priv->info.display_mmio_offset + 0x64110)
+#define _DPB_AUX_CH_DATA1	(dev_priv->info.display_mmio_offset + 0x64114)
+#define _DPB_AUX_CH_DATA2	(dev_priv->info.display_mmio_offset + 0x64118)
+#define _DPB_AUX_CH_DATA3	(dev_priv->info.display_mmio_offset + 0x6411c)
+#define _DPB_AUX_CH_DATA4	(dev_priv->info.display_mmio_offset + 0x64120)
+#define _DPB_AUX_CH_DATA5	(dev_priv->info.display_mmio_offset + 0x64124)
 
-#define DPC_AUX_CH_CTL			0x64210
-#define DPC_AUX_CH_DATA1		0x64214
-#define DPC_AUX_CH_DATA2		0x64218
-#define DPC_AUX_CH_DATA3		0x6421c
-#define DPC_AUX_CH_DATA4		0x64220
-#define DPC_AUX_CH_DATA5		0x64224
+#define _DPC_AUX_CH_CTL		(dev_priv->info.display_mmio_offset + 0x64210)
+#define _DPC_AUX_CH_DATA1	(dev_priv->info.display_mmio_offset + 0x64214)
+#define _DPC_AUX_CH_DATA2	(dev_priv->info.display_mmio_offset + 0x64218)
+#define _DPC_AUX_CH_DATA3	(dev_priv->info.display_mmio_offset + 0x6421c)
+#define _DPC_AUX_CH_DATA4	(dev_priv->info.display_mmio_offset + 0x64220)
+#define _DPC_AUX_CH_DATA5	(dev_priv->info.display_mmio_offset + 0x64224)
 
-#define DPD_AUX_CH_CTL			0x64310
-#define DPD_AUX_CH_DATA1		0x64314
-#define DPD_AUX_CH_DATA2		0x64318
-#define DPD_AUX_CH_DATA3		0x6431c
-#define DPD_AUX_CH_DATA4		0x64320
-#define DPD_AUX_CH_DATA5		0x64324
+#define _DPD_AUX_CH_CTL		(dev_priv->info.display_mmio_offset + 0x64310)
+#define _DPD_AUX_CH_DATA1	(dev_priv->info.display_mmio_offset + 0x64314)
+#define _DPD_AUX_CH_DATA2	(dev_priv->info.display_mmio_offset + 0x64318)
+#define _DPD_AUX_CH_DATA3	(dev_priv->info.display_mmio_offset + 0x6431c)
+#define _DPD_AUX_CH_DATA4	(dev_priv->info.display_mmio_offset + 0x64320)
+#define _DPD_AUX_CH_DATA5	(dev_priv->info.display_mmio_offset + 0x64324)
+
+#define DP_AUX_CH_CTL(port)	_MMIO_PORT(port, _DPA_AUX_CH_CTL, _DPB_AUX_CH_CTL)
+#define DP_AUX_CH_DATA(port, i)	_MMIO(_PORT(port, _DPA_AUX_CH_DATA1, _DPB_AUX_CH_DATA1) + (i) * 4) /* 5 registers */
 
 #define   DP_AUX_CH_CTL_SEND_BUSY	    (1 << 31)
 #define   DP_AUX_CH_CTL_DONE		    (1 << 30)
@@ -4335,10 +4397,10 @@
 #define _PIPEB_LINK_N_G4X	0x71064
 #define   PIPEA_DP_LINK_N_MASK			(0xffffff)
 
-#define PIPE_DATA_M_G4X(pipe) _PIPE(pipe, _PIPEA_DATA_M_G4X, _PIPEB_DATA_M_G4X)
-#define PIPE_DATA_N_G4X(pipe) _PIPE(pipe, _PIPEA_DATA_N_G4X, _PIPEB_DATA_N_G4X)
-#define PIPE_LINK_M_G4X(pipe) _PIPE(pipe, _PIPEA_LINK_M_G4X, _PIPEB_LINK_M_G4X)
-#define PIPE_LINK_N_G4X(pipe) _PIPE(pipe, _PIPEA_LINK_N_G4X, _PIPEB_LINK_N_G4X)
+#define PIPE_DATA_M_G4X(pipe) _MMIO_PIPE(pipe, _PIPEA_DATA_M_G4X, _PIPEB_DATA_M_G4X)
+#define PIPE_DATA_N_G4X(pipe) _MMIO_PIPE(pipe, _PIPEA_DATA_N_G4X, _PIPEB_DATA_N_G4X)
+#define PIPE_LINK_M_G4X(pipe) _MMIO_PIPE(pipe, _PIPEA_LINK_M_G4X, _PIPEB_LINK_M_G4X)
+#define PIPE_LINK_N_G4X(pipe) _MMIO_PIPE(pipe, _PIPEA_LINK_N_G4X, _PIPEB_LINK_N_G4X)
 
 /* Display & cursor control */
 
@@ -4454,15 +4516,15 @@
  */
 #define PIPE_EDP_OFFSET	0x7f000
 
-#define _PIPE2(pipe, reg) (dev_priv->info.pipe_offsets[pipe] - \
+#define _MMIO_PIPE2(pipe, reg) _MMIO(dev_priv->info.pipe_offsets[pipe] - \
 	dev_priv->info.pipe_offsets[PIPE_A] + (reg) + \
 	dev_priv->info.display_mmio_offset)
 
-#define PIPECONF(pipe) _PIPE2(pipe, _PIPEACONF)
-#define PIPEDSL(pipe)  _PIPE2(pipe, _PIPEADSL)
-#define PIPEFRAME(pipe) _PIPE2(pipe, _PIPEAFRAMEHIGH)
-#define PIPEFRAMEPIXEL(pipe)  _PIPE2(pipe, _PIPEAFRAMEPIXEL)
-#define PIPESTAT(pipe) _PIPE2(pipe, _PIPEASTAT)
+#define PIPECONF(pipe)		_MMIO_PIPE2(pipe, _PIPEACONF)
+#define PIPEDSL(pipe)		_MMIO_PIPE2(pipe, _PIPEADSL)
+#define PIPEFRAME(pipe)		_MMIO_PIPE2(pipe, _PIPEAFRAMEHIGH)
+#define PIPEFRAMEPIXEL(pipe)	_MMIO_PIPE2(pipe, _PIPEAFRAMEPIXEL)
+#define PIPESTAT(pipe)		_MMIO_PIPE2(pipe, _PIPEASTAT)
 
 #define _PIPE_MISC_A			0x70030
 #define _PIPE_MISC_B			0x71030
@@ -4474,9 +4536,9 @@
 #define   PIPEMISC_DITHER_ENABLE	(1<<4)
 #define   PIPEMISC_DITHER_TYPE_MASK	(3<<2)
 #define   PIPEMISC_DITHER_TYPE_SP	(0<<2)
-#define PIPEMISC(pipe) _PIPE2(pipe, _PIPE_MISC_A)
+#define PIPEMISC(pipe)			_MMIO_PIPE2(pipe, _PIPE_MISC_A)
 
-#define VLV_DPFLIPSTAT				(VLV_DISPLAY_BASE + 0x70028)
+#define VLV_DPFLIPSTAT				_MMIO(VLV_DISPLAY_BASE + 0x70028)
 #define   PIPEB_LINE_COMPARE_INT_EN		(1<<29)
 #define   PIPEB_HLINE_INT_EN			(1<<28)
 #define   PIPEB_VBLANK_INT_EN			(1<<27)
@@ -4497,7 +4559,7 @@
 #define   SPRITEE_FLIPDONE_INT_EN		(1<<9)
 #define   PLANEC_FLIPDONE_INT_EN		(1<<8)
 
-#define DPINVGTT				(VLV_DISPLAY_BASE + 0x7002c) /* VLV/CHV only */
+#define DPINVGTT				_MMIO(VLV_DISPLAY_BASE + 0x7002c) /* VLV/CHV only */
 #define   SPRITEF_INVALID_GTT_INT_EN		(1<<27)
 #define   SPRITEE_INVALID_GTT_INT_EN		(1<<26)
 #define   PLANEC_INVALID_GTT_INT_EN		(1<<25)
@@ -4527,7 +4589,7 @@
 #define   DPINVGTT_STATUS_MASK			0xff
 #define   DPINVGTT_STATUS_MASK_CHV		0xfff
 
-#define DSPARB			(dev_priv->info.display_mmio_offset + 0x70030)
+#define DSPARB			_MMIO(dev_priv->info.display_mmio_offset + 0x70030)
 #define   DSPARB_CSTART_MASK	(0x7f << 7)
 #define   DSPARB_CSTART_SHIFT	7
 #define   DSPARB_BSTART_MASK	(0x7f)
@@ -4542,7 +4604,7 @@
 #define   DSPARB_SPRITEC_MASK_VLV	(0xff << 16)
 #define   DSPARB_SPRITED_SHIFT_VLV	24
 #define   DSPARB_SPRITED_MASK_VLV	(0xff << 24)
-#define DSPARB2			(VLV_DISPLAY_BASE + 0x70060) /* vlv/chv */
+#define DSPARB2				_MMIO(VLV_DISPLAY_BASE + 0x70060) /* vlv/chv */
 #define   DSPARB_SPRITEA_HI_SHIFT_VLV	0
 #define   DSPARB_SPRITEA_HI_MASK_VLV	(0x1 << 0)
 #define   DSPARB_SPRITEB_HI_SHIFT_VLV	4
@@ -4555,14 +4617,14 @@
 #define   DSPARB_SPRITEE_HI_MASK_VLV	(0x1 << 16)
 #define   DSPARB_SPRITEF_HI_SHIFT_VLV	20
 #define   DSPARB_SPRITEF_HI_MASK_VLV	(0x1 << 20)
-#define DSPARB3			(VLV_DISPLAY_BASE + 0x7006c) /* chv */
+#define DSPARB3				_MMIO(VLV_DISPLAY_BASE + 0x7006c) /* chv */
 #define   DSPARB_SPRITEE_SHIFT_VLV	0
 #define   DSPARB_SPRITEE_MASK_VLV	(0xff << 0)
 #define   DSPARB_SPRITEF_SHIFT_VLV	8
 #define   DSPARB_SPRITEF_MASK_VLV	(0xff << 8)
 
 /* pnv/gen4/g4x/vlv/chv */
-#define DSPFW1			(dev_priv->info.display_mmio_offset + 0x70034)
+#define DSPFW1		_MMIO(dev_priv->info.display_mmio_offset + 0x70034)
 #define   DSPFW_SR_SHIFT		23
 #define   DSPFW_SR_MASK			(0x1ff<<23)
 #define   DSPFW_CURSORB_SHIFT		16
@@ -4573,7 +4635,7 @@
 #define   DSPFW_PLANEA_SHIFT		0
 #define   DSPFW_PLANEA_MASK		(0x7f<<0)
 #define   DSPFW_PLANEA_MASK_VLV		(0xff<<0) /* vlv/chv */
-#define DSPFW2			(dev_priv->info.display_mmio_offset + 0x70038)
+#define DSPFW2		_MMIO(dev_priv->info.display_mmio_offset + 0x70038)
 #define   DSPFW_FBC_SR_EN		(1<<31)	  /* g4x */
 #define   DSPFW_FBC_SR_SHIFT		28
 #define   DSPFW_FBC_SR_MASK		(0x7<<28) /* g4x */
@@ -4589,7 +4651,7 @@
 #define   DSPFW_SPRITEA_SHIFT		0
 #define   DSPFW_SPRITEA_MASK		(0x7f<<0) /* g4x */
 #define   DSPFW_SPRITEA_MASK_VLV	(0xff<<0) /* vlv/chv */
-#define DSPFW3			(dev_priv->info.display_mmio_offset + 0x7003c)
+#define DSPFW3		_MMIO(dev_priv->info.display_mmio_offset + 0x7003c)
 #define   DSPFW_HPLL_SR_EN		(1<<31)
 #define   PINEVIEW_SELF_REFRESH_EN	(1<<30)
 #define   DSPFW_CURSOR_SR_SHIFT		24
@@ -4600,14 +4662,14 @@
 #define   DSPFW_HPLL_SR_MASK		(0x1ff<<0)
 
 /* vlv/chv */
-#define DSPFW4			(VLV_DISPLAY_BASE + 0x70070)
+#define DSPFW4		_MMIO(VLV_DISPLAY_BASE + 0x70070)
 #define   DSPFW_SPRITEB_WM1_SHIFT	16
 #define   DSPFW_SPRITEB_WM1_MASK	(0xff<<16)
 #define   DSPFW_CURSORA_WM1_SHIFT	8
 #define   DSPFW_CURSORA_WM1_MASK	(0x3f<<8)
 #define   DSPFW_SPRITEA_WM1_SHIFT	0
 #define   DSPFW_SPRITEA_WM1_MASK	(0xff<<0)
-#define DSPFW5			(VLV_DISPLAY_BASE + 0x70074)
+#define DSPFW5		_MMIO(VLV_DISPLAY_BASE + 0x70074)
 #define   DSPFW_PLANEB_WM1_SHIFT	24
 #define   DSPFW_PLANEB_WM1_MASK		(0xff<<24)
 #define   DSPFW_PLANEA_WM1_SHIFT	16
@@ -4616,11 +4678,11 @@
 #define   DSPFW_CURSORB_WM1_MASK	(0x3f<<8)
 #define   DSPFW_CURSOR_SR_WM1_SHIFT	0
 #define   DSPFW_CURSOR_SR_WM1_MASK	(0x3f<<0)
-#define DSPFW6			(VLV_DISPLAY_BASE + 0x70078)
+#define DSPFW6		_MMIO(VLV_DISPLAY_BASE + 0x70078)
 #define   DSPFW_SR_WM1_SHIFT		0
 #define   DSPFW_SR_WM1_MASK		(0x1ff<<0)
-#define DSPFW7			(VLV_DISPLAY_BASE + 0x7007c)
-#define DSPFW7_CHV		(VLV_DISPLAY_BASE + 0x700b4) /* wtf #1? */
+#define DSPFW7		_MMIO(VLV_DISPLAY_BASE + 0x7007c)
+#define DSPFW7_CHV	_MMIO(VLV_DISPLAY_BASE + 0x700b4) /* wtf #1? */
 #define   DSPFW_SPRITED_WM1_SHIFT	24
 #define   DSPFW_SPRITED_WM1_MASK	(0xff<<24)
 #define   DSPFW_SPRITED_SHIFT		16
@@ -4629,7 +4691,7 @@
 #define   DSPFW_SPRITEC_WM1_MASK	(0xff<<8)
 #define   DSPFW_SPRITEC_SHIFT		0
 #define   DSPFW_SPRITEC_MASK_VLV	(0xff<<0)
-#define DSPFW8_CHV		(VLV_DISPLAY_BASE + 0x700b8)
+#define DSPFW8_CHV	_MMIO(VLV_DISPLAY_BASE + 0x700b8)
 #define   DSPFW_SPRITEF_WM1_SHIFT	24
 #define   DSPFW_SPRITEF_WM1_MASK	(0xff<<24)
 #define   DSPFW_SPRITEF_SHIFT		16
@@ -4638,7 +4700,7 @@
 #define   DSPFW_SPRITEE_WM1_MASK	(0xff<<8)
 #define   DSPFW_SPRITEE_SHIFT		0
 #define   DSPFW_SPRITEE_MASK_VLV	(0xff<<0)
-#define DSPFW9_CHV		(VLV_DISPLAY_BASE + 0x7007c) /* wtf #2? */
+#define DSPFW9_CHV	_MMIO(VLV_DISPLAY_BASE + 0x7007c) /* wtf #2? */
 #define   DSPFW_PLANEC_WM1_SHIFT	24
 #define   DSPFW_PLANEC_WM1_MASK		(0xff<<24)
 #define   DSPFW_PLANEC_SHIFT		16
@@ -4649,7 +4711,7 @@
 #define   DSPFW_CURSORC_MASK		(0x3f<<0)
 
 /* vlv/chv high order bits */
-#define DSPHOWM			(VLV_DISPLAY_BASE + 0x70064)
+#define DSPHOWM		_MMIO(VLV_DISPLAY_BASE + 0x70064)
 #define   DSPFW_SR_HI_SHIFT		24
 #define   DSPFW_SR_HI_MASK		(3<<24) /* 2 bits for chv, 1 for vlv */
 #define   DSPFW_SPRITEF_HI_SHIFT	23
@@ -4670,7 +4732,7 @@
 #define   DSPFW_SPRITEA_HI_MASK		(1<<4)
 #define   DSPFW_PLANEA_HI_SHIFT		0
 #define   DSPFW_PLANEA_HI_MASK		(1<<0)
-#define DSPHOWM1		(VLV_DISPLAY_BASE + 0x70068)
+#define DSPHOWM1	_MMIO(VLV_DISPLAY_BASE + 0x70068)
 #define   DSPFW_SR_WM1_HI_SHIFT		24
 #define   DSPFW_SR_WM1_HI_MASK		(3<<24) /* 2 bits for chv, 1 for vlv */
 #define   DSPFW_SPRITEF_WM1_HI_SHIFT	23
@@ -4693,7 +4755,7 @@
 #define   DSPFW_PLANEA_WM1_HI_MASK	(1<<0)
 
 /* drain latency register values*/
-#define VLV_DDL(pipe)			(VLV_DISPLAY_BASE + 0x70050 + 4 * (pipe))
+#define VLV_DDL(pipe)			_MMIO(VLV_DISPLAY_BASE + 0x70050 + 4 * (pipe))
 #define DDL_CURSOR_SHIFT		24
 #define DDL_SPRITE_SHIFT(sprite)	(8+8*(sprite))
 #define DDL_PLANE_SHIFT			0
@@ -4701,7 +4763,7 @@
 #define DDL_PRECISION_LOW		(0<<7)
 #define DRAIN_LATENCY_MASK		0x7f
 
-#define CBR1_VLV			(VLV_DISPLAY_BASE + 0x70400)
+#define CBR1_VLV			_MMIO(VLV_DISPLAY_BASE + 0x70400)
 #define  CBR_PND_DEADLINE_DISABLE	(1<<31)
 #define  CBR_PWM_CLOCK_MUX_SELECT	(1<<30)
 
@@ -4739,51 +4801,51 @@
 #define I965_CURSOR_DFT_WM	8
 
 /* Watermark register definitions for SKL */
-#define CUR_WM_A_0		0x70140
-#define CUR_WM_B_0		0x71140
-#define PLANE_WM_1_A_0		0x70240
-#define PLANE_WM_1_B_0		0x71240
-#define PLANE_WM_2_A_0		0x70340
-#define PLANE_WM_2_B_0		0x71340
-#define PLANE_WM_TRANS_1_A_0	0x70268
-#define PLANE_WM_TRANS_1_B_0	0x71268
-#define PLANE_WM_TRANS_2_A_0	0x70368
-#define PLANE_WM_TRANS_2_B_0	0x71368
-#define CUR_WM_TRANS_A_0	0x70168
-#define CUR_WM_TRANS_B_0	0x71168
+#define _CUR_WM_A_0		0x70140
+#define _CUR_WM_B_0		0x71140
+#define _PLANE_WM_1_A_0		0x70240
+#define _PLANE_WM_1_B_0		0x71240
+#define _PLANE_WM_2_A_0		0x70340
+#define _PLANE_WM_2_B_0		0x71340
+#define _PLANE_WM_TRANS_1_A_0	0x70268
+#define _PLANE_WM_TRANS_1_B_0	0x71268
+#define _PLANE_WM_TRANS_2_A_0	0x70368
+#define _PLANE_WM_TRANS_2_B_0	0x71368
+#define _CUR_WM_TRANS_A_0	0x70168
+#define _CUR_WM_TRANS_B_0	0x71168
 #define   PLANE_WM_EN		(1 << 31)
 #define   PLANE_WM_LINES_SHIFT	14
 #define   PLANE_WM_LINES_MASK	0x1f
 #define   PLANE_WM_BLOCKS_MASK	0x3ff
 
-#define CUR_WM_0(pipe) _PIPE(pipe, CUR_WM_A_0, CUR_WM_B_0)
-#define CUR_WM(pipe, level) (CUR_WM_0(pipe) + ((4) * (level)))
-#define CUR_WM_TRANS(pipe) _PIPE(pipe, CUR_WM_TRANS_A_0, CUR_WM_TRANS_B_0)
+#define _CUR_WM_0(pipe) _PIPE(pipe, _CUR_WM_A_0, _CUR_WM_B_0)
+#define CUR_WM(pipe, level) _MMIO(_CUR_WM_0(pipe) + ((4) * (level)))
+#define CUR_WM_TRANS(pipe) _MMIO_PIPE(pipe, _CUR_WM_TRANS_A_0, _CUR_WM_TRANS_B_0)
 
-#define _PLANE_WM_1(pipe) _PIPE(pipe, PLANE_WM_1_A_0, PLANE_WM_1_B_0)
-#define _PLANE_WM_2(pipe) _PIPE(pipe, PLANE_WM_2_A_0, PLANE_WM_2_B_0)
+#define _PLANE_WM_1(pipe) _PIPE(pipe, _PLANE_WM_1_A_0, _PLANE_WM_1_B_0)
+#define _PLANE_WM_2(pipe) _PIPE(pipe, _PLANE_WM_2_A_0, _PLANE_WM_2_B_0)
 #define _PLANE_WM_BASE(pipe, plane)	\
 			_PLANE(plane, _PLANE_WM_1(pipe), _PLANE_WM_2(pipe))
 #define PLANE_WM(pipe, plane, level)	\
-			(_PLANE_WM_BASE(pipe, plane) + ((4) * (level)))
+			_MMIO(_PLANE_WM_BASE(pipe, plane) + ((4) * (level)))
 #define _PLANE_WM_TRANS_1(pipe)	\
-			_PIPE(pipe, PLANE_WM_TRANS_1_A_0, PLANE_WM_TRANS_1_B_0)
+			_PIPE(pipe, _PLANE_WM_TRANS_1_A_0, _PLANE_WM_TRANS_1_B_0)
 #define _PLANE_WM_TRANS_2(pipe)	\
-			_PIPE(pipe, PLANE_WM_TRANS_2_A_0, PLANE_WM_TRANS_2_B_0)
+			_PIPE(pipe, _PLANE_WM_TRANS_2_A_0, _PLANE_WM_TRANS_2_B_0)
 #define PLANE_WM_TRANS(pipe, plane)	\
-		_PLANE(plane, _PLANE_WM_TRANS_1(pipe), _PLANE_WM_TRANS_2(pipe))
+	_MMIO(_PLANE(plane, _PLANE_WM_TRANS_1(pipe), _PLANE_WM_TRANS_2(pipe)))
 
 /* define the Watermark register on Ironlake */
-#define WM0_PIPEA_ILK		0x45100
+#define WM0_PIPEA_ILK		_MMIO(0x45100)
 #define  WM0_PIPE_PLANE_MASK	(0xffff<<16)
 #define  WM0_PIPE_PLANE_SHIFT	16
 #define  WM0_PIPE_SPRITE_MASK	(0xff<<8)
 #define  WM0_PIPE_SPRITE_SHIFT	8
 #define  WM0_PIPE_CURSOR_MASK	(0xff)
 
-#define WM0_PIPEB_ILK		0x45104
-#define WM0_PIPEC_IVB		0x45200
-#define WM1_LP_ILK		0x45108
+#define WM0_PIPEB_ILK		_MMIO(0x45104)
+#define WM0_PIPEC_IVB		_MMIO(0x45200)
+#define WM1_LP_ILK		_MMIO(0x45108)
 #define  WM1_LP_SR_EN		(1<<31)
 #define  WM1_LP_LATENCY_SHIFT	24
 #define  WM1_LP_LATENCY_MASK	(0x7f<<24)
@@ -4793,13 +4855,13 @@
 #define  WM1_LP_SR_MASK		(0x7ff<<8)
 #define  WM1_LP_SR_SHIFT	8
 #define  WM1_LP_CURSOR_MASK	(0xff)
-#define WM2_LP_ILK		0x4510c
+#define WM2_LP_ILK		_MMIO(0x4510c)
 #define  WM2_LP_EN		(1<<31)
-#define WM3_LP_ILK		0x45110
+#define WM3_LP_ILK		_MMIO(0x45110)
 #define  WM3_LP_EN		(1<<31)
-#define WM1S_LP_ILK		0x45120
-#define WM2S_LP_IVB		0x45124
-#define WM3S_LP_IVB		0x45128
+#define WM1S_LP_ILK		_MMIO(0x45120)
+#define WM2S_LP_IVB		_MMIO(0x45124)
+#define WM3S_LP_IVB		_MMIO(0x45128)
 #define  WM1S_LP_EN		(1<<31)
 
 #define HSW_WM_LP_VAL(lat, fbc, pri, cur) \
@@ -4807,7 +4869,7 @@
 	 ((fbc) << WM1_LP_FBC_SHIFT) | ((pri) << WM1_LP_SR_SHIFT) | (cur))
 
 /* Memory latency timer register */
-#define MLTR_ILK		0x11222
+#define MLTR_ILK		_MMIO(0x11222)
 #define  MLTR_WM1_SHIFT		0
 #define  MLTR_WM2_SHIFT		8
 /* the unit of memory self-refresh latency time is 0.5us */
@@ -4815,7 +4877,7 @@
 
 
 /* the address where we get all kinds of latency value */
-#define SSKPD			0x5d10
+#define SSKPD			_MMIO(0x5d10)
 #define SSKPD_WM_MASK		0x3f
 #define SSKPD_WM0_SHIFT		0
 #define SSKPD_WM1_SHIFT		8
@@ -4848,8 +4910,8 @@
 /* GM45+ just has to be different */
 #define _PIPEA_FRMCOUNT_G4X	0x70040
 #define _PIPEA_FLIPCOUNT_G4X	0x70044
-#define PIPE_FRMCOUNT_G4X(pipe) _PIPE2(pipe, _PIPEA_FRMCOUNT_G4X)
-#define PIPE_FLIPCOUNT_G4X(pipe) _PIPE2(pipe, _PIPEA_FLIPCOUNT_G4X)
+#define PIPE_FRMCOUNT_G4X(pipe) _MMIO_PIPE2(pipe, _PIPEA_FRMCOUNT_G4X)
+#define PIPE_FLIPCOUNT_G4X(pipe) _MMIO_PIPE2(pipe, _PIPEA_FLIPCOUNT_G4X)
 
 /* Cursor A & B regs */
 #define _CURACNTR		0x70080
@@ -4887,7 +4949,7 @@
 #define   CURSOR_POS_SIGN       0x8000
 #define   CURSOR_X_SHIFT        0
 #define   CURSOR_Y_SHIFT        16
-#define CURSIZE			0x700a0
+#define CURSIZE			_MMIO(0x700a0)
 #define _CURBCNTR		0x700c0
 #define _CURBBASE		0x700c4
 #define _CURBPOS		0x700c8
@@ -4896,7 +4958,7 @@
 #define _CURBBASE_IVB		0x71084
 #define _CURBPOS_IVB		0x71088
 
-#define _CURSOR2(pipe, reg) (dev_priv->info.cursor_offsets[(pipe)] - \
+#define _CURSOR2(pipe, reg) _MMIO(dev_priv->info.cursor_offsets[(pipe)] - \
 	dev_priv->info.cursor_offsets[PIPE_A] + (reg) + \
 	dev_priv->info.display_mmio_offset)
 
@@ -4957,16 +5019,16 @@
 #define _DSPAOFFSET				0x701A4 /* HSW */
 #define _DSPASURFLIVE				0x701AC
 
-#define DSPCNTR(plane) _PIPE2(plane, _DSPACNTR)
-#define DSPADDR(plane) _PIPE2(plane, _DSPAADDR)
-#define DSPSTRIDE(plane) _PIPE2(plane, _DSPASTRIDE)
-#define DSPPOS(plane) _PIPE2(plane, _DSPAPOS)
-#define DSPSIZE(plane) _PIPE2(plane, _DSPASIZE)
-#define DSPSURF(plane) _PIPE2(plane, _DSPASURF)
-#define DSPTILEOFF(plane) _PIPE2(plane, _DSPATILEOFF)
-#define DSPLINOFF(plane) DSPADDR(plane)
-#define DSPOFFSET(plane) _PIPE2(plane, _DSPAOFFSET)
-#define DSPSURFLIVE(plane) _PIPE2(plane, _DSPASURFLIVE)
+#define DSPCNTR(plane)		_MMIO_PIPE2(plane, _DSPACNTR)
+#define DSPADDR(plane)		_MMIO_PIPE2(plane, _DSPAADDR)
+#define DSPSTRIDE(plane)	_MMIO_PIPE2(plane, _DSPASTRIDE)
+#define DSPPOS(plane)		_MMIO_PIPE2(plane, _DSPAPOS)
+#define DSPSIZE(plane)		_MMIO_PIPE2(plane, _DSPASIZE)
+#define DSPSURF(plane)		_MMIO_PIPE2(plane, _DSPASURF)
+#define DSPTILEOFF(plane)	_MMIO_PIPE2(plane, _DSPATILEOFF)
+#define DSPLINOFF(plane)	DSPADDR(plane)
+#define DSPOFFSET(plane)	_MMIO_PIPE2(plane, _DSPAOFFSET)
+#define DSPSURFLIVE(plane)	_MMIO_PIPE2(plane, _DSPASURFLIVE)
 
 /* CHV pipe B blender and primary plane */
 #define _CHV_BLEND_A		0x60a00
@@ -4980,11 +5042,11 @@
 #define _PRIMCNSTALPHA_A	0x60a10
 #define   PRIM_CONST_ALPHA_ENABLE	(1<<31)
 
-#define CHV_BLEND(pipe) _TRANSCODER2(pipe, _CHV_BLEND_A)
-#define CHV_CANVAS(pipe) _TRANSCODER2(pipe, _CHV_CANVAS_A)
-#define PRIMPOS(plane) _TRANSCODER2(plane, _PRIMPOS_A)
-#define PRIMSIZE(plane) _TRANSCODER2(plane, _PRIMSIZE_A)
-#define PRIMCNSTALPHA(plane) _TRANSCODER2(plane, _PRIMCNSTALPHA_A)
+#define CHV_BLEND(pipe)		_MMIO_TRANS2(pipe, _CHV_BLEND_A)
+#define CHV_CANVAS(pipe)	_MMIO_TRANS2(pipe, _CHV_CANVAS_A)
+#define PRIMPOS(plane)		_MMIO_TRANS2(plane, _PRIMPOS_A)
+#define PRIMSIZE(plane)		_MMIO_TRANS2(plane, _PRIMSIZE_A)
+#define PRIMCNSTALPHA(plane)	_MMIO_TRANS2(plane, _PRIMCNSTALPHA_A)
 
 /* Display/Sprite base address macros */
 #define DISP_BASEADDR_MASK	(0xfffff000)
@@ -5002,9 +5064,10 @@
  * [10:1f] all
  * [30:32] all
  */
-#define SWF0(i)	(dev_priv->info.display_mmio_offset + 0x70410 + (i) * 4)
-#define SWF1(i)	(dev_priv->info.display_mmio_offset + 0x71410 + (i) * 4)
-#define SWF3(i)	(dev_priv->info.display_mmio_offset + 0x72414 + (i) * 4)
+#define SWF0(i)	_MMIO(dev_priv->info.display_mmio_offset + 0x70410 + (i) * 4)
+#define SWF1(i)	_MMIO(dev_priv->info.display_mmio_offset + 0x71410 + (i) * 4)
+#define SWF3(i)	_MMIO(dev_priv->info.display_mmio_offset + 0x72414 + (i) * 4)
+#define SWF_ILK(i)	_MMIO(0x4F000 + (i) * 4)
 
 /* Pipe B */
 #define _PIPEBDSL		(dev_priv->info.display_mmio_offset + 0x71000)
@@ -5086,18 +5149,18 @@
 #define _DVSBSCALE		0x73204
 #define _DVSBGAMC		0x73300
 
-#define DVSCNTR(pipe) _PIPE(pipe, _DVSACNTR, _DVSBCNTR)
-#define DVSLINOFF(pipe) _PIPE(pipe, _DVSALINOFF, _DVSBLINOFF)
-#define DVSSTRIDE(pipe) _PIPE(pipe, _DVSASTRIDE, _DVSBSTRIDE)
-#define DVSPOS(pipe) _PIPE(pipe, _DVSAPOS, _DVSBPOS)
-#define DVSSURF(pipe) _PIPE(pipe, _DVSASURF, _DVSBSURF)
-#define DVSKEYMAX(pipe) _PIPE(pipe, _DVSAKEYMAXVAL, _DVSBKEYMAXVAL)
-#define DVSSIZE(pipe) _PIPE(pipe, _DVSASIZE, _DVSBSIZE)
-#define DVSSCALE(pipe) _PIPE(pipe, _DVSASCALE, _DVSBSCALE)
-#define DVSTILEOFF(pipe) _PIPE(pipe, _DVSATILEOFF, _DVSBTILEOFF)
-#define DVSKEYVAL(pipe) _PIPE(pipe, _DVSAKEYVAL, _DVSBKEYVAL)
-#define DVSKEYMSK(pipe) _PIPE(pipe, _DVSAKEYMSK, _DVSBKEYMSK)
-#define DVSSURFLIVE(pipe) _PIPE(pipe, _DVSASURFLIVE, _DVSBSURFLIVE)
+#define DVSCNTR(pipe) _MMIO_PIPE(pipe, _DVSACNTR, _DVSBCNTR)
+#define DVSLINOFF(pipe) _MMIO_PIPE(pipe, _DVSALINOFF, _DVSBLINOFF)
+#define DVSSTRIDE(pipe) _MMIO_PIPE(pipe, _DVSASTRIDE, _DVSBSTRIDE)
+#define DVSPOS(pipe) _MMIO_PIPE(pipe, _DVSAPOS, _DVSBPOS)
+#define DVSSURF(pipe) _MMIO_PIPE(pipe, _DVSASURF, _DVSBSURF)
+#define DVSKEYMAX(pipe) _MMIO_PIPE(pipe, _DVSAKEYMAXVAL, _DVSBKEYMAXVAL)
+#define DVSSIZE(pipe) _MMIO_PIPE(pipe, _DVSASIZE, _DVSBSIZE)
+#define DVSSCALE(pipe) _MMIO_PIPE(pipe, _DVSASCALE, _DVSBSCALE)
+#define DVSTILEOFF(pipe) _MMIO_PIPE(pipe, _DVSATILEOFF, _DVSBTILEOFF)
+#define DVSKEYVAL(pipe) _MMIO_PIPE(pipe, _DVSAKEYVAL, _DVSBKEYVAL)
+#define DVSKEYMSK(pipe) _MMIO_PIPE(pipe, _DVSAKEYMSK, _DVSBKEYMSK)
+#define DVSSURFLIVE(pipe) _MMIO_PIPE(pipe, _DVSASURFLIVE, _DVSBSURFLIVE)
 
 #define _SPRA_CTL		0x70280
 #define   SPRITE_ENABLE			(1<<31)
@@ -5160,20 +5223,20 @@
 #define _SPRB_SCALE		0x71304
 #define _SPRB_GAMC		0x71400
 
-#define SPRCTL(pipe) _PIPE(pipe, _SPRA_CTL, _SPRB_CTL)
-#define SPRLINOFF(pipe) _PIPE(pipe, _SPRA_LINOFF, _SPRB_LINOFF)
-#define SPRSTRIDE(pipe) _PIPE(pipe, _SPRA_STRIDE, _SPRB_STRIDE)
-#define SPRPOS(pipe) _PIPE(pipe, _SPRA_POS, _SPRB_POS)
-#define SPRSIZE(pipe) _PIPE(pipe, _SPRA_SIZE, _SPRB_SIZE)
-#define SPRKEYVAL(pipe) _PIPE(pipe, _SPRA_KEYVAL, _SPRB_KEYVAL)
-#define SPRKEYMSK(pipe) _PIPE(pipe, _SPRA_KEYMSK, _SPRB_KEYMSK)
-#define SPRSURF(pipe) _PIPE(pipe, _SPRA_SURF, _SPRB_SURF)
-#define SPRKEYMAX(pipe) _PIPE(pipe, _SPRA_KEYMAX, _SPRB_KEYMAX)
-#define SPRTILEOFF(pipe) _PIPE(pipe, _SPRA_TILEOFF, _SPRB_TILEOFF)
-#define SPROFFSET(pipe) _PIPE(pipe, _SPRA_OFFSET, _SPRB_OFFSET)
-#define SPRSCALE(pipe) _PIPE(pipe, _SPRA_SCALE, _SPRB_SCALE)
-#define SPRGAMC(pipe) _PIPE(pipe, _SPRA_GAMC, _SPRB_GAMC)
-#define SPRSURFLIVE(pipe) _PIPE(pipe, _SPRA_SURFLIVE, _SPRB_SURFLIVE)
+#define SPRCTL(pipe) _MMIO_PIPE(pipe, _SPRA_CTL, _SPRB_CTL)
+#define SPRLINOFF(pipe) _MMIO_PIPE(pipe, _SPRA_LINOFF, _SPRB_LINOFF)
+#define SPRSTRIDE(pipe) _MMIO_PIPE(pipe, _SPRA_STRIDE, _SPRB_STRIDE)
+#define SPRPOS(pipe) _MMIO_PIPE(pipe, _SPRA_POS, _SPRB_POS)
+#define SPRSIZE(pipe) _MMIO_PIPE(pipe, _SPRA_SIZE, _SPRB_SIZE)
+#define SPRKEYVAL(pipe) _MMIO_PIPE(pipe, _SPRA_KEYVAL, _SPRB_KEYVAL)
+#define SPRKEYMSK(pipe) _MMIO_PIPE(pipe, _SPRA_KEYMSK, _SPRB_KEYMSK)
+#define SPRSURF(pipe) _MMIO_PIPE(pipe, _SPRA_SURF, _SPRB_SURF)
+#define SPRKEYMAX(pipe) _MMIO_PIPE(pipe, _SPRA_KEYMAX, _SPRB_KEYMAX)
+#define SPRTILEOFF(pipe) _MMIO_PIPE(pipe, _SPRA_TILEOFF, _SPRB_TILEOFF)
+#define SPROFFSET(pipe) _MMIO_PIPE(pipe, _SPRA_OFFSET, _SPRB_OFFSET)
+#define SPRSCALE(pipe) _MMIO_PIPE(pipe, _SPRA_SCALE, _SPRB_SCALE)
+#define SPRGAMC(pipe) _MMIO_PIPE(pipe, _SPRA_GAMC, _SPRB_GAMC)
+#define SPRSURFLIVE(pipe) _MMIO_PIPE(pipe, _SPRA_SURFLIVE, _SPRB_SURFLIVE)
 
 #define _SPACNTR		(VLV_DISPLAY_BASE + 0x72180)
 #define   SP_ENABLE			(1<<31)
@@ -5223,18 +5286,18 @@
 #define _SPBCONSTALPHA		(VLV_DISPLAY_BASE + 0x722a8)
 #define _SPBGAMC		(VLV_DISPLAY_BASE + 0x722f4)
 
-#define SPCNTR(pipe, plane) _PIPE((pipe) * 2 + (plane), _SPACNTR, _SPBCNTR)
-#define SPLINOFF(pipe, plane) _PIPE((pipe) * 2 + (plane), _SPALINOFF, _SPBLINOFF)
-#define SPSTRIDE(pipe, plane) _PIPE((pipe) * 2 + (plane), _SPASTRIDE, _SPBSTRIDE)
-#define SPPOS(pipe, plane) _PIPE((pipe) * 2 + (plane), _SPAPOS, _SPBPOS)
-#define SPSIZE(pipe, plane) _PIPE((pipe) * 2 + (plane), _SPASIZE, _SPBSIZE)
-#define SPKEYMINVAL(pipe, plane) _PIPE((pipe) * 2 + (plane), _SPAKEYMINVAL, _SPBKEYMINVAL)
-#define SPKEYMSK(pipe, plane) _PIPE((pipe) * 2 + (plane), _SPAKEYMSK, _SPBKEYMSK)
-#define SPSURF(pipe, plane) _PIPE((pipe) * 2 + (plane), _SPASURF, _SPBSURF)
-#define SPKEYMAXVAL(pipe, plane) _PIPE((pipe) * 2 + (plane), _SPAKEYMAXVAL, _SPBKEYMAXVAL)
-#define SPTILEOFF(pipe, plane) _PIPE((pipe) * 2 + (plane), _SPATILEOFF, _SPBTILEOFF)
-#define SPCONSTALPHA(pipe, plane) _PIPE((pipe) * 2 + (plane), _SPACONSTALPHA, _SPBCONSTALPHA)
-#define SPGAMC(pipe, plane) _PIPE((pipe) * 2 + (plane), _SPAGAMC, _SPBGAMC)
+#define SPCNTR(pipe, plane) _MMIO_PIPE((pipe) * 2 + (plane), _SPACNTR, _SPBCNTR)
+#define SPLINOFF(pipe, plane) _MMIO_PIPE((pipe) * 2 + (plane), _SPALINOFF, _SPBLINOFF)
+#define SPSTRIDE(pipe, plane) _MMIO_PIPE((pipe) * 2 + (plane), _SPASTRIDE, _SPBSTRIDE)
+#define SPPOS(pipe, plane) _MMIO_PIPE((pipe) * 2 + (plane), _SPAPOS, _SPBPOS)
+#define SPSIZE(pipe, plane) _MMIO_PIPE((pipe) * 2 + (plane), _SPASIZE, _SPBSIZE)
+#define SPKEYMINVAL(pipe, plane) _MMIO_PIPE((pipe) * 2 + (plane), _SPAKEYMINVAL, _SPBKEYMINVAL)
+#define SPKEYMSK(pipe, plane) _MMIO_PIPE((pipe) * 2 + (plane), _SPAKEYMSK, _SPBKEYMSK)
+#define SPSURF(pipe, plane) _MMIO_PIPE((pipe) * 2 + (plane), _SPASURF, _SPBSURF)
+#define SPKEYMAXVAL(pipe, plane) _MMIO_PIPE((pipe) * 2 + (plane), _SPAKEYMAXVAL, _SPBKEYMAXVAL)
+#define SPTILEOFF(pipe, plane) _MMIO_PIPE((pipe) * 2 + (plane), _SPATILEOFF, _SPBTILEOFF)
+#define SPCONSTALPHA(pipe, plane) _MMIO_PIPE((pipe) * 2 + (plane), _SPACONSTALPHA, _SPBCONSTALPHA)
+#define SPGAMC(pipe, plane) _MMIO_PIPE((pipe) * 2 + (plane), _SPAGAMC, _SPBGAMC)
 
 /*
  * CHV pipe B sprite CSC
@@ -5243,29 +5306,29 @@
  * |yg| = |c3 c4 c5| x |yg + yg_ioff| + |yg_ooff|
  * |cb|   |c6 c7 c8|   |cb + cr_ioff|   |cb_ooff|
  */
-#define SPCSCYGOFF(sprite)	(VLV_DISPLAY_BASE + 0x6d900 + (sprite) * 0x1000)
-#define SPCSCCBOFF(sprite)	(VLV_DISPLAY_BASE + 0x6d904 + (sprite) * 0x1000)
-#define SPCSCCROFF(sprite)	(VLV_DISPLAY_BASE + 0x6d908 + (sprite) * 0x1000)
+#define SPCSCYGOFF(sprite)	_MMIO(VLV_DISPLAY_BASE + 0x6d900 + (sprite) * 0x1000)
+#define SPCSCCBOFF(sprite)	_MMIO(VLV_DISPLAY_BASE + 0x6d904 + (sprite) * 0x1000)
+#define SPCSCCROFF(sprite)	_MMIO(VLV_DISPLAY_BASE + 0x6d908 + (sprite) * 0x1000)
 #define  SPCSC_OOFF(x)		(((x) & 0x7ff) << 16) /* s11 */
 #define  SPCSC_IOFF(x)		(((x) & 0x7ff) << 0) /* s11 */
 
-#define SPCSCC01(sprite)	(VLV_DISPLAY_BASE + 0x6d90c + (sprite) * 0x1000)
-#define SPCSCC23(sprite)	(VLV_DISPLAY_BASE + 0x6d910 + (sprite) * 0x1000)
-#define SPCSCC45(sprite)	(VLV_DISPLAY_BASE + 0x6d914 + (sprite) * 0x1000)
-#define SPCSCC67(sprite)	(VLV_DISPLAY_BASE + 0x6d918 + (sprite) * 0x1000)
-#define SPCSCC8(sprite)		(VLV_DISPLAY_BASE + 0x6d91c + (sprite) * 0x1000)
+#define SPCSCC01(sprite)	_MMIO(VLV_DISPLAY_BASE + 0x6d90c + (sprite) * 0x1000)
+#define SPCSCC23(sprite)	_MMIO(VLV_DISPLAY_BASE + 0x6d910 + (sprite) * 0x1000)
+#define SPCSCC45(sprite)	_MMIO(VLV_DISPLAY_BASE + 0x6d914 + (sprite) * 0x1000)
+#define SPCSCC67(sprite)	_MMIO(VLV_DISPLAY_BASE + 0x6d918 + (sprite) * 0x1000)
+#define SPCSCC8(sprite)		_MMIO(VLV_DISPLAY_BASE + 0x6d91c + (sprite) * 0x1000)
 #define  SPCSC_C1(x)		(((x) & 0x7fff) << 16) /* s3.12 */
 #define  SPCSC_C0(x)		(((x) & 0x7fff) << 0) /* s3.12 */
 
-#define SPCSCYGICLAMP(sprite)	(VLV_DISPLAY_BASE + 0x6d920 + (sprite) * 0x1000)
-#define SPCSCCBICLAMP(sprite)	(VLV_DISPLAY_BASE + 0x6d924 + (sprite) * 0x1000)
-#define SPCSCCRICLAMP(sprite)	(VLV_DISPLAY_BASE + 0x6d928 + (sprite) * 0x1000)
+#define SPCSCYGICLAMP(sprite)	_MMIO(VLV_DISPLAY_BASE + 0x6d920 + (sprite) * 0x1000)
+#define SPCSCCBICLAMP(sprite)	_MMIO(VLV_DISPLAY_BASE + 0x6d924 + (sprite) * 0x1000)
+#define SPCSCCRICLAMP(sprite)	_MMIO(VLV_DISPLAY_BASE + 0x6d928 + (sprite) * 0x1000)
 #define  SPCSC_IMAX(x)		(((x) & 0x7ff) << 16) /* s11 */
 #define  SPCSC_IMIN(x)		(((x) & 0x7ff) << 0) /* s11 */
 
-#define SPCSCYGOCLAMP(sprite)	(VLV_DISPLAY_BASE + 0x6d92c + (sprite) * 0x1000)
-#define SPCSCCBOCLAMP(sprite)	(VLV_DISPLAY_BASE + 0x6d930 + (sprite) * 0x1000)
-#define SPCSCCROCLAMP(sprite)	(VLV_DISPLAY_BASE + 0x6d934 + (sprite) * 0x1000)
+#define SPCSCYGOCLAMP(sprite)	_MMIO(VLV_DISPLAY_BASE + 0x6d92c + (sprite) * 0x1000)
+#define SPCSCCBOCLAMP(sprite)	_MMIO(VLV_DISPLAY_BASE + 0x6d930 + (sprite) * 0x1000)
+#define SPCSCCROCLAMP(sprite)	_MMIO(VLV_DISPLAY_BASE + 0x6d934 + (sprite) * 0x1000)
 #define  SPCSC_OMAX(x)		((x) << 16) /* u10 */
 #define  SPCSC_OMIN(x)		((x) << 0) /* u10 */
 
@@ -5346,7 +5409,7 @@
 #define _PLANE_CTL_2(pipe)	_PIPE(pipe, _PLANE_CTL_2_A, _PLANE_CTL_2_B)
 #define _PLANE_CTL_3(pipe)	_PIPE(pipe, _PLANE_CTL_3_A, _PLANE_CTL_3_B)
 #define PLANE_CTL(pipe, plane)	\
-	_PLANE(plane, _PLANE_CTL_1(pipe), _PLANE_CTL_2(pipe))
+	_MMIO_PLANE(plane, _PLANE_CTL_1(pipe), _PLANE_CTL_2(pipe))
 
 #define _PLANE_STRIDE_1_B			0x71188
 #define _PLANE_STRIDE_2_B			0x71288
@@ -5358,7 +5421,7 @@
 #define _PLANE_STRIDE_3(pipe)	\
 	_PIPE(pipe, _PLANE_STRIDE_3_A, _PLANE_STRIDE_3_B)
 #define PLANE_STRIDE(pipe, plane)	\
-	_PLANE(plane, _PLANE_STRIDE_1(pipe), _PLANE_STRIDE_2(pipe))
+	_MMIO_PLANE(plane, _PLANE_STRIDE_1(pipe), _PLANE_STRIDE_2(pipe))
 
 #define _PLANE_POS_1_B				0x7118c
 #define _PLANE_POS_2_B				0x7128c
@@ -5367,7 +5430,7 @@
 #define _PLANE_POS_2(pipe)	_PIPE(pipe, _PLANE_POS_2_A, _PLANE_POS_2_B)
 #define _PLANE_POS_3(pipe)	_PIPE(pipe, _PLANE_POS_3_A, _PLANE_POS_3_B)
 #define PLANE_POS(pipe, plane)	\
-	_PLANE(plane, _PLANE_POS_1(pipe), _PLANE_POS_2(pipe))
+	_MMIO_PLANE(plane, _PLANE_POS_1(pipe), _PLANE_POS_2(pipe))
 
 #define _PLANE_SIZE_1_B				0x71190
 #define _PLANE_SIZE_2_B				0x71290
@@ -5376,7 +5439,7 @@
 #define _PLANE_SIZE_2(pipe)	_PIPE(pipe, _PLANE_SIZE_2_A, _PLANE_SIZE_2_B)
 #define _PLANE_SIZE_3(pipe)	_PIPE(pipe, _PLANE_SIZE_3_A, _PLANE_SIZE_3_B)
 #define PLANE_SIZE(pipe, plane)	\
-	_PLANE(plane, _PLANE_SIZE_1(pipe), _PLANE_SIZE_2(pipe))
+	_MMIO_PLANE(plane, _PLANE_SIZE_1(pipe), _PLANE_SIZE_2(pipe))
 
 #define _PLANE_SURF_1_B				0x7119c
 #define _PLANE_SURF_2_B				0x7129c
@@ -5385,35 +5448,35 @@
 #define _PLANE_SURF_2(pipe)	_PIPE(pipe, _PLANE_SURF_2_A, _PLANE_SURF_2_B)
 #define _PLANE_SURF_3(pipe)	_PIPE(pipe, _PLANE_SURF_3_A, _PLANE_SURF_3_B)
 #define PLANE_SURF(pipe, plane)	\
-	_PLANE(plane, _PLANE_SURF_1(pipe), _PLANE_SURF_2(pipe))
+	_MMIO_PLANE(plane, _PLANE_SURF_1(pipe), _PLANE_SURF_2(pipe))
 
 #define _PLANE_OFFSET_1_B			0x711a4
 #define _PLANE_OFFSET_2_B			0x712a4
 #define _PLANE_OFFSET_1(pipe) _PIPE(pipe, _PLANE_OFFSET_1_A, _PLANE_OFFSET_1_B)
 #define _PLANE_OFFSET_2(pipe) _PIPE(pipe, _PLANE_OFFSET_2_A, _PLANE_OFFSET_2_B)
 #define PLANE_OFFSET(pipe, plane)	\
-	_PLANE(plane, _PLANE_OFFSET_1(pipe), _PLANE_OFFSET_2(pipe))
+	_MMIO_PLANE(plane, _PLANE_OFFSET_1(pipe), _PLANE_OFFSET_2(pipe))
 
 #define _PLANE_KEYVAL_1_B			0x71194
 #define _PLANE_KEYVAL_2_B			0x71294
 #define _PLANE_KEYVAL_1(pipe) _PIPE(pipe, _PLANE_KEYVAL_1_A, _PLANE_KEYVAL_1_B)
 #define _PLANE_KEYVAL_2(pipe) _PIPE(pipe, _PLANE_KEYVAL_2_A, _PLANE_KEYVAL_2_B)
 #define PLANE_KEYVAL(pipe, plane)	\
-	_PLANE(plane, _PLANE_KEYVAL_1(pipe), _PLANE_KEYVAL_2(pipe))
+	_MMIO_PLANE(plane, _PLANE_KEYVAL_1(pipe), _PLANE_KEYVAL_2(pipe))
 
 #define _PLANE_KEYMSK_1_B			0x71198
 #define _PLANE_KEYMSK_2_B			0x71298
 #define _PLANE_KEYMSK_1(pipe) _PIPE(pipe, _PLANE_KEYMSK_1_A, _PLANE_KEYMSK_1_B)
 #define _PLANE_KEYMSK_2(pipe) _PIPE(pipe, _PLANE_KEYMSK_2_A, _PLANE_KEYMSK_2_B)
 #define PLANE_KEYMSK(pipe, plane)	\
-	_PLANE(plane, _PLANE_KEYMSK_1(pipe), _PLANE_KEYMSK_2(pipe))
+	_MMIO_PLANE(plane, _PLANE_KEYMSK_1(pipe), _PLANE_KEYMSK_2(pipe))
 
 #define _PLANE_KEYMAX_1_B			0x711a0
 #define _PLANE_KEYMAX_2_B			0x712a0
 #define _PLANE_KEYMAX_1(pipe) _PIPE(pipe, _PLANE_KEYMAX_1_A, _PLANE_KEYMAX_1_B)
 #define _PLANE_KEYMAX_2(pipe) _PIPE(pipe, _PLANE_KEYMAX_2_A, _PLANE_KEYMAX_2_B)
 #define PLANE_KEYMAX(pipe, plane)	\
-	_PLANE(plane, _PLANE_KEYMAX_1(pipe), _PLANE_KEYMAX_2(pipe))
+	_MMIO_PLANE(plane, _PLANE_KEYMAX_1(pipe), _PLANE_KEYMAX_2(pipe))
 
 #define _PLANE_BUF_CFG_1_B			0x7127c
 #define _PLANE_BUF_CFG_2_B			0x7137c
@@ -5422,7 +5485,7 @@
 #define _PLANE_BUF_CFG_2(pipe)	\
 	_PIPE(pipe, _PLANE_BUF_CFG_2_A, _PLANE_BUF_CFG_2_B)
 #define PLANE_BUF_CFG(pipe, plane)	\
-	_PLANE(plane, _PLANE_BUF_CFG_1(pipe), _PLANE_BUF_CFG_2(pipe))
+	_MMIO_PLANE(plane, _PLANE_BUF_CFG_1(pipe), _PLANE_BUF_CFG_2(pipe))
 
 #define _PLANE_NV12_BUF_CFG_1_B		0x71278
 #define _PLANE_NV12_BUF_CFG_2_B		0x71378
@@ -5431,26 +5494,26 @@
 #define _PLANE_NV12_BUF_CFG_2(pipe)	\
 	_PIPE(pipe, _PLANE_NV12_BUF_CFG_2_A, _PLANE_NV12_BUF_CFG_2_B)
 #define PLANE_NV12_BUF_CFG(pipe, plane)	\
-	_PLANE(plane, _PLANE_NV12_BUF_CFG_1(pipe), _PLANE_NV12_BUF_CFG_2(pipe))
+	_MMIO_PLANE(plane, _PLANE_NV12_BUF_CFG_1(pipe), _PLANE_NV12_BUF_CFG_2(pipe))
 
 /* SKL new cursor registers */
 #define _CUR_BUF_CFG_A				0x7017c
 #define _CUR_BUF_CFG_B				0x7117c
-#define CUR_BUF_CFG(pipe)	_PIPE(pipe, _CUR_BUF_CFG_A, _CUR_BUF_CFG_B)
+#define CUR_BUF_CFG(pipe)	_MMIO_PIPE(pipe, _CUR_BUF_CFG_A, _CUR_BUF_CFG_B)
 
 /* VBIOS regs */
-#define VGACNTRL		0x71400
+#define VGACNTRL		_MMIO(0x71400)
 # define VGA_DISP_DISABLE			(1 << 31)
 # define VGA_2X_MODE				(1 << 30)
 # define VGA_PIPE_B_SELECT			(1 << 29)
 
-#define VLV_VGACNTRL		(VLV_DISPLAY_BASE + 0x71400)
+#define VLV_VGACNTRL		_MMIO(VLV_DISPLAY_BASE + 0x71400)
 
 /* Ironlake */
 
-#define CPU_VGACNTRL	0x41000
+#define CPU_VGACNTRL	_MMIO(0x41000)
 
-#define DIGITAL_PORT_HOTPLUG_CNTRL	0x44030
+#define DIGITAL_PORT_HOTPLUG_CNTRL	_MMIO(0x44030)
 #define  DIGITAL_PORTA_HOTPLUG_ENABLE		(1 << 4)
 #define  DIGITAL_PORTA_PULSE_DURATION_2ms	(0 << 2) /* pre-HSW */
 #define  DIGITAL_PORTA_PULSE_DURATION_4_5ms	(1 << 2) /* pre-HSW */
@@ -5463,26 +5526,26 @@
 #define  DIGITAL_PORTA_HOTPLUG_LONG_DETECT	(2 << 0)
 
 /* refresh rate hardware control */
-#define RR_HW_CTL       0x45300
+#define RR_HW_CTL       _MMIO(0x45300)
 #define  RR_HW_LOW_POWER_FRAMES_MASK    0xff
 #define  RR_HW_HIGH_POWER_FRAMES_MASK   0xff00
 
-#define FDI_PLL_BIOS_0  0x46000
+#define FDI_PLL_BIOS_0  _MMIO(0x46000)
 #define  FDI_PLL_FB_CLOCK_MASK  0xff
-#define FDI_PLL_BIOS_1  0x46004
-#define FDI_PLL_BIOS_2  0x46008
-#define DISPLAY_PORT_PLL_BIOS_0         0x4600c
-#define DISPLAY_PORT_PLL_BIOS_1         0x46010
-#define DISPLAY_PORT_PLL_BIOS_2         0x46014
+#define FDI_PLL_BIOS_1  _MMIO(0x46004)
+#define FDI_PLL_BIOS_2  _MMIO(0x46008)
+#define DISPLAY_PORT_PLL_BIOS_0         _MMIO(0x4600c)
+#define DISPLAY_PORT_PLL_BIOS_1         _MMIO(0x46010)
+#define DISPLAY_PORT_PLL_BIOS_2         _MMIO(0x46014)
 
-#define PCH_3DCGDIS0		0x46020
+#define PCH_3DCGDIS0		_MMIO(0x46020)
 # define MARIUNIT_CLOCK_GATE_DISABLE		(1 << 18)
 # define SVSMUNIT_CLOCK_GATE_DISABLE		(1 << 1)
 
-#define PCH_3DCGDIS1		0x46024
+#define PCH_3DCGDIS1		_MMIO(0x46024)
 # define VFMUNIT_CLOCK_GATE_DISABLE		(1 << 11)
 
-#define FDI_PLL_FREQ_CTL        0x46030
+#define FDI_PLL_FREQ_CTL        _MMIO(0x46030)
 #define  FDI_PLL_FREQ_CHANGE_REQUEST    (1<<24)
 #define  FDI_PLL_FREQ_LOCK_LIMIT_MASK   0xfff00
 #define  FDI_PLL_FREQ_DISABLE_COUNT_LIMIT_MASK  0xff
@@ -5519,14 +5582,14 @@
 #define _PIPEB_LINK_M2		0x61048
 #define _PIPEB_LINK_N2		0x6104c
 
-#define PIPE_DATA_M1(tran) _TRANSCODER2(tran, _PIPEA_DATA_M1)
-#define PIPE_DATA_N1(tran) _TRANSCODER2(tran, _PIPEA_DATA_N1)
-#define PIPE_DATA_M2(tran) _TRANSCODER2(tran, _PIPEA_DATA_M2)
-#define PIPE_DATA_N2(tran) _TRANSCODER2(tran, _PIPEA_DATA_N2)
-#define PIPE_LINK_M1(tran) _TRANSCODER2(tran, _PIPEA_LINK_M1)
-#define PIPE_LINK_N1(tran) _TRANSCODER2(tran, _PIPEA_LINK_N1)
-#define PIPE_LINK_M2(tran) _TRANSCODER2(tran, _PIPEA_LINK_M2)
-#define PIPE_LINK_N2(tran) _TRANSCODER2(tran, _PIPEA_LINK_N2)
+#define PIPE_DATA_M1(tran) _MMIO_TRANS2(tran, _PIPEA_DATA_M1)
+#define PIPE_DATA_N1(tran) _MMIO_TRANS2(tran, _PIPEA_DATA_N1)
+#define PIPE_DATA_M2(tran) _MMIO_TRANS2(tran, _PIPEA_DATA_M2)
+#define PIPE_DATA_N2(tran) _MMIO_TRANS2(tran, _PIPEA_DATA_N2)
+#define PIPE_LINK_M1(tran) _MMIO_TRANS2(tran, _PIPEA_LINK_M1)
+#define PIPE_LINK_N1(tran) _MMIO_TRANS2(tran, _PIPEA_LINK_N1)
+#define PIPE_LINK_M2(tran) _MMIO_TRANS2(tran, _PIPEA_LINK_M2)
+#define PIPE_LINK_N2(tran) _MMIO_TRANS2(tran, _PIPEA_LINK_N2)
 
 /* CPU panel fitter */
 /* IVB+ has 3 fitters, 0 is 7x5 capable, the other two only 3x3 */
@@ -5549,11 +5612,11 @@
 #define _PFA_HSCALE		0x68090
 #define _PFB_HSCALE		0x68890
 
-#define PF_CTL(pipe)		_PIPE(pipe, _PFA_CTL_1, _PFB_CTL_1)
-#define PF_WIN_SZ(pipe)		_PIPE(pipe, _PFA_WIN_SZ, _PFB_WIN_SZ)
-#define PF_WIN_POS(pipe)	_PIPE(pipe, _PFA_WIN_POS, _PFB_WIN_POS)
-#define PF_VSCALE(pipe)		_PIPE(pipe, _PFA_VSCALE, _PFB_VSCALE)
-#define PF_HSCALE(pipe)		_PIPE(pipe, _PFA_HSCALE, _PFB_HSCALE)
+#define PF_CTL(pipe)		_MMIO_PIPE(pipe, _PFA_CTL_1, _PFB_CTL_1)
+#define PF_WIN_SZ(pipe)		_MMIO_PIPE(pipe, _PFA_WIN_SZ, _PFB_WIN_SZ)
+#define PF_WIN_POS(pipe)	_MMIO_PIPE(pipe, _PFA_WIN_POS, _PFB_WIN_POS)
+#define PF_VSCALE(pipe)		_MMIO_PIPE(pipe, _PFA_VSCALE, _PFB_VSCALE)
+#define PF_HSCALE(pipe)		_MMIO_PIPE(pipe, _PFA_HSCALE, _PFB_HSCALE)
 
 #define _PSA_CTL		0x68180
 #define _PSB_CTL		0x68980
@@ -5563,9 +5626,9 @@
 #define _PSA_WIN_POS		0x68170
 #define _PSB_WIN_POS		0x68970
 
-#define PS_CTL(pipe)		_PIPE(pipe, _PSA_CTL, _PSB_CTL)
-#define PS_WIN_SZ(pipe)		_PIPE(pipe, _PSA_WIN_SZ, _PSB_WIN_SZ)
-#define PS_WIN_POS(pipe)	_PIPE(pipe, _PSA_WIN_POS, _PSB_WIN_POS)
+#define PS_CTL(pipe)		_MMIO_PIPE(pipe, _PSA_CTL, _PSB_CTL)
+#define PS_WIN_SZ(pipe)		_MMIO_PIPE(pipe, _PSA_WIN_SZ, _PSB_WIN_SZ)
+#define PS_WIN_POS(pipe)	_MMIO_PIPE(pipe, _PSA_WIN_POS, _PSB_WIN_POS)
 
 /*
  * Skylake scalers
@@ -5654,48 +5717,63 @@
 #define _PS_ECC_STAT_1C     0x691D0
 
 #define _ID(id, a, b) ((a) + (id)*((b)-(a)))
-#define SKL_PS_CTRL(pipe, id) _PIPE(pipe,        \
+#define SKL_PS_CTRL(pipe, id) _MMIO_PIPE(pipe,        \
 			_ID(id, _PS_1A_CTRL, _PS_2A_CTRL),       \
 			_ID(id, _PS_1B_CTRL, _PS_2B_CTRL))
-#define SKL_PS_PWR_GATE(pipe, id) _PIPE(pipe,    \
+#define SKL_PS_PWR_GATE(pipe, id) _MMIO_PIPE(pipe,    \
 			_ID(id, _PS_PWR_GATE_1A, _PS_PWR_GATE_2A), \
 			_ID(id, _PS_PWR_GATE_1B, _PS_PWR_GATE_2B))
-#define SKL_PS_WIN_POS(pipe, id) _PIPE(pipe,     \
+#define SKL_PS_WIN_POS(pipe, id) _MMIO_PIPE(pipe,     \
 			_ID(id, _PS_WIN_POS_1A, _PS_WIN_POS_2A), \
 			_ID(id, _PS_WIN_POS_1B, _PS_WIN_POS_2B))
-#define SKL_PS_WIN_SZ(pipe, id)  _PIPE(pipe,     \
+#define SKL_PS_WIN_SZ(pipe, id)  _MMIO_PIPE(pipe,     \
 			_ID(id, _PS_WIN_SZ_1A, _PS_WIN_SZ_2A),   \
 			_ID(id, _PS_WIN_SZ_1B, _PS_WIN_SZ_2B))
-#define SKL_PS_VSCALE(pipe, id)  _PIPE(pipe,     \
+#define SKL_PS_VSCALE(pipe, id)  _MMIO_PIPE(pipe,     \
 			_ID(id, _PS_VSCALE_1A, _PS_VSCALE_2A),   \
 			_ID(id, _PS_VSCALE_1B, _PS_VSCALE_2B))
-#define SKL_PS_HSCALE(pipe, id)  _PIPE(pipe,     \
+#define SKL_PS_HSCALE(pipe, id)  _MMIO_PIPE(pipe,     \
 			_ID(id, _PS_HSCALE_1A, _PS_HSCALE_2A),   \
 			_ID(id, _PS_HSCALE_1B, _PS_HSCALE_2B))
-#define SKL_PS_VPHASE(pipe, id)  _PIPE(pipe,     \
+#define SKL_PS_VPHASE(pipe, id)  _MMIO_PIPE(pipe,     \
 			_ID(id, _PS_VPHASE_1A, _PS_VPHASE_2A),   \
 			_ID(id, _PS_VPHASE_1B, _PS_VPHASE_2B))
-#define SKL_PS_HPHASE(pipe, id)  _PIPE(pipe,     \
+#define SKL_PS_HPHASE(pipe, id)  _MMIO_PIPE(pipe,     \
 			_ID(id, _PS_HPHASE_1A, _PS_HPHASE_2A),   \
 			_ID(id, _PS_HPHASE_1B, _PS_HPHASE_2B))
-#define SKL_PS_ECC_STAT(pipe, id)  _PIPE(pipe,     \
+#define SKL_PS_ECC_STAT(pipe, id)  _MMIO_PIPE(pipe,     \
 			_ID(id, _PS_ECC_STAT_1A, _PS_ECC_STAT_2A),   \
-			_ID(id, _PS_ECC_STAT_1B, _PS_ECC_STAT_2B)
+			_ID(id, _PS_ECC_STAT_1B, _PS_ECC_STAT_2B))
 
 /* legacy palette */
 #define _LGC_PALETTE_A           0x4a000
 #define _LGC_PALETTE_B           0x4a800
-#define LGC_PALETTE(pipe, i) (_PIPE(pipe, _LGC_PALETTE_A, _LGC_PALETTE_B) + (i) * 4)
+#define LGC_PALETTE(pipe, i) _MMIO(_PIPE(pipe, _LGC_PALETTE_A, _LGC_PALETTE_B) + (i) * 4)
 
 #define _GAMMA_MODE_A		0x4a480
 #define _GAMMA_MODE_B		0x4ac80
-#define GAMMA_MODE(pipe) _PIPE(pipe, _GAMMA_MODE_A, _GAMMA_MODE_B)
+#define GAMMA_MODE(pipe) _MMIO_PIPE(pipe, _GAMMA_MODE_A, _GAMMA_MODE_B)
 #define GAMMA_MODE_MODE_MASK	(3 << 0)
 #define GAMMA_MODE_MODE_8BIT	(0 << 0)
 #define GAMMA_MODE_MODE_10BIT	(1 << 0)
 #define GAMMA_MODE_MODE_12BIT	(2 << 0)
 #define GAMMA_MODE_MODE_SPLIT	(3 << 0)
 
+/* DMC/CSR */
+#define CSR_PROGRAM(i)		_MMIO(0x80000 + (i) * 4)
+#define CSR_SSP_BASE_ADDR_GEN9	0x00002FC0
+#define CSR_HTP_ADDR_SKL	0x00500034
+#define CSR_SSP_BASE		_MMIO(0x8F074)
+#define CSR_HTP_SKL		_MMIO(0x8F004)
+#define CSR_LAST_WRITE		_MMIO(0x8F034)
+#define CSR_LAST_WRITE_VALUE	0xc003b400
+/* MMIO address range for CSR program (0x80000 - 0x82FFF) */
+#define CSR_MMIO_START_RANGE	0x80000
+#define CSR_MMIO_END_RANGE	0x8FFFF
+#define SKL_CSR_DC3_DC5_COUNT	_MMIO(0x80030)
+#define SKL_CSR_DC5_DC6_COUNT	_MMIO(0x8002C)
+#define BXT_CSR_DC3_DC5_COUNT	_MMIO(0x80038)
+
 /* interrupts */
 #define DE_MASTER_IRQ_CONTROL   (1 << 31)
 #define DE_SPRITEB_FLIP_DONE    (1 << 29)
@@ -5747,20 +5825,20 @@
 #define DE_PIPEA_VBLANK_IVB		(1<<0)
 #define DE_PIPE_VBLANK_IVB(pipe)	(1 << ((pipe) * 5))
 
-#define VLV_MASTER_IER			0x4400c /* Gunit master IER */
+#define VLV_MASTER_IER			_MMIO(0x4400c) /* Gunit master IER */
 #define   MASTER_INTERRUPT_ENABLE	(1<<31)
 
-#define DEISR   0x44000
-#define DEIMR   0x44004
-#define DEIIR   0x44008
-#define DEIER   0x4400c
+#define DEISR   _MMIO(0x44000)
+#define DEIMR   _MMIO(0x44004)
+#define DEIIR   _MMIO(0x44008)
+#define DEIER   _MMIO(0x4400c)
 
-#define GTISR   0x44010
-#define GTIMR   0x44014
-#define GTIIR   0x44018
-#define GTIER   0x4401c
+#define GTISR   _MMIO(0x44010)
+#define GTIMR   _MMIO(0x44014)
+#define GTIIR   _MMIO(0x44018)
+#define GTIER   _MMIO(0x4401c)
 
-#define GEN8_MASTER_IRQ			0x44200
+#define GEN8_MASTER_IRQ			_MMIO(0x44200)
 #define  GEN8_MASTER_IRQ_CONTROL	(1<<31)
 #define  GEN8_PCU_IRQ			(1<<30)
 #define  GEN8_DE_PCH_IRQ		(1<<23)
@@ -5777,10 +5855,10 @@
 #define  GEN8_GT_BCS_IRQ		(1<<1)
 #define  GEN8_GT_RCS_IRQ		(1<<0)
 
-#define GEN8_GT_ISR(which) (0x44300 + (0x10 * (which)))
-#define GEN8_GT_IMR(which) (0x44304 + (0x10 * (which)))
-#define GEN8_GT_IIR(which) (0x44308 + (0x10 * (which)))
-#define GEN8_GT_IER(which) (0x4430c + (0x10 * (which)))
+#define GEN8_GT_ISR(which) _MMIO(0x44300 + (0x10 * (which)))
+#define GEN8_GT_IMR(which) _MMIO(0x44304 + (0x10 * (which)))
+#define GEN8_GT_IIR(which) _MMIO(0x44308 + (0x10 * (which)))
+#define GEN8_GT_IER(which) _MMIO(0x4430c + (0x10 * (which)))
 
 #define GEN8_RCS_IRQ_SHIFT 0
 #define GEN8_BCS_IRQ_SHIFT 16
@@ -5789,10 +5867,10 @@
 #define GEN8_VECS_IRQ_SHIFT 0
 #define GEN8_WD_IRQ_SHIFT 16
 
-#define GEN8_DE_PIPE_ISR(pipe) (0x44400 + (0x10 * (pipe)))
-#define GEN8_DE_PIPE_IMR(pipe) (0x44404 + (0x10 * (pipe)))
-#define GEN8_DE_PIPE_IIR(pipe) (0x44408 + (0x10 * (pipe)))
-#define GEN8_DE_PIPE_IER(pipe) (0x4440c + (0x10 * (pipe)))
+#define GEN8_DE_PIPE_ISR(pipe) _MMIO(0x44400 + (0x10 * (pipe)))
+#define GEN8_DE_PIPE_IMR(pipe) _MMIO(0x44404 + (0x10 * (pipe)))
+#define GEN8_DE_PIPE_IIR(pipe) _MMIO(0x44408 + (0x10 * (pipe)))
+#define GEN8_DE_PIPE_IER(pipe) _MMIO(0x4440c + (0x10 * (pipe)))
 #define  GEN8_PIPE_FIFO_UNDERRUN	(1 << 31)
 #define  GEN8_PIPE_CDCLK_CRC_ERROR	(1 << 29)
 #define  GEN8_PIPE_CDCLK_CRC_DONE	(1 << 28)
@@ -5825,10 +5903,10 @@
 	 GEN9_PIPE_PLANE2_FAULT | \
 	 GEN9_PIPE_PLANE1_FAULT)
 
-#define GEN8_DE_PORT_ISR 0x44440
-#define GEN8_DE_PORT_IMR 0x44444
-#define GEN8_DE_PORT_IIR 0x44448
-#define GEN8_DE_PORT_IER 0x4444c
+#define GEN8_DE_PORT_ISR _MMIO(0x44440)
+#define GEN8_DE_PORT_IMR _MMIO(0x44444)
+#define GEN8_DE_PORT_IIR _MMIO(0x44448)
+#define GEN8_DE_PORT_IER _MMIO(0x4444c)
 #define  GEN9_AUX_CHANNEL_D		(1 << 27)
 #define  GEN9_AUX_CHANNEL_C		(1 << 26)
 #define  GEN9_AUX_CHANNEL_B		(1 << 25)
@@ -5842,23 +5920,23 @@
 #define  BXT_DE_PORT_GMBUS		(1 << 1)
 #define  GEN8_AUX_CHANNEL_A		(1 << 0)
 
-#define GEN8_DE_MISC_ISR 0x44460
-#define GEN8_DE_MISC_IMR 0x44464
-#define GEN8_DE_MISC_IIR 0x44468
-#define GEN8_DE_MISC_IER 0x4446c
+#define GEN8_DE_MISC_ISR _MMIO(0x44460)
+#define GEN8_DE_MISC_IMR _MMIO(0x44464)
+#define GEN8_DE_MISC_IIR _MMIO(0x44468)
+#define GEN8_DE_MISC_IER _MMIO(0x4446c)
 #define  GEN8_DE_MISC_GSE		(1 << 27)
 
-#define GEN8_PCU_ISR 0x444e0
-#define GEN8_PCU_IMR 0x444e4
-#define GEN8_PCU_IIR 0x444e8
-#define GEN8_PCU_IER 0x444ec
+#define GEN8_PCU_ISR _MMIO(0x444e0)
+#define GEN8_PCU_IMR _MMIO(0x444e4)
+#define GEN8_PCU_IIR _MMIO(0x444e8)
+#define GEN8_PCU_IER _MMIO(0x444ec)
 
-#define ILK_DISPLAY_CHICKEN2	0x42004
+#define ILK_DISPLAY_CHICKEN2	_MMIO(0x42004)
 /* Required on all Ironlake and Sandybridge according to the B-Spec. */
 #define  ILK_ELPIN_409_SELECT	(1 << 25)
 #define  ILK_DPARB_GATE	(1<<22)
 #define  ILK_VSDPFD_FULL	(1<<21)
-#define FUSE_STRAP			0x42014
+#define FUSE_STRAP			_MMIO(0x42014)
 #define  ILK_INTERNAL_GRAPHICS_DISABLE	(1 << 31)
 #define  ILK_INTERNAL_DISPLAY_DISABLE	(1 << 30)
 #define  ILK_DISPLAY_DEBUG_DISABLE	(1 << 29)
@@ -5867,18 +5945,18 @@
 #define  HSW_CDCLK_LIMIT		(1 << 24)
 #define  ILK_DESKTOP			(1 << 23)
 
-#define ILK_DSPCLK_GATE_D			0x42020
+#define ILK_DSPCLK_GATE_D			_MMIO(0x42020)
 #define   ILK_VRHUNIT_CLOCK_GATE_DISABLE	(1 << 28)
 #define   ILK_DPFCUNIT_CLOCK_GATE_DISABLE	(1 << 9)
 #define   ILK_DPFCRUNIT_CLOCK_GATE_DISABLE	(1 << 8)
 #define   ILK_DPFDUNIT_CLOCK_GATE_ENABLE	(1 << 7)
 #define   ILK_DPARBUNIT_CLOCK_GATE_ENABLE	(1 << 5)
 
-#define IVB_CHICKEN3	0x4200c
+#define IVB_CHICKEN3	_MMIO(0x4200c)
 # define CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE	(1 << 5)
 # define CHICKEN3_DGMG_DONE_FIX_DISABLE		(1 << 2)
 
-#define CHICKEN_PAR1_1		0x42080
+#define CHICKEN_PAR1_1		_MMIO(0x42080)
 #define  DPA_MASK_VBLANK_SRD	(1 << 15)
 #define  FORCE_ARB_IDLE_PLANES	(1 << 14)
 
@@ -5886,70 +5964,70 @@
 #define _CHICKEN_PIPESL_1_B	0x420b4
 #define  HSW_FBCQ_DIS			(1 << 22)
 #define  BDW_DPRS_MASK_VBLANK_SRD	(1 << 0)
-#define CHICKEN_PIPESL_1(pipe) _PIPE(pipe, _CHICKEN_PIPESL_1_A, _CHICKEN_PIPESL_1_B)
+#define CHICKEN_PIPESL_1(pipe) _MMIO_PIPE(pipe, _CHICKEN_PIPESL_1_A, _CHICKEN_PIPESL_1_B)
 
-#define DISP_ARB_CTL	0x45000
+#define DISP_ARB_CTL	_MMIO(0x45000)
 #define  DISP_TILE_SURFACE_SWIZZLING	(1<<13)
 #define  DISP_FBC_WM_DIS		(1<<15)
-#define DISP_ARB_CTL2	0x45004
+#define DISP_ARB_CTL2	_MMIO(0x45004)
 #define  DISP_DATA_PARTITION_5_6	(1<<6)
-#define DBUF_CTL	0x45008
+#define DBUF_CTL	_MMIO(0x45008)
 #define  DBUF_POWER_REQUEST		(1<<31)
 #define  DBUF_POWER_STATE		(1<<30)
-#define GEN7_MSG_CTL	0x45010
+#define GEN7_MSG_CTL	_MMIO(0x45010)
 #define  WAIT_FOR_PCH_RESET_ACK		(1<<1)
 #define  WAIT_FOR_PCH_FLR_ACK		(1<<0)
-#define HSW_NDE_RSTWRN_OPT	0x46408
+#define HSW_NDE_RSTWRN_OPT	_MMIO(0x46408)
 #define  RESET_PCH_HANDSHAKE_ENABLE	(1<<4)
 
-#define SKL_DFSM			0x51000
+#define SKL_DFSM			_MMIO(0x51000)
 #define SKL_DFSM_CDCLK_LIMIT_MASK	(3 << 23)
 #define SKL_DFSM_CDCLK_LIMIT_675	(0 << 23)
 #define SKL_DFSM_CDCLK_LIMIT_540	(1 << 23)
 #define SKL_DFSM_CDCLK_LIMIT_450	(2 << 23)
 #define SKL_DFSM_CDCLK_LIMIT_337_5	(3 << 23)
 
-#define FF_SLICE_CS_CHICKEN2			0x20e4
+#define FF_SLICE_CS_CHICKEN2			_MMIO(0x20e4)
 #define  GEN9_TSG_BARRIER_ACK_DISABLE		(1<<8)
 
 /* GEN7 chicken */
-#define GEN7_COMMON_SLICE_CHICKEN1		0x7010
+#define GEN7_COMMON_SLICE_CHICKEN1		_MMIO(0x7010)
 # define GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC	((1<<10) | (1<<26))
 # define GEN9_RHWO_OPTIMIZATION_DISABLE		(1<<14)
-#define COMMON_SLICE_CHICKEN2			0x7014
+#define COMMON_SLICE_CHICKEN2			_MMIO(0x7014)
 # define GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE	(1<<0)
 
-#define HIZ_CHICKEN					0x7018
+#define HIZ_CHICKEN					_MMIO(0x7018)
 # define CHV_HZ_8X8_MODE_IN_1X				(1<<15)
 # define BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE	(1<<3)
 
-#define GEN9_SLICE_COMMON_ECO_CHICKEN0		0x7308
+#define GEN9_SLICE_COMMON_ECO_CHICKEN0		_MMIO(0x7308)
 #define  DISABLE_PIXEL_MASK_CAMMING		(1<<14)
 
-#define GEN7_L3SQCREG1				0xB010
+#define GEN7_L3SQCREG1				_MMIO(0xB010)
 #define  VLV_B0_WA_L3SQCREG1_VALUE		0x00D30000
 
-#define GEN8_L3SQCREG1				0xB100
+#define GEN8_L3SQCREG1				_MMIO(0xB100)
 #define  BDW_WA_L3SQCREG1_DEFAULT		0x784000
 
-#define GEN7_L3CNTLREG1				0xB01C
+#define GEN7_L3CNTLREG1				_MMIO(0xB01C)
 #define  GEN7_WA_FOR_GEN7_L3_CONTROL			0x3C47FF8C
 #define  GEN7_L3AGDIS				(1<<19)
-#define GEN7_L3CNTLREG2				0xB020
-#define GEN7_L3CNTLREG3				0xB024
+#define GEN7_L3CNTLREG2				_MMIO(0xB020)
+#define GEN7_L3CNTLREG3				_MMIO(0xB024)
 
-#define GEN7_L3_CHICKEN_MODE_REGISTER		0xB030
+#define GEN7_L3_CHICKEN_MODE_REGISTER		_MMIO(0xB030)
 #define  GEN7_WA_L3_CHICKEN_MODE				0x20000000
 
-#define GEN7_L3SQCREG4				0xb034
+#define GEN7_L3SQCREG4				_MMIO(0xb034)
 #define  L3SQ_URB_READ_CAM_MATCH_DISABLE	(1<<27)
 
-#define GEN8_L3SQCREG4				0xb118
+#define GEN8_L3SQCREG4				_MMIO(0xb118)
 #define  GEN8_LQSC_RO_PERF_DIS			(1<<27)
 #define  GEN8_LQSC_FLUSH_COHERENT_LINES		(1<<21)
 
 /* GEN8 chicken */
-#define HDC_CHICKEN0				0x7300
+#define HDC_CHICKEN0				_MMIO(0x7300)
 #define  HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE	(1<<15)
 #define  HDC_FENCE_DEST_SLM_DISABLE		(1<<14)
 #define  HDC_DONOT_FETCH_MEM_WHEN_MASKED	(1<<11)
@@ -5958,17 +6036,17 @@
 #define  HDC_BARRIER_PERFORMANCE_DISABLE	(1<<10)
 
 /* GEN9 chicken */
-#define SLICE_ECO_CHICKEN0			0x7308
+#define SLICE_ECO_CHICKEN0			_MMIO(0x7308)
 #define   PIXEL_MASK_CAMMING_DISABLE		(1 << 14)
 
 /* WaCatErrorRejectionIssue */
-#define GEN7_SQ_CHICKEN_MBCUNIT_CONFIG		0x9030
+#define GEN7_SQ_CHICKEN_MBCUNIT_CONFIG		_MMIO(0x9030)
 #define  GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB	(1<<11)
 
-#define HSW_SCRATCH1				0xb038
+#define HSW_SCRATCH1				_MMIO(0xb038)
 #define  HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE	(1<<27)
 
-#define BDW_SCRATCH1					0xb11c
+#define BDW_SCRATCH1					_MMIO(0xb11c)
 #define  GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE	(1<<2)
 
 /* PCH */
@@ -6062,12 +6140,12 @@
 				 SDE_FDI_RXB_CPT | \
 				 SDE_FDI_RXA_CPT)
 
-#define SDEISR  0xc4000
-#define SDEIMR  0xc4004
-#define SDEIIR  0xc4008
-#define SDEIER  0xc400c
+#define SDEISR  _MMIO(0xc4000)
+#define SDEIMR  _MMIO(0xc4004)
+#define SDEIIR  _MMIO(0xc4008)
+#define SDEIER  _MMIO(0xc400c)
 
-#define SERR_INT			0xc4040
+#define SERR_INT			_MMIO(0xc4040)
 #define  SERR_INT_POISON		(1<<31)
 #define  SERR_INT_TRANS_C_FIFO_UNDERRUN	(1<<6)
 #define  SERR_INT_TRANS_B_FIFO_UNDERRUN	(1<<3)
@@ -6075,7 +6153,7 @@
 #define  SERR_INT_TRANS_FIFO_UNDERRUN(pipe)	(1<<((pipe)*3))
 
 /* digital port hotplug */
-#define PCH_PORT_HOTPLUG		0xc4030	/* SHOTPLUG_CTL */
+#define PCH_PORT_HOTPLUG		_MMIO(0xc4030)	/* SHOTPLUG_CTL */
 #define  PORTA_HOTPLUG_ENABLE		(1 << 28) /* LPT:LP+ & BXT */
 #define  PORTA_HOTPLUG_STATUS_MASK	(3 << 24) /* SPT+ & BXT */
 #define  PORTA_HOTPLUG_NO_DETECT	(0 << 24) /* SPT+ & BXT */
@@ -6112,42 +6190,42 @@
 #define  PORTB_HOTPLUG_SHORT_DETECT	(1 << 0)
 #define  PORTB_HOTPLUG_LONG_DETECT	(2 << 0)
 
-#define PCH_PORT_HOTPLUG2		0xc403C	/* SHOTPLUG_CTL2 SPT+ */
+#define PCH_PORT_HOTPLUG2		_MMIO(0xc403C)	/* SHOTPLUG_CTL2 SPT+ */
 #define  PORTE_HOTPLUG_ENABLE		(1 << 4)
 #define  PORTE_HOTPLUG_STATUS_MASK	(3 << 0)
 #define  PORTE_HOTPLUG_NO_DETECT	(0 << 0)
 #define  PORTE_HOTPLUG_SHORT_DETECT	(1 << 0)
 #define  PORTE_HOTPLUG_LONG_DETECT	(2 << 0)
 
-#define PCH_GPIOA               0xc5010
-#define PCH_GPIOB               0xc5014
-#define PCH_GPIOC               0xc5018
-#define PCH_GPIOD               0xc501c
-#define PCH_GPIOE               0xc5020
-#define PCH_GPIOF               0xc5024
+#define PCH_GPIOA               _MMIO(0xc5010)
+#define PCH_GPIOB               _MMIO(0xc5014)
+#define PCH_GPIOC               _MMIO(0xc5018)
+#define PCH_GPIOD               _MMIO(0xc501c)
+#define PCH_GPIOE               _MMIO(0xc5020)
+#define PCH_GPIOF               _MMIO(0xc5024)
 
-#define PCH_GMBUS0		0xc5100
-#define PCH_GMBUS1		0xc5104
-#define PCH_GMBUS2		0xc5108
-#define PCH_GMBUS3		0xc510c
-#define PCH_GMBUS4		0xc5110
-#define PCH_GMBUS5		0xc5120
+#define PCH_GMBUS0		_MMIO(0xc5100)
+#define PCH_GMBUS1		_MMIO(0xc5104)
+#define PCH_GMBUS2		_MMIO(0xc5108)
+#define PCH_GMBUS3		_MMIO(0xc510c)
+#define PCH_GMBUS4		_MMIO(0xc5110)
+#define PCH_GMBUS5		_MMIO(0xc5120)
 
 #define _PCH_DPLL_A              0xc6014
 #define _PCH_DPLL_B              0xc6018
-#define PCH_DPLL(pll) (pll == 0 ? _PCH_DPLL_A : _PCH_DPLL_B)
+#define PCH_DPLL(pll) _MMIO(pll == 0 ? _PCH_DPLL_A : _PCH_DPLL_B)
 
 #define _PCH_FPA0                0xc6040
 #define  FP_CB_TUNE		(0x3<<22)
 #define _PCH_FPA1                0xc6044
 #define _PCH_FPB0                0xc6048
 #define _PCH_FPB1                0xc604c
-#define PCH_FP0(pll) (pll == 0 ? _PCH_FPA0 : _PCH_FPB0)
-#define PCH_FP1(pll) (pll == 0 ? _PCH_FPA1 : _PCH_FPB1)
+#define PCH_FP0(pll) _MMIO(pll == 0 ? _PCH_FPA0 : _PCH_FPB0)
+#define PCH_FP1(pll) _MMIO(pll == 0 ? _PCH_FPA1 : _PCH_FPB1)
 
-#define PCH_DPLL_TEST           0xc606c
+#define PCH_DPLL_TEST           _MMIO(0xc606c)
 
-#define PCH_DREF_CONTROL        0xC6200
+#define PCH_DREF_CONTROL        _MMIO(0xC6200)
 #define  DREF_CONTROL_MASK      0x7fc3
 #define  DREF_CPU_SOURCE_OUTPUT_DISABLE         (0<<13)
 #define  DREF_CPU_SOURCE_OUTPUT_DOWNSPREAD      (2<<13)
@@ -6170,19 +6248,19 @@
 #define  DREF_SSC4_DISABLE                      (0)
 #define  DREF_SSC4_ENABLE                       (1)
 
-#define PCH_RAWCLK_FREQ         0xc6204
+#define PCH_RAWCLK_FREQ         _MMIO(0xc6204)
 #define  FDL_TP1_TIMER_SHIFT    12
 #define  FDL_TP1_TIMER_MASK     (3<<12)
 #define  FDL_TP2_TIMER_SHIFT    10
 #define  FDL_TP2_TIMER_MASK     (3<<10)
 #define  RAWCLK_FREQ_MASK       0x3ff
 
-#define PCH_DPLL_TMR_CFG        0xc6208
+#define PCH_DPLL_TMR_CFG        _MMIO(0xc6208)
 
-#define PCH_SSC4_PARMS          0xc6210
-#define PCH_SSC4_AUX_PARMS      0xc6214
+#define PCH_SSC4_PARMS          _MMIO(0xc6210)
+#define PCH_SSC4_AUX_PARMS      _MMIO(0xc6214)
 
-#define PCH_DPLL_SEL		0xc7000
+#define PCH_DPLL_SEL		_MMIO(0xc7000)
 #define	 TRANS_DPLLB_SEL(pipe)		(1 << ((pipe) * 4))
 #define	 TRANS_DPLLA_SEL(pipe)		0
 #define  TRANS_DPLL_ENABLE(pipe)	(1 << ((pipe) * 4 + 3))
@@ -6230,79 +6308,73 @@
 #define _VIDEO_DIP_DATA_B        0xe1208
 #define _VIDEO_DIP_GCP_B         0xe1210
 
-#define TVIDEO_DIP_CTL(pipe) _PIPE(pipe, _VIDEO_DIP_CTL_A, _VIDEO_DIP_CTL_B)
-#define TVIDEO_DIP_DATA(pipe) _PIPE(pipe, _VIDEO_DIP_DATA_A, _VIDEO_DIP_DATA_B)
-#define TVIDEO_DIP_GCP(pipe) _PIPE(pipe, _VIDEO_DIP_GCP_A, _VIDEO_DIP_GCP_B)
+#define TVIDEO_DIP_CTL(pipe) _MMIO_PIPE(pipe, _VIDEO_DIP_CTL_A, _VIDEO_DIP_CTL_B)
+#define TVIDEO_DIP_DATA(pipe) _MMIO_PIPE(pipe, _VIDEO_DIP_DATA_A, _VIDEO_DIP_DATA_B)
+#define TVIDEO_DIP_GCP(pipe) _MMIO_PIPE(pipe, _VIDEO_DIP_GCP_A, _VIDEO_DIP_GCP_B)
 
 /* Per-transcoder DIP controls (VLV) */
-#define VLV_VIDEO_DIP_CTL_A		(VLV_DISPLAY_BASE + 0x60200)
-#define VLV_VIDEO_DIP_DATA_A		(VLV_DISPLAY_BASE + 0x60208)
-#define VLV_VIDEO_DIP_GDCP_PAYLOAD_A	(VLV_DISPLAY_BASE + 0x60210)
+#define _VLV_VIDEO_DIP_CTL_A		(VLV_DISPLAY_BASE + 0x60200)
+#define _VLV_VIDEO_DIP_DATA_A		(VLV_DISPLAY_BASE + 0x60208)
+#define _VLV_VIDEO_DIP_GDCP_PAYLOAD_A	(VLV_DISPLAY_BASE + 0x60210)
 
-#define VLV_VIDEO_DIP_CTL_B		(VLV_DISPLAY_BASE + 0x61170)
-#define VLV_VIDEO_DIP_DATA_B		(VLV_DISPLAY_BASE + 0x61174)
-#define VLV_VIDEO_DIP_GDCP_PAYLOAD_B	(VLV_DISPLAY_BASE + 0x61178)
+#define _VLV_VIDEO_DIP_CTL_B		(VLV_DISPLAY_BASE + 0x61170)
+#define _VLV_VIDEO_DIP_DATA_B		(VLV_DISPLAY_BASE + 0x61174)
+#define _VLV_VIDEO_DIP_GDCP_PAYLOAD_B	(VLV_DISPLAY_BASE + 0x61178)
 
-#define CHV_VIDEO_DIP_CTL_C		(VLV_DISPLAY_BASE + 0x611f0)
-#define CHV_VIDEO_DIP_DATA_C		(VLV_DISPLAY_BASE + 0x611f4)
-#define CHV_VIDEO_DIP_GDCP_PAYLOAD_C	(VLV_DISPLAY_BASE + 0x611f8)
+#define _CHV_VIDEO_DIP_CTL_C		(VLV_DISPLAY_BASE + 0x611f0)
+#define _CHV_VIDEO_DIP_DATA_C		(VLV_DISPLAY_BASE + 0x611f4)
+#define _CHV_VIDEO_DIP_GDCP_PAYLOAD_C	(VLV_DISPLAY_BASE + 0x611f8)
 
 #define VLV_TVIDEO_DIP_CTL(pipe) \
-	_PIPE3((pipe), VLV_VIDEO_DIP_CTL_A, \
-	       VLV_VIDEO_DIP_CTL_B, CHV_VIDEO_DIP_CTL_C)
+	_MMIO_PIPE3((pipe), _VLV_VIDEO_DIP_CTL_A, \
+	       _VLV_VIDEO_DIP_CTL_B, _CHV_VIDEO_DIP_CTL_C)
 #define VLV_TVIDEO_DIP_DATA(pipe) \
-	_PIPE3((pipe), VLV_VIDEO_DIP_DATA_A, \
-	       VLV_VIDEO_DIP_DATA_B, CHV_VIDEO_DIP_DATA_C)
+	_MMIO_PIPE3((pipe), _VLV_VIDEO_DIP_DATA_A, \
+	       _VLV_VIDEO_DIP_DATA_B, _CHV_VIDEO_DIP_DATA_C)
 #define VLV_TVIDEO_DIP_GCP(pipe) \
-	_PIPE3((pipe), VLV_VIDEO_DIP_GDCP_PAYLOAD_A, \
-		VLV_VIDEO_DIP_GDCP_PAYLOAD_B, CHV_VIDEO_DIP_GDCP_PAYLOAD_C)
+	_MMIO_PIPE3((pipe), _VLV_VIDEO_DIP_GDCP_PAYLOAD_A, \
+		_VLV_VIDEO_DIP_GDCP_PAYLOAD_B, _CHV_VIDEO_DIP_GDCP_PAYLOAD_C)
 
 /* Haswell DIP controls */
-#define HSW_VIDEO_DIP_CTL_A		0x60200
-#define HSW_VIDEO_DIP_AVI_DATA_A	0x60220
-#define HSW_VIDEO_DIP_VS_DATA_A		0x60260
-#define HSW_VIDEO_DIP_SPD_DATA_A	0x602A0
-#define HSW_VIDEO_DIP_GMP_DATA_A	0x602E0
-#define HSW_VIDEO_DIP_VSC_DATA_A	0x60320
-#define HSW_VIDEO_DIP_AVI_ECC_A		0x60240
-#define HSW_VIDEO_DIP_VS_ECC_A		0x60280
-#define HSW_VIDEO_DIP_SPD_ECC_A		0x602C0
-#define HSW_VIDEO_DIP_GMP_ECC_A		0x60300
-#define HSW_VIDEO_DIP_VSC_ECC_A		0x60344
-#define HSW_VIDEO_DIP_GCP_A		0x60210
 
-#define HSW_VIDEO_DIP_CTL_B		0x61200
-#define HSW_VIDEO_DIP_AVI_DATA_B	0x61220
-#define HSW_VIDEO_DIP_VS_DATA_B		0x61260
-#define HSW_VIDEO_DIP_SPD_DATA_B	0x612A0
-#define HSW_VIDEO_DIP_GMP_DATA_B	0x612E0
-#define HSW_VIDEO_DIP_VSC_DATA_B	0x61320
-#define HSW_VIDEO_DIP_BVI_ECC_B		0x61240
-#define HSW_VIDEO_DIP_VS_ECC_B		0x61280
-#define HSW_VIDEO_DIP_SPD_ECC_B		0x612C0
-#define HSW_VIDEO_DIP_GMP_ECC_B		0x61300
-#define HSW_VIDEO_DIP_VSC_ECC_B		0x61344
-#define HSW_VIDEO_DIP_GCP_B		0x61210
+#define _HSW_VIDEO_DIP_CTL_A		0x60200
+#define _HSW_VIDEO_DIP_AVI_DATA_A	0x60220
+#define _HSW_VIDEO_DIP_VS_DATA_A	0x60260
+#define _HSW_VIDEO_DIP_SPD_DATA_A	0x602A0
+#define _HSW_VIDEO_DIP_GMP_DATA_A	0x602E0
+#define _HSW_VIDEO_DIP_VSC_DATA_A	0x60320
+#define _HSW_VIDEO_DIP_AVI_ECC_A	0x60240
+#define _HSW_VIDEO_DIP_VS_ECC_A		0x60280
+#define _HSW_VIDEO_DIP_SPD_ECC_A	0x602C0
+#define _HSW_VIDEO_DIP_GMP_ECC_A	0x60300
+#define _HSW_VIDEO_DIP_VSC_ECC_A	0x60344
+#define _HSW_VIDEO_DIP_GCP_A		0x60210
 
-#define HSW_TVIDEO_DIP_CTL(trans) \
-	 _TRANSCODER2(trans, HSW_VIDEO_DIP_CTL_A)
-#define HSW_TVIDEO_DIP_AVI_DATA(trans, i) \
-	(_TRANSCODER2(trans, HSW_VIDEO_DIP_AVI_DATA_A) + (i) * 4)
-#define HSW_TVIDEO_DIP_VS_DATA(trans, i) \
-	(_TRANSCODER2(trans, HSW_VIDEO_DIP_VS_DATA_A) + (i) * 4)
-#define HSW_TVIDEO_DIP_SPD_DATA(trans, i) \
-	(_TRANSCODER2(trans, HSW_VIDEO_DIP_SPD_DATA_A) + (i) * 4)
-#define HSW_TVIDEO_DIP_GCP(trans) \
-	_TRANSCODER2(trans, HSW_VIDEO_DIP_GCP_A)
-#define HSW_TVIDEO_DIP_VSC_DATA(trans, i) \
-	(_TRANSCODER2(trans, HSW_VIDEO_DIP_VSC_DATA_A) + (i) * 4)
+#define _HSW_VIDEO_DIP_CTL_B		0x61200
+#define _HSW_VIDEO_DIP_AVI_DATA_B	0x61220
+#define _HSW_VIDEO_DIP_VS_DATA_B	0x61260
+#define _HSW_VIDEO_DIP_SPD_DATA_B	0x612A0
+#define _HSW_VIDEO_DIP_GMP_DATA_B	0x612E0
+#define _HSW_VIDEO_DIP_VSC_DATA_B	0x61320
+#define _HSW_VIDEO_DIP_BVI_ECC_B	0x61240
+#define _HSW_VIDEO_DIP_VS_ECC_B		0x61280
+#define _HSW_VIDEO_DIP_SPD_ECC_B	0x612C0
+#define _HSW_VIDEO_DIP_GMP_ECC_B	0x61300
+#define _HSW_VIDEO_DIP_VSC_ECC_B	0x61344
+#define _HSW_VIDEO_DIP_GCP_B		0x61210
 
-#define HSW_STEREO_3D_CTL_A	0x70020
-#define   S3D_ENABLE		(1<<31)
-#define HSW_STEREO_3D_CTL_B	0x71020
+#define HSW_TVIDEO_DIP_CTL(trans)		_MMIO_TRANS2(trans, _HSW_VIDEO_DIP_CTL_A)
+#define HSW_TVIDEO_DIP_AVI_DATA(trans, i)	_MMIO_TRANS2(trans, _HSW_VIDEO_DIP_AVI_DATA_A + (i) * 4)
+#define HSW_TVIDEO_DIP_VS_DATA(trans, i)	_MMIO_TRANS2(trans, _HSW_VIDEO_DIP_VS_DATA_A + (i) * 4)
+#define HSW_TVIDEO_DIP_SPD_DATA(trans, i)	_MMIO_TRANS2(trans, _HSW_VIDEO_DIP_SPD_DATA_A + (i) * 4)
+#define HSW_TVIDEO_DIP_GCP(trans)		_MMIO_TRANS2(trans, _HSW_VIDEO_DIP_GCP_A)
+#define HSW_TVIDEO_DIP_VSC_DATA(trans, i)	_MMIO_TRANS2(trans, _HSW_VIDEO_DIP_VSC_DATA_A + (i) * 4)
 
-#define HSW_STEREO_3D_CTL(trans) \
-	_PIPE2(trans, HSW_STEREO_3D_CTL_A)
+#define _HSW_STEREO_3D_CTL_A		0x70020
+#define   S3D_ENABLE			(1<<31)
+#define _HSW_STEREO_3D_CTL_B		0x71020
+
+#define HSW_STEREO_3D_CTL(trans)	_MMIO_PIPE2(trans, _HSW_STEREO_3D_CTL_A)
 
 #define _PCH_TRANS_HTOTAL_B          0xe1000
 #define _PCH_TRANS_HBLANK_B          0xe1004
@@ -6310,16 +6382,15 @@
 #define _PCH_TRANS_VTOTAL_B          0xe100c
 #define _PCH_TRANS_VBLANK_B          0xe1010
 #define _PCH_TRANS_VSYNC_B           0xe1014
-#define _PCH_TRANS_VSYNCSHIFT_B	 0xe1028
+#define _PCH_TRANS_VSYNCSHIFT_B 0xe1028
 
-#define PCH_TRANS_HTOTAL(pipe) _PIPE(pipe, _PCH_TRANS_HTOTAL_A, _PCH_TRANS_HTOTAL_B)
-#define PCH_TRANS_HBLANK(pipe) _PIPE(pipe, _PCH_TRANS_HBLANK_A, _PCH_TRANS_HBLANK_B)
-#define PCH_TRANS_HSYNC(pipe) _PIPE(pipe, _PCH_TRANS_HSYNC_A, _PCH_TRANS_HSYNC_B)
-#define PCH_TRANS_VTOTAL(pipe) _PIPE(pipe, _PCH_TRANS_VTOTAL_A, _PCH_TRANS_VTOTAL_B)
-#define PCH_TRANS_VBLANK(pipe) _PIPE(pipe, _PCH_TRANS_VBLANK_A, _PCH_TRANS_VBLANK_B)
-#define PCH_TRANS_VSYNC(pipe) _PIPE(pipe, _PCH_TRANS_VSYNC_A, _PCH_TRANS_VSYNC_B)
-#define PCH_TRANS_VSYNCSHIFT(pipe) _PIPE(pipe, _PCH_TRANS_VSYNCSHIFT_A, \
-					 _PCH_TRANS_VSYNCSHIFT_B)
+#define PCH_TRANS_HTOTAL(pipe)		_MMIO_PIPE(pipe, _PCH_TRANS_HTOTAL_A, _PCH_TRANS_HTOTAL_B)
+#define PCH_TRANS_HBLANK(pipe)		_MMIO_PIPE(pipe, _PCH_TRANS_HBLANK_A, _PCH_TRANS_HBLANK_B)
+#define PCH_TRANS_HSYNC(pipe)		_MMIO_PIPE(pipe, _PCH_TRANS_HSYNC_A, _PCH_TRANS_HSYNC_B)
+#define PCH_TRANS_VTOTAL(pipe)		_MMIO_PIPE(pipe, _PCH_TRANS_VTOTAL_A, _PCH_TRANS_VTOTAL_B)
+#define PCH_TRANS_VBLANK(pipe)		_MMIO_PIPE(pipe, _PCH_TRANS_VBLANK_A, _PCH_TRANS_VBLANK_B)
+#define PCH_TRANS_VSYNC(pipe)		_MMIO_PIPE(pipe, _PCH_TRANS_VSYNC_A, _PCH_TRANS_VSYNC_B)
+#define PCH_TRANS_VSYNCSHIFT(pipe)	_MMIO_PIPE(pipe, _PCH_TRANS_VSYNCSHIFT_A, _PCH_TRANS_VSYNCSHIFT_B)
 
 #define _PCH_TRANSB_DATA_M1	0xe1030
 #define _PCH_TRANSB_DATA_N1	0xe1034
@@ -6330,19 +6401,19 @@
 #define _PCH_TRANSB_LINK_M2	0xe1048
 #define _PCH_TRANSB_LINK_N2	0xe104c
 
-#define PCH_TRANS_DATA_M1(pipe) _PIPE(pipe, _PCH_TRANSA_DATA_M1, _PCH_TRANSB_DATA_M1)
-#define PCH_TRANS_DATA_N1(pipe) _PIPE(pipe, _PCH_TRANSA_DATA_N1, _PCH_TRANSB_DATA_N1)
-#define PCH_TRANS_DATA_M2(pipe) _PIPE(pipe, _PCH_TRANSA_DATA_M2, _PCH_TRANSB_DATA_M2)
-#define PCH_TRANS_DATA_N2(pipe) _PIPE(pipe, _PCH_TRANSA_DATA_N2, _PCH_TRANSB_DATA_N2)
-#define PCH_TRANS_LINK_M1(pipe) _PIPE(pipe, _PCH_TRANSA_LINK_M1, _PCH_TRANSB_LINK_M1)
-#define PCH_TRANS_LINK_N1(pipe) _PIPE(pipe, _PCH_TRANSA_LINK_N1, _PCH_TRANSB_LINK_N1)
-#define PCH_TRANS_LINK_M2(pipe) _PIPE(pipe, _PCH_TRANSA_LINK_M2, _PCH_TRANSB_LINK_M2)
-#define PCH_TRANS_LINK_N2(pipe) _PIPE(pipe, _PCH_TRANSA_LINK_N2, _PCH_TRANSB_LINK_N2)
+#define PCH_TRANS_DATA_M1(pipe)	_MMIO_PIPE(pipe, _PCH_TRANSA_DATA_M1, _PCH_TRANSB_DATA_M1)
+#define PCH_TRANS_DATA_N1(pipe)	_MMIO_PIPE(pipe, _PCH_TRANSA_DATA_N1, _PCH_TRANSB_DATA_N1)
+#define PCH_TRANS_DATA_M2(pipe)	_MMIO_PIPE(pipe, _PCH_TRANSA_DATA_M2, _PCH_TRANSB_DATA_M2)
+#define PCH_TRANS_DATA_N2(pipe)	_MMIO_PIPE(pipe, _PCH_TRANSA_DATA_N2, _PCH_TRANSB_DATA_N2)
+#define PCH_TRANS_LINK_M1(pipe)	_MMIO_PIPE(pipe, _PCH_TRANSA_LINK_M1, _PCH_TRANSB_LINK_M1)
+#define PCH_TRANS_LINK_N1(pipe)	_MMIO_PIPE(pipe, _PCH_TRANSA_LINK_N1, _PCH_TRANSB_LINK_N1)
+#define PCH_TRANS_LINK_M2(pipe)	_MMIO_PIPE(pipe, _PCH_TRANSA_LINK_M2, _PCH_TRANSB_LINK_M2)
+#define PCH_TRANS_LINK_N2(pipe)	_MMIO_PIPE(pipe, _PCH_TRANSA_LINK_N2, _PCH_TRANSB_LINK_N2)
 
 #define _PCH_TRANSACONF              0xf0008
 #define _PCH_TRANSBCONF              0xf1008
-#define PCH_TRANSCONF(pipe) _PIPE(pipe, _PCH_TRANSACONF, _PCH_TRANSBCONF)
-#define LPT_TRANSCONF		_PCH_TRANSACONF /* lpt has only one transcoder */
+#define PCH_TRANSCONF(pipe)	_MMIO_PIPE(pipe, _PCH_TRANSACONF, _PCH_TRANSBCONF)
+#define LPT_TRANSCONF		PCH_TRANSCONF(PIPE_A) /* lpt has only one transcoder */
 #define  TRANS_DISABLE          (0<<31)
 #define  TRANS_ENABLE           (1<<31)
 #define  TRANS_STATE_MASK       (1<<30)
@@ -6363,47 +6434,47 @@
 
 #define _TRANSA_CHICKEN1	 0xf0060
 #define _TRANSB_CHICKEN1	 0xf1060
-#define TRANS_CHICKEN1(pipe) _PIPE(pipe, _TRANSA_CHICKEN1, _TRANSB_CHICKEN1)
+#define TRANS_CHICKEN1(pipe)	_MMIO_PIPE(pipe, _TRANSA_CHICKEN1, _TRANSB_CHICKEN1)
 #define  TRANS_CHICKEN1_HDMIUNIT_GC_DISABLE	(1<<10)
 #define  TRANS_CHICKEN1_DP0UNIT_GC_DISABLE	(1<<4)
 #define _TRANSA_CHICKEN2	 0xf0064
 #define _TRANSB_CHICKEN2	 0xf1064
-#define TRANS_CHICKEN2(pipe) _PIPE(pipe, _TRANSA_CHICKEN2, _TRANSB_CHICKEN2)
+#define TRANS_CHICKEN2(pipe)	_MMIO_PIPE(pipe, _TRANSA_CHICKEN2, _TRANSB_CHICKEN2)
 #define  TRANS_CHICKEN2_TIMING_OVERRIDE			(1<<31)
 #define  TRANS_CHICKEN2_FDI_POLARITY_REVERSED		(1<<29)
 #define  TRANS_CHICKEN2_FRAME_START_DELAY_MASK		(3<<27)
 #define  TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER	(1<<26)
 #define  TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH	(1<<25)
 
-#define SOUTH_CHICKEN1		0xc2000
+#define SOUTH_CHICKEN1		_MMIO(0xc2000)
 #define  FDIA_PHASE_SYNC_SHIFT_OVR	19
 #define  FDIA_PHASE_SYNC_SHIFT_EN	18
 #define  FDI_PHASE_SYNC_OVR(pipe) (1<<(FDIA_PHASE_SYNC_SHIFT_OVR - ((pipe) * 2)))
 #define  FDI_PHASE_SYNC_EN(pipe) (1<<(FDIA_PHASE_SYNC_SHIFT_EN - ((pipe) * 2)))
 #define  FDI_BC_BIFURCATION_SELECT	(1 << 12)
 #define  SPT_PWM_GRANULARITY		(1<<0)
-#define SOUTH_CHICKEN2		0xc2004
+#define SOUTH_CHICKEN2		_MMIO(0xc2004)
 #define  FDI_MPHY_IOSFSB_RESET_STATUS	(1<<13)
 #define  FDI_MPHY_IOSFSB_RESET_CTL	(1<<12)
 #define  LPT_PWM_GRANULARITY		(1<<5)
 #define  DPLS_EDP_PPS_FIX_DIS		(1<<0)
 
-#define _FDI_RXA_CHICKEN         0xc200c
-#define _FDI_RXB_CHICKEN         0xc2010
+#define _FDI_RXA_CHICKEN        0xc200c
+#define _FDI_RXB_CHICKEN        0xc2010
 #define  FDI_RX_PHASE_SYNC_POINTER_OVR	(1<<1)
 #define  FDI_RX_PHASE_SYNC_POINTER_EN	(1<<0)
-#define FDI_RX_CHICKEN(pipe) _PIPE(pipe, _FDI_RXA_CHICKEN, _FDI_RXB_CHICKEN)
+#define FDI_RX_CHICKEN(pipe)	_MMIO_PIPE(pipe, _FDI_RXA_CHICKEN, _FDI_RXB_CHICKEN)
 
-#define SOUTH_DSPCLK_GATE_D	0xc2020
+#define SOUTH_DSPCLK_GATE_D	_MMIO(0xc2020)
 #define  PCH_DPLUNIT_CLOCK_GATE_DISABLE (1<<30)
 #define  PCH_DPLSUNIT_CLOCK_GATE_DISABLE (1<<29)
 #define  PCH_CPUNIT_CLOCK_GATE_DISABLE (1<<14)
 #define  PCH_LP_PARTITION_LEVEL_DISABLE  (1<<12)
 
 /* CPU: FDI_TX */
-#define _FDI_TXA_CTL             0x60100
-#define _FDI_TXB_CTL             0x61100
-#define FDI_TX_CTL(pipe) _PIPE(pipe, _FDI_TXA_CTL, _FDI_TXB_CTL)
+#define _FDI_TXA_CTL            0x60100
+#define _FDI_TXB_CTL            0x61100
+#define FDI_TX_CTL(pipe)	_MMIO_PIPE(pipe, _FDI_TXA_CTL, _FDI_TXB_CTL)
 #define  FDI_TX_DISABLE         (0<<31)
 #define  FDI_TX_ENABLE          (1<<31)
 #define  FDI_LINK_TRAIN_PATTERN_1       (0<<28)
@@ -6453,7 +6524,7 @@
 /* FDI_RX, FDI_X is hard-wired to Transcoder_X */
 #define _FDI_RXA_CTL             0xf000c
 #define _FDI_RXB_CTL             0xf100c
-#define FDI_RX_CTL(pipe) _PIPE(pipe, _FDI_RXA_CTL, _FDI_RXB_CTL)
+#define FDI_RX_CTL(pipe)	_MMIO_PIPE(pipe, _FDI_RXA_CTL, _FDI_RXB_CTL)
 #define  FDI_RX_ENABLE          (1<<31)
 /* train, dp width same as FDI_TX */
 #define  FDI_FS_ERRC_ENABLE		(1<<27)
@@ -6489,14 +6560,14 @@
 #define  FDI_RX_TP1_TO_TP2_48		(2<<20)
 #define  FDI_RX_TP1_TO_TP2_64		(3<<20)
 #define  FDI_RX_FDI_DELAY_90		(0x90<<0)
-#define FDI_RX_MISC(pipe) _PIPE(pipe, _FDI_RXA_MISC, _FDI_RXB_MISC)
+#define FDI_RX_MISC(pipe)	_MMIO_PIPE(pipe, _FDI_RXA_MISC, _FDI_RXB_MISC)
 
-#define _FDI_RXA_TUSIZE1         0xf0030
-#define _FDI_RXA_TUSIZE2         0xf0038
-#define _FDI_RXB_TUSIZE1         0xf1030
-#define _FDI_RXB_TUSIZE2         0xf1038
-#define FDI_RX_TUSIZE1(pipe) _PIPE(pipe, _FDI_RXA_TUSIZE1, _FDI_RXB_TUSIZE1)
-#define FDI_RX_TUSIZE2(pipe) _PIPE(pipe, _FDI_RXA_TUSIZE2, _FDI_RXB_TUSIZE2)
+#define _FDI_RXA_TUSIZE1        0xf0030
+#define _FDI_RXA_TUSIZE2        0xf0038
+#define _FDI_RXB_TUSIZE1        0xf1030
+#define _FDI_RXB_TUSIZE2        0xf1038
+#define FDI_RX_TUSIZE1(pipe)	_MMIO_PIPE(pipe, _FDI_RXA_TUSIZE1, _FDI_RXB_TUSIZE1)
+#define FDI_RX_TUSIZE2(pipe)	_MMIO_PIPE(pipe, _FDI_RXA_TUSIZE2, _FDI_RXB_TUSIZE2)
 
 /* FDI_RX interrupt register format */
 #define FDI_RX_INTER_LANE_ALIGN         (1<<10)
@@ -6511,44 +6582,41 @@
 #define FDI_RX_CROSS_CLOCK_OVERFLOW     (1<<1)
 #define FDI_RX_SYMBOL_QUEUE_OVERFLOW    (1<<0)
 
-#define _FDI_RXA_IIR             0xf0014
-#define _FDI_RXA_IMR             0xf0018
-#define _FDI_RXB_IIR             0xf1014
-#define _FDI_RXB_IMR             0xf1018
-#define FDI_RX_IIR(pipe) _PIPE(pipe, _FDI_RXA_IIR, _FDI_RXB_IIR)
-#define FDI_RX_IMR(pipe) _PIPE(pipe, _FDI_RXA_IMR, _FDI_RXB_IMR)
+#define _FDI_RXA_IIR            0xf0014
+#define _FDI_RXA_IMR            0xf0018
+#define _FDI_RXB_IIR            0xf1014
+#define _FDI_RXB_IMR            0xf1018
+#define FDI_RX_IIR(pipe)	_MMIO_PIPE(pipe, _FDI_RXA_IIR, _FDI_RXB_IIR)
+#define FDI_RX_IMR(pipe)	_MMIO_PIPE(pipe, _FDI_RXA_IMR, _FDI_RXB_IMR)
 
-#define FDI_PLL_CTL_1           0xfe000
-#define FDI_PLL_CTL_2           0xfe004
+#define FDI_PLL_CTL_1           _MMIO(0xfe000)
+#define FDI_PLL_CTL_2           _MMIO(0xfe004)
 
-#define PCH_LVDS	0xe1180
+#define PCH_LVDS	_MMIO(0xe1180)
 #define  LVDS_DETECTED	(1 << 1)
 
 /* vlv has 2 sets of panel control regs. */
-#define PIPEA_PP_STATUS         (VLV_DISPLAY_BASE + 0x61200)
-#define PIPEA_PP_CONTROL        (VLV_DISPLAY_BASE + 0x61204)
-#define PIPEA_PP_ON_DELAYS      (VLV_DISPLAY_BASE + 0x61208)
+#define _PIPEA_PP_STATUS         (VLV_DISPLAY_BASE + 0x61200)
+#define _PIPEA_PP_CONTROL        (VLV_DISPLAY_BASE + 0x61204)
+#define _PIPEA_PP_ON_DELAYS      (VLV_DISPLAY_BASE + 0x61208)
 #define  PANEL_PORT_SELECT_VLV(port)	((port) << 30)
-#define PIPEA_PP_OFF_DELAYS     (VLV_DISPLAY_BASE + 0x6120c)
-#define PIPEA_PP_DIVISOR        (VLV_DISPLAY_BASE + 0x61210)
+#define _PIPEA_PP_OFF_DELAYS     (VLV_DISPLAY_BASE + 0x6120c)
+#define _PIPEA_PP_DIVISOR        (VLV_DISPLAY_BASE + 0x61210)
 
-#define PIPEB_PP_STATUS         (VLV_DISPLAY_BASE + 0x61300)
-#define PIPEB_PP_CONTROL        (VLV_DISPLAY_BASE + 0x61304)
-#define PIPEB_PP_ON_DELAYS      (VLV_DISPLAY_BASE + 0x61308)
-#define PIPEB_PP_OFF_DELAYS     (VLV_DISPLAY_BASE + 0x6130c)
-#define PIPEB_PP_DIVISOR        (VLV_DISPLAY_BASE + 0x61310)
+#define _PIPEB_PP_STATUS         (VLV_DISPLAY_BASE + 0x61300)
+#define _PIPEB_PP_CONTROL        (VLV_DISPLAY_BASE + 0x61304)
+#define _PIPEB_PP_ON_DELAYS      (VLV_DISPLAY_BASE + 0x61308)
+#define _PIPEB_PP_OFF_DELAYS     (VLV_DISPLAY_BASE + 0x6130c)
+#define _PIPEB_PP_DIVISOR        (VLV_DISPLAY_BASE + 0x61310)
 
-#define VLV_PIPE_PP_STATUS(pipe) _PIPE(pipe, PIPEA_PP_STATUS, PIPEB_PP_STATUS)
-#define VLV_PIPE_PP_CONTROL(pipe) _PIPE(pipe, PIPEA_PP_CONTROL, PIPEB_PP_CONTROL)
-#define VLV_PIPE_PP_ON_DELAYS(pipe) \
-		_PIPE(pipe, PIPEA_PP_ON_DELAYS, PIPEB_PP_ON_DELAYS)
-#define VLV_PIPE_PP_OFF_DELAYS(pipe) \
-		_PIPE(pipe, PIPEA_PP_OFF_DELAYS, PIPEB_PP_OFF_DELAYS)
-#define VLV_PIPE_PP_DIVISOR(pipe) \
-		_PIPE(pipe, PIPEA_PP_DIVISOR, PIPEB_PP_DIVISOR)
+#define VLV_PIPE_PP_STATUS(pipe)	_MMIO_PIPE(pipe, _PIPEA_PP_STATUS, _PIPEB_PP_STATUS)
+#define VLV_PIPE_PP_CONTROL(pipe)	_MMIO_PIPE(pipe, _PIPEA_PP_CONTROL, _PIPEB_PP_CONTROL)
+#define VLV_PIPE_PP_ON_DELAYS(pipe)	_MMIO_PIPE(pipe, _PIPEA_PP_ON_DELAYS, _PIPEB_PP_ON_DELAYS)
+#define VLV_PIPE_PP_OFF_DELAYS(pipe)	_MMIO_PIPE(pipe, _PIPEA_PP_OFF_DELAYS, _PIPEB_PP_OFF_DELAYS)
+#define VLV_PIPE_PP_DIVISOR(pipe)	_MMIO_PIPE(pipe, _PIPEA_PP_DIVISOR, _PIPEB_PP_DIVISOR)
 
-#define PCH_PP_STATUS		0xc7200
-#define PCH_PP_CONTROL		0xc7204
+#define _PCH_PP_STATUS		0xc7200
+#define _PCH_PP_CONTROL		0xc7204
 #define  PANEL_UNLOCK_REGS	(0xabcd << 16)
 #define  PANEL_UNLOCK_MASK	(0xffff << 16)
 #define  BXT_POWER_CYCLE_DELAY_MASK	(0x1f0)
@@ -6558,7 +6626,7 @@
 #define  PANEL_POWER_RESET	(1 << 1)
 #define  PANEL_POWER_OFF	(0 << 0)
 #define  PANEL_POWER_ON		(1 << 0)
-#define PCH_PP_ON_DELAYS	0xc7208
+#define _PCH_PP_ON_DELAYS	0xc7208
 #define  PANEL_PORT_SELECT_MASK	(3 << 30)
 #define  PANEL_PORT_SELECT_LVDS	(0 << 30)
 #define  PANEL_PORT_SELECT_DPA	(1 << 30)
@@ -6569,52 +6637,64 @@
 #define  PANEL_LIGHT_ON_DELAY_MASK	(0x1fff)
 #define  PANEL_LIGHT_ON_DELAY_SHIFT	0
 
-#define PCH_PP_OFF_DELAYS	0xc720c
+#define _PCH_PP_OFF_DELAYS		0xc720c
 #define  PANEL_POWER_DOWN_DELAY_MASK	(0x1fff0000)
 #define  PANEL_POWER_DOWN_DELAY_SHIFT	16
 #define  PANEL_LIGHT_OFF_DELAY_MASK	(0x1fff)
 #define  PANEL_LIGHT_OFF_DELAY_SHIFT	0
 
-#define PCH_PP_DIVISOR		0xc7210
+#define _PCH_PP_DIVISOR			0xc7210
 #define  PP_REFERENCE_DIVIDER_MASK	(0xffffff00)
 #define  PP_REFERENCE_DIVIDER_SHIFT	8
 #define  PANEL_POWER_CYCLE_DELAY_MASK	(0x1f)
 #define  PANEL_POWER_CYCLE_DELAY_SHIFT	0
 
+#define PCH_PP_STATUS			_MMIO(_PCH_PP_STATUS)
+#define PCH_PP_CONTROL			_MMIO(_PCH_PP_CONTROL)
+#define PCH_PP_ON_DELAYS		_MMIO(_PCH_PP_ON_DELAYS)
+#define PCH_PP_OFF_DELAYS		_MMIO(_PCH_PP_OFF_DELAYS)
+#define PCH_PP_DIVISOR			_MMIO(_PCH_PP_DIVISOR)
+
 /* BXT PPS changes - 2nd set of PPS registers */
 #define _BXT_PP_STATUS2 	0xc7300
 #define _BXT_PP_CONTROL2 	0xc7304
 #define _BXT_PP_ON_DELAYS2	0xc7308
 #define _BXT_PP_OFF_DELAYS2	0xc730c
 
-#define BXT_PP_STATUS(n)	_PIPE(n, PCH_PP_STATUS, _BXT_PP_STATUS2)
-#define BXT_PP_CONTROL(n)	_PIPE(n, PCH_PP_CONTROL, _BXT_PP_CONTROL2)
-#define BXT_PP_ON_DELAYS(n)	_PIPE(n, PCH_PP_ON_DELAYS, _BXT_PP_ON_DELAYS2)
-#define BXT_PP_OFF_DELAYS(n)	_PIPE(n, PCH_PP_OFF_DELAYS, _BXT_PP_OFF_DELAYS2)
+#define BXT_PP_STATUS(n)	_MMIO_PIPE(n, _PCH_PP_STATUS, _BXT_PP_STATUS2)
+#define BXT_PP_CONTROL(n)	_MMIO_PIPE(n, _PCH_PP_CONTROL, _BXT_PP_CONTROL2)
+#define BXT_PP_ON_DELAYS(n)	_MMIO_PIPE(n, _PCH_PP_ON_DELAYS, _BXT_PP_ON_DELAYS2)
+#define BXT_PP_OFF_DELAYS(n)	_MMIO_PIPE(n, _PCH_PP_OFF_DELAYS, _BXT_PP_OFF_DELAYS2)
 
-#define PCH_DP_B		0xe4100
-#define PCH_DPB_AUX_CH_CTL	0xe4110
-#define PCH_DPB_AUX_CH_DATA1	0xe4114
-#define PCH_DPB_AUX_CH_DATA2	0xe4118
-#define PCH_DPB_AUX_CH_DATA3	0xe411c
-#define PCH_DPB_AUX_CH_DATA4	0xe4120
-#define PCH_DPB_AUX_CH_DATA5	0xe4124
+#define _PCH_DP_B		0xe4100
+#define PCH_DP_B		_MMIO(_PCH_DP_B)
+#define _PCH_DPB_AUX_CH_CTL	0xe4110
+#define _PCH_DPB_AUX_CH_DATA1	0xe4114
+#define _PCH_DPB_AUX_CH_DATA2	0xe4118
+#define _PCH_DPB_AUX_CH_DATA3	0xe411c
+#define _PCH_DPB_AUX_CH_DATA4	0xe4120
+#define _PCH_DPB_AUX_CH_DATA5	0xe4124
 
-#define PCH_DP_C		0xe4200
-#define PCH_DPC_AUX_CH_CTL	0xe4210
-#define PCH_DPC_AUX_CH_DATA1	0xe4214
-#define PCH_DPC_AUX_CH_DATA2	0xe4218
-#define PCH_DPC_AUX_CH_DATA3	0xe421c
-#define PCH_DPC_AUX_CH_DATA4	0xe4220
-#define PCH_DPC_AUX_CH_DATA5	0xe4224
+#define _PCH_DP_C		0xe4200
+#define PCH_DP_C		_MMIO(_PCH_DP_C)
+#define _PCH_DPC_AUX_CH_CTL	0xe4210
+#define _PCH_DPC_AUX_CH_DATA1	0xe4214
+#define _PCH_DPC_AUX_CH_DATA2	0xe4218
+#define _PCH_DPC_AUX_CH_DATA3	0xe421c
+#define _PCH_DPC_AUX_CH_DATA4	0xe4220
+#define _PCH_DPC_AUX_CH_DATA5	0xe4224
 
-#define PCH_DP_D		0xe4300
-#define PCH_DPD_AUX_CH_CTL	0xe4310
-#define PCH_DPD_AUX_CH_DATA1	0xe4314
-#define PCH_DPD_AUX_CH_DATA2	0xe4318
-#define PCH_DPD_AUX_CH_DATA3	0xe431c
-#define PCH_DPD_AUX_CH_DATA4	0xe4320
-#define PCH_DPD_AUX_CH_DATA5	0xe4324
+#define _PCH_DP_D		0xe4300
+#define PCH_DP_D		_MMIO(_PCH_DP_D)
+#define _PCH_DPD_AUX_CH_CTL	0xe4310
+#define _PCH_DPD_AUX_CH_DATA1	0xe4314
+#define _PCH_DPD_AUX_CH_DATA2	0xe4318
+#define _PCH_DPD_AUX_CH_DATA3	0xe431c
+#define _PCH_DPD_AUX_CH_DATA4	0xe4320
+#define _PCH_DPD_AUX_CH_DATA5	0xe4324
+
+#define PCH_DP_AUX_CH_CTL(port)		_MMIO_PORT((port) - PORT_B, _PCH_DPB_AUX_CH_CTL, _PCH_DPC_AUX_CH_CTL)
+#define PCH_DP_AUX_CH_DATA(port, i)	_MMIO(_PORT((port) - PORT_B, _PCH_DPB_AUX_CH_DATA1, _PCH_DPC_AUX_CH_DATA1) + (i) * 4) /* 5 registers */
 
 /* CPT */
 #define  PORT_TRANS_A_SEL_CPT	0
@@ -6627,10 +6707,10 @@
 #define  SDVO_PORT_TO_PIPE_CHV(val)	(((val) & (3<<24)) >> 24)
 #define  DP_PORT_TO_PIPE_CHV(val)	(((val) & (3<<16)) >> 16)
 
-#define TRANS_DP_CTL_A		0xe0300
-#define TRANS_DP_CTL_B		0xe1300
-#define TRANS_DP_CTL_C		0xe2300
-#define TRANS_DP_CTL(pipe)	_PIPE(pipe, TRANS_DP_CTL_A, TRANS_DP_CTL_B)
+#define _TRANS_DP_CTL_A		0xe0300
+#define _TRANS_DP_CTL_B		0xe1300
+#define _TRANS_DP_CTL_C		0xe2300
+#define TRANS_DP_CTL(pipe)	_MMIO_PIPE(pipe, _TRANS_DP_CTL_A, _TRANS_DP_CTL_B)
 #define  TRANS_DP_OUTPUT_ENABLE	(1<<31)
 #define  TRANS_DP_PORT_SEL_B	(0<<29)
 #define  TRANS_DP_PORT_SEL_C	(1<<29)
@@ -6683,40 +6763,40 @@
 
 #define  EDP_LINK_TRAIN_VOL_EMP_MASK_IVB	(0x3f<<22)
 
-#define  VLV_PMWGICZ				0x1300a4
+#define  VLV_PMWGICZ				_MMIO(0x1300a4)
 
-#define  FORCEWAKE				0xA18C
-#define  FORCEWAKE_VLV				0x1300b0
-#define  FORCEWAKE_ACK_VLV			0x1300b4
-#define  FORCEWAKE_MEDIA_VLV			0x1300b8
-#define  FORCEWAKE_ACK_MEDIA_VLV		0x1300bc
-#define  FORCEWAKE_ACK_HSW			0x130044
-#define  FORCEWAKE_ACK				0x130090
-#define  VLV_GTLC_WAKE_CTRL			0x130090
+#define  FORCEWAKE				_MMIO(0xA18C)
+#define  FORCEWAKE_VLV				_MMIO(0x1300b0)
+#define  FORCEWAKE_ACK_VLV			_MMIO(0x1300b4)
+#define  FORCEWAKE_MEDIA_VLV			_MMIO(0x1300b8)
+#define  FORCEWAKE_ACK_MEDIA_VLV		_MMIO(0x1300bc)
+#define  FORCEWAKE_ACK_HSW			_MMIO(0x130044)
+#define  FORCEWAKE_ACK				_MMIO(0x130090)
+#define  VLV_GTLC_WAKE_CTRL			_MMIO(0x130090)
 #define   VLV_GTLC_RENDER_CTX_EXISTS		(1 << 25)
 #define   VLV_GTLC_MEDIA_CTX_EXISTS		(1 << 24)
 #define   VLV_GTLC_ALLOWWAKEREQ			(1 << 0)
 
-#define  VLV_GTLC_PW_STATUS			0x130094
+#define  VLV_GTLC_PW_STATUS			_MMIO(0x130094)
 #define   VLV_GTLC_ALLOWWAKEACK			(1 << 0)
 #define   VLV_GTLC_ALLOWWAKEERR			(1 << 1)
 #define   VLV_GTLC_PW_MEDIA_STATUS_MASK		(1 << 5)
 #define   VLV_GTLC_PW_RENDER_STATUS_MASK	(1 << 7)
-#define  FORCEWAKE_MT				0xa188 /* multi-threaded */
-#define  FORCEWAKE_MEDIA_GEN9			0xa270
-#define  FORCEWAKE_RENDER_GEN9			0xa278
-#define  FORCEWAKE_BLITTER_GEN9			0xa188
-#define  FORCEWAKE_ACK_MEDIA_GEN9		0x0D88
-#define  FORCEWAKE_ACK_RENDER_GEN9		0x0D84
-#define  FORCEWAKE_ACK_BLITTER_GEN9		0x130044
+#define  FORCEWAKE_MT				_MMIO(0xa188) /* multi-threaded */
+#define  FORCEWAKE_MEDIA_GEN9			_MMIO(0xa270)
+#define  FORCEWAKE_RENDER_GEN9			_MMIO(0xa278)
+#define  FORCEWAKE_BLITTER_GEN9			_MMIO(0xa188)
+#define  FORCEWAKE_ACK_MEDIA_GEN9		_MMIO(0x0D88)
+#define  FORCEWAKE_ACK_RENDER_GEN9		_MMIO(0x0D84)
+#define  FORCEWAKE_ACK_BLITTER_GEN9		_MMIO(0x130044)
 #define   FORCEWAKE_KERNEL			0x1
 #define   FORCEWAKE_USER			0x2
-#define  FORCEWAKE_MT_ACK			0x130040
-#define  ECOBUS					0xa180
+#define  FORCEWAKE_MT_ACK			_MMIO(0x130040)
+#define  ECOBUS					_MMIO(0xa180)
 #define    FORCEWAKE_MT_ENABLE			(1<<5)
-#define  VLV_SPAREG2H				0xA194
+#define  VLV_SPAREG2H				_MMIO(0xA194)
 
-#define  GTFIFODBG				0x120000
+#define  GTFIFODBG				_MMIO(0x120000)
 #define    GT_FIFO_SBDROPERR			(1<<6)
 #define    GT_FIFO_BLOBDROPERR			(1<<5)
 #define    GT_FIFO_SB_READ_ABORTERR		(1<<4)
@@ -6725,23 +6805,23 @@
 #define    GT_FIFO_IAWRERR			(1<<1)
 #define    GT_FIFO_IARDERR			(1<<0)
 
-#define  GTFIFOCTL				0x120008
+#define  GTFIFOCTL				_MMIO(0x120008)
 #define    GT_FIFO_FREE_ENTRIES_MASK		0x7f
 #define    GT_FIFO_NUM_RESERVED_ENTRIES		20
 #define    GT_FIFO_CTL_BLOCK_ALL_POLICY_STALL	(1 << 12)
 #define    GT_FIFO_CTL_RC6_POLICY_STALL		(1 << 11)
 
-#define  HSW_IDICR				0x9008
+#define  HSW_IDICR				_MMIO(0x9008)
 #define    IDIHASHMSK(x)			(((x) & 0x3f) << 16)
-#define  HSW_EDRAM_PRESENT			0x120010
+#define  HSW_EDRAM_PRESENT			_MMIO(0x120010)
 #define    EDRAM_ENABLED			0x1
 
-#define GEN6_UCGCTL1				0x9400
+#define GEN6_UCGCTL1				_MMIO(0x9400)
 # define GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE		(1 << 16)
 # define GEN6_BLBUNIT_CLOCK_GATE_DISABLE		(1 << 5)
 # define GEN6_CSUNIT_CLOCK_GATE_DISABLE			(1 << 7)
 
-#define GEN6_UCGCTL2				0x9404
+#define GEN6_UCGCTL2				_MMIO(0x9404)
 # define GEN6_VFUNIT_CLOCK_GATE_DISABLE			(1 << 31)
 # define GEN7_VDSUNIT_CLOCK_GATE_DISABLE		(1 << 30)
 # define GEN7_TDLUNIT_CLOCK_GATE_DISABLE		(1 << 22)
@@ -6749,30 +6829,30 @@
 # define GEN6_RCPBUNIT_CLOCK_GATE_DISABLE		(1 << 12)
 # define GEN6_RCCUNIT_CLOCK_GATE_DISABLE		(1 << 11)
 
-#define GEN6_UCGCTL3				0x9408
+#define GEN6_UCGCTL3				_MMIO(0x9408)
 
-#define GEN7_UCGCTL4				0x940c
+#define GEN7_UCGCTL4				_MMIO(0x940c)
 #define  GEN7_L3BANK2X_CLOCK_GATE_DISABLE	(1<<25)
 
-#define GEN6_RCGCTL1				0x9410
-#define GEN6_RCGCTL2				0x9414
-#define GEN6_RSTCTL				0x9420
+#define GEN6_RCGCTL1				_MMIO(0x9410)
+#define GEN6_RCGCTL2				_MMIO(0x9414)
+#define GEN6_RSTCTL				_MMIO(0x9420)
 
-#define GEN8_UCGCTL6				0x9430
+#define GEN8_UCGCTL6				_MMIO(0x9430)
 #define   GEN8_GAPSUNIT_CLOCK_GATE_DISABLE	(1<<24)
 #define   GEN8_SDEUNIT_CLOCK_GATE_DISABLE	(1<<14)
 #define   GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ (1<<28)
 
-#define GEN6_GFXPAUSE				0xA000
-#define GEN6_RPNSWREQ				0xA008
+#define GEN6_GFXPAUSE				_MMIO(0xA000)
+#define GEN6_RPNSWREQ				_MMIO(0xA008)
 #define   GEN6_TURBO_DISABLE			(1<<31)
 #define   GEN6_FREQUENCY(x)			((x)<<25)
 #define   HSW_FREQUENCY(x)			((x)<<24)
 #define   GEN9_FREQUENCY(x)			((x)<<23)
 #define   GEN6_OFFSET(x)			((x)<<19)
 #define   GEN6_AGGRESSIVE_TURBO			(0<<15)
-#define GEN6_RC_VIDEO_FREQ			0xA00C
-#define GEN6_RC_CONTROL				0xA090
+#define GEN6_RC_VIDEO_FREQ			_MMIO(0xA00C)
+#define GEN6_RC_CONTROL				_MMIO(0xA090)
 #define   GEN6_RC_CTL_RC6pp_ENABLE		(1<<16)
 #define   GEN6_RC_CTL_RC6p_ENABLE		(1<<17)
 #define   GEN6_RC_CTL_RC6_ENABLE		(1<<18)
@@ -6782,16 +6862,16 @@
 #define   GEN7_RC_CTL_TO_MODE			(1<<28)
 #define   GEN6_RC_CTL_EI_MODE(x)		((x)<<27)
 #define   GEN6_RC_CTL_HW_ENABLE			(1<<31)
-#define GEN6_RP_DOWN_TIMEOUT			0xA010
-#define GEN6_RP_INTERRUPT_LIMITS		0xA014
-#define GEN6_RPSTAT1				0xA01C
+#define GEN6_RP_DOWN_TIMEOUT			_MMIO(0xA010)
+#define GEN6_RP_INTERRUPT_LIMITS		_MMIO(0xA014)
+#define GEN6_RPSTAT1				_MMIO(0xA01C)
 #define   GEN6_CAGF_SHIFT			8
 #define   HSW_CAGF_SHIFT			7
 #define   GEN9_CAGF_SHIFT			23
 #define   GEN6_CAGF_MASK			(0x7f << GEN6_CAGF_SHIFT)
 #define   HSW_CAGF_MASK				(0x7f << HSW_CAGF_SHIFT)
 #define   GEN9_CAGF_MASK			(0x1ff << GEN9_CAGF_SHIFT)
-#define GEN6_RP_CONTROL				0xA024
+#define GEN6_RP_CONTROL				_MMIO(0xA024)
 #define   GEN6_RP_MEDIA_TURBO			(1<<11)
 #define   GEN6_RP_MEDIA_MODE_MASK		(3<<9)
 #define   GEN6_RP_MEDIA_HW_TURBO_MODE		(3<<9)
@@ -6805,53 +6885,53 @@
 #define   GEN6_RP_UP_BUSY_CONT			(0x4<<3)
 #define   GEN6_RP_DOWN_IDLE_AVG			(0x2<<0)
 #define   GEN6_RP_DOWN_IDLE_CONT		(0x1<<0)
-#define GEN6_RP_UP_THRESHOLD			0xA02C
-#define GEN6_RP_DOWN_THRESHOLD			0xA030
-#define GEN6_RP_CUR_UP_EI			0xA050
+#define GEN6_RP_UP_THRESHOLD			_MMIO(0xA02C)
+#define GEN6_RP_DOWN_THRESHOLD			_MMIO(0xA030)
+#define GEN6_RP_CUR_UP_EI			_MMIO(0xA050)
 #define   GEN6_CURICONT_MASK			0xffffff
-#define GEN6_RP_CUR_UP				0xA054
+#define GEN6_RP_CUR_UP				_MMIO(0xA054)
 #define   GEN6_CURBSYTAVG_MASK			0xffffff
-#define GEN6_RP_PREV_UP				0xA058
-#define GEN6_RP_CUR_DOWN_EI			0xA05C
+#define GEN6_RP_PREV_UP				_MMIO(0xA058)
+#define GEN6_RP_CUR_DOWN_EI			_MMIO(0xA05C)
 #define   GEN6_CURIAVG_MASK			0xffffff
-#define GEN6_RP_CUR_DOWN			0xA060
-#define GEN6_RP_PREV_DOWN			0xA064
-#define GEN6_RP_UP_EI				0xA068
-#define GEN6_RP_DOWN_EI				0xA06C
-#define GEN6_RP_IDLE_HYSTERSIS			0xA070
-#define GEN6_RPDEUHWTC				0xA080
-#define GEN6_RPDEUC				0xA084
-#define GEN6_RPDEUCSW				0xA088
-#define GEN6_RC_STATE				0xA094
-#define GEN6_RC1_WAKE_RATE_LIMIT		0xA098
-#define GEN6_RC6_WAKE_RATE_LIMIT		0xA09C
-#define GEN6_RC6pp_WAKE_RATE_LIMIT		0xA0A0
-#define GEN6_RC_EVALUATION_INTERVAL		0xA0A8
-#define GEN6_RC_IDLE_HYSTERSIS			0xA0AC
-#define GEN6_RC_SLEEP				0xA0B0
-#define GEN6_RCUBMABDTMR			0xA0B0
-#define GEN6_RC1e_THRESHOLD			0xA0B4
-#define GEN6_RC6_THRESHOLD			0xA0B8
-#define GEN6_RC6p_THRESHOLD			0xA0BC
-#define VLV_RCEDATA				0xA0BC
-#define GEN6_RC6pp_THRESHOLD			0xA0C0
-#define GEN6_PMINTRMSK				0xA168
+#define GEN6_RP_CUR_DOWN			_MMIO(0xA060)
+#define GEN6_RP_PREV_DOWN			_MMIO(0xA064)
+#define GEN6_RP_UP_EI				_MMIO(0xA068)
+#define GEN6_RP_DOWN_EI				_MMIO(0xA06C)
+#define GEN6_RP_IDLE_HYSTERSIS			_MMIO(0xA070)
+#define GEN6_RPDEUHWTC				_MMIO(0xA080)
+#define GEN6_RPDEUC				_MMIO(0xA084)
+#define GEN6_RPDEUCSW				_MMIO(0xA088)
+#define GEN6_RC_STATE				_MMIO(0xA094)
+#define GEN6_RC1_WAKE_RATE_LIMIT		_MMIO(0xA098)
+#define GEN6_RC6_WAKE_RATE_LIMIT		_MMIO(0xA09C)
+#define GEN6_RC6pp_WAKE_RATE_LIMIT		_MMIO(0xA0A0)
+#define GEN6_RC_EVALUATION_INTERVAL		_MMIO(0xA0A8)
+#define GEN6_RC_IDLE_HYSTERSIS			_MMIO(0xA0AC)
+#define GEN6_RC_SLEEP				_MMIO(0xA0B0)
+#define GEN6_RCUBMABDTMR			_MMIO(0xA0B0)
+#define GEN6_RC1e_THRESHOLD			_MMIO(0xA0B4)
+#define GEN6_RC6_THRESHOLD			_MMIO(0xA0B8)
+#define GEN6_RC6p_THRESHOLD			_MMIO(0xA0BC)
+#define VLV_RCEDATA				_MMIO(0xA0BC)
+#define GEN6_RC6pp_THRESHOLD			_MMIO(0xA0C0)
+#define GEN6_PMINTRMSK				_MMIO(0xA168)
 #define GEN8_PMINTR_REDIRECT_TO_NON_DISP	(1<<31)
-#define VLV_PWRDWNUPCTL				0xA294
-#define GEN9_MEDIA_PG_IDLE_HYSTERESIS		0xA0C4
-#define GEN9_RENDER_PG_IDLE_HYSTERESIS		0xA0C8
-#define GEN9_PG_ENABLE				0xA210
+#define VLV_PWRDWNUPCTL				_MMIO(0xA294)
+#define GEN9_MEDIA_PG_IDLE_HYSTERESIS		_MMIO(0xA0C4)
+#define GEN9_RENDER_PG_IDLE_HYSTERESIS		_MMIO(0xA0C8)
+#define GEN9_PG_ENABLE				_MMIO(0xA210)
 #define GEN9_RENDER_PG_ENABLE			(1<<0)
 #define GEN9_MEDIA_PG_ENABLE			(1<<1)
 
-#define VLV_CHICKEN_3				(VLV_DISPLAY_BASE + 0x7040C)
+#define VLV_CHICKEN_3				_MMIO(VLV_DISPLAY_BASE + 0x7040C)
 #define  PIXEL_OVERLAP_CNT_MASK			(3 << 30)
 #define  PIXEL_OVERLAP_CNT_SHIFT		30
 
-#define GEN6_PMISR				0x44020
-#define GEN6_PMIMR				0x44024 /* rps_lock */
-#define GEN6_PMIIR				0x44028
-#define GEN6_PMIER				0x4402C
+#define GEN6_PMISR				_MMIO(0x44020)
+#define GEN6_PMIMR				_MMIO(0x44024) /* rps_lock */
+#define GEN6_PMIIR				_MMIO(0x44028)
+#define GEN6_PMIER				_MMIO(0x4402C)
 #define  GEN6_PM_MBOX_EVENT			(1<<25)
 #define  GEN6_PM_THERMAL_EVENT			(1<<24)
 #define  GEN6_PM_RP_DOWN_TIMEOUT		(1<<6)
@@ -6863,30 +6943,30 @@
 						 GEN6_PM_RP_DOWN_THRESHOLD | \
 						 GEN6_PM_RP_DOWN_TIMEOUT)
 
-#define GEN7_GT_SCRATCH(i)			(0x4F100 + (i) * 4)
+#define GEN7_GT_SCRATCH(i)			_MMIO(0x4F100 + (i) * 4)
 #define GEN7_GT_SCRATCH_REG_NUM			8
 
-#define VLV_GTLC_SURVIVABILITY_REG              0x130098
+#define VLV_GTLC_SURVIVABILITY_REG              _MMIO(0x130098)
 #define VLV_GFX_CLK_STATUS_BIT			(1<<3)
 #define VLV_GFX_CLK_FORCE_ON_BIT		(1<<2)
 
-#define GEN6_GT_GFX_RC6_LOCKED			0x138104
-#define VLV_COUNTER_CONTROL			0x138104
+#define GEN6_GT_GFX_RC6_LOCKED			_MMIO(0x138104)
+#define VLV_COUNTER_CONTROL			_MMIO(0x138104)
 #define   VLV_COUNT_RANGE_HIGH			(1<<15)
 #define   VLV_MEDIA_RC0_COUNT_EN		(1<<5)
 #define   VLV_RENDER_RC0_COUNT_EN		(1<<4)
 #define   VLV_MEDIA_RC6_COUNT_EN		(1<<1)
 #define   VLV_RENDER_RC6_COUNT_EN		(1<<0)
-#define GEN6_GT_GFX_RC6				0x138108
-#define VLV_GT_RENDER_RC6			0x138108
-#define VLV_GT_MEDIA_RC6			0x13810C
+#define GEN6_GT_GFX_RC6				_MMIO(0x138108)
+#define VLV_GT_RENDER_RC6			_MMIO(0x138108)
+#define VLV_GT_MEDIA_RC6			_MMIO(0x13810C)
 
-#define GEN6_GT_GFX_RC6p			0x13810C
-#define GEN6_GT_GFX_RC6pp			0x138110
-#define VLV_RENDER_C0_COUNT			0x138118
-#define VLV_MEDIA_C0_COUNT			0x13811C
+#define GEN6_GT_GFX_RC6p			_MMIO(0x13810C)
+#define GEN6_GT_GFX_RC6pp			_MMIO(0x138110)
+#define VLV_RENDER_C0_COUNT			_MMIO(0x138118)
+#define VLV_MEDIA_C0_COUNT			_MMIO(0x13811C)
 
-#define GEN6_PCODE_MAILBOX			0x138124
+#define GEN6_PCODE_MAILBOX			_MMIO(0x138124)
 #define   GEN6_PCODE_READY			(1<<31)
 #define	  GEN6_PCODE_WRITE_RC6VIDS		0x4
 #define	  GEN6_PCODE_READ_RC6VIDS		0x5
@@ -6909,12 +6989,12 @@
 #define   HSW_PCODE_DE_WRITE_FREQ_REQ		0x17
 #define   DISPLAY_IPS_CONTROL			0x19
 #define	  HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL	0x1A
-#define GEN6_PCODE_DATA				0x138128
+#define GEN6_PCODE_DATA				_MMIO(0x138128)
 #define   GEN6_PCODE_FREQ_IA_RATIO_SHIFT	8
 #define   GEN6_PCODE_FREQ_RING_RATIO_SHIFT	16
-#define GEN6_PCODE_DATA1			0x13812C
+#define GEN6_PCODE_DATA1			_MMIO(0x13812C)
 
-#define GEN6_GT_CORE_STATUS		0x138060
+#define GEN6_GT_CORE_STATUS		_MMIO(0x138060)
 #define   GEN6_CORE_CPD_STATE_MASK	(7<<4)
 #define   GEN6_RCn_MASK			7
 #define   GEN6_RC0			0
@@ -6922,26 +7002,26 @@
 #define   GEN6_RC6			3
 #define   GEN6_RC7			4
 
-#define GEN8_GT_SLICE_INFO		0x138064
+#define GEN8_GT_SLICE_INFO		_MMIO(0x138064)
 #define   GEN8_LSLICESTAT_MASK		0x7
 
-#define CHV_POWER_SS0_SIG1		0xa720
-#define CHV_POWER_SS1_SIG1		0xa728
+#define CHV_POWER_SS0_SIG1		_MMIO(0xa720)
+#define CHV_POWER_SS1_SIG1		_MMIO(0xa728)
 #define   CHV_SS_PG_ENABLE		(1<<1)
 #define   CHV_EU08_PG_ENABLE		(1<<9)
 #define   CHV_EU19_PG_ENABLE		(1<<17)
 #define   CHV_EU210_PG_ENABLE		(1<<25)
 
-#define CHV_POWER_SS0_SIG2		0xa724
-#define CHV_POWER_SS1_SIG2		0xa72c
+#define CHV_POWER_SS0_SIG2		_MMIO(0xa724)
+#define CHV_POWER_SS1_SIG2		_MMIO(0xa72c)
 #define   CHV_EU311_PG_ENABLE		(1<<1)
 
-#define GEN9_SLICE_PGCTL_ACK(slice)	(0x804c + (slice)*0x4)
+#define GEN9_SLICE_PGCTL_ACK(slice)	_MMIO(0x804c + (slice)*0x4)
 #define   GEN9_PGCTL_SLICE_ACK		(1 << 0)
 #define   GEN9_PGCTL_SS_ACK(subslice)	(1 << (2 + (subslice)*2))
 
-#define GEN9_SS01_EU_PGCTL_ACK(slice)	(0x805c + (slice)*0x8)
-#define GEN9_SS23_EU_PGCTL_ACK(slice)	(0x8060 + (slice)*0x8)
+#define GEN9_SS01_EU_PGCTL_ACK(slice)	_MMIO(0x805c + (slice)*0x8)
+#define GEN9_SS23_EU_PGCTL_ACK(slice)	_MMIO(0x8060 + (slice)*0x8)
 #define   GEN9_PGCTL_SSA_EU08_ACK	(1 << 0)
 #define   GEN9_PGCTL_SSA_EU19_ACK	(1 << 2)
 #define   GEN9_PGCTL_SSA_EU210_ACK	(1 << 4)
@@ -6951,18 +7031,17 @@
 #define   GEN9_PGCTL_SSB_EU210_ACK	(1 << 12)
 #define   GEN9_PGCTL_SSB_EU311_ACK	(1 << 14)
 
-#define GEN7_MISCCPCTL			(0x9424)
+#define GEN7_MISCCPCTL				_MMIO(0x9424)
 #define   GEN7_DOP_CLOCK_GATE_ENABLE		(1<<0)
 #define   GEN8_DOP_CLOCK_GATE_CFCLK_ENABLE	(1<<2)
 #define   GEN8_DOP_CLOCK_GATE_GUC_ENABLE	(1<<4)
 #define   GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE     (1<<6)
 
-#define GEN8_GARBCNTL                   0xB004
+#define GEN8_GARBCNTL                   _MMIO(0xB004)
 #define   GEN9_GAPS_TSV_CREDIT_DISABLE  (1<<7)
 
 /* IVYBRIDGE DPF */
-#define GEN7_L3CDERRST1			0xB008 /* L3CD Error Status 1 */
-#define HSW_L3CDERRST11			0xB208 /* L3CD Error Status register 1 slice 1 */
+#define GEN7_L3CDERRST1(slice)		_MMIO(0xB008 + (slice) * 0x200) /* L3CD Error Status 1 */
 #define   GEN7_L3CDERRST1_ROW_MASK	(0x7ff<<14)
 #define   GEN7_PARITY_ERROR_VALID	(1<<13)
 #define   GEN7_L3CDERRST1_BANK_MASK	(3<<11)
@@ -6975,119 +7054,102 @@
 		((reg & GEN7_L3CDERRST1_SUBBANK_MASK) >> 8)
 #define   GEN7_L3CDERRST1_ENABLE	(1<<7)
 
-#define GEN7_L3LOG_BASE			0xB070
-#define HSW_L3LOG_BASE_SLICE1		0xB270
+#define GEN7_L3LOG(slice, i)		_MMIO(0xB070 + (slice) * 0x200 + (i) * 4)
 #define GEN7_L3LOG_SIZE			0x80
 
-#define GEN7_HALF_SLICE_CHICKEN1	0xe100 /* IVB GT1 + VLV */
-#define GEN7_HALF_SLICE_CHICKEN1_GT2	0xf100
+#define GEN7_HALF_SLICE_CHICKEN1	_MMIO(0xe100) /* IVB GT1 + VLV */
+#define GEN7_HALF_SLICE_CHICKEN1_GT2	_MMIO(0xf100)
 #define   GEN7_MAX_PS_THREAD_DEP		(8<<12)
 #define   GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE	(1<<10)
 #define   GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE	(1<<4)
 #define   GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE	(1<<3)
 
-#define GEN9_HALF_SLICE_CHICKEN5	0xe188
+#define GEN9_HALF_SLICE_CHICKEN5	_MMIO(0xe188)
 #define   GEN9_DG_MIRROR_FIX_ENABLE	(1<<5)
 #define   GEN9_CCS_TLB_PREFETCH_ENABLE	(1<<3)
 
-#define GEN8_ROW_CHICKEN		0xe4f0
+#define GEN8_ROW_CHICKEN		_MMIO(0xe4f0)
 #define   PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE	(1<<8)
 #define   STALL_DOP_GATING_DISABLE		(1<<5)
 
-#define GEN7_ROW_CHICKEN2		0xe4f4
-#define GEN7_ROW_CHICKEN2_GT2		0xf4f4
+#define GEN7_ROW_CHICKEN2		_MMIO(0xe4f4)
+#define GEN7_ROW_CHICKEN2_GT2		_MMIO(0xf4f4)
 #define   DOP_CLOCK_GATING_DISABLE	(1<<0)
 
-#define HSW_ROW_CHICKEN3		0xe49c
+#define HSW_ROW_CHICKEN3		_MMIO(0xe49c)
 #define  HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE    (1 << 6)
 
-#define HALF_SLICE_CHICKEN2		0xe180
+#define HALF_SLICE_CHICKEN2		_MMIO(0xe180)
 #define   GEN8_ST_PO_DISABLE		(1<<13)
 
-#define HALF_SLICE_CHICKEN3		0xe184
+#define HALF_SLICE_CHICKEN3		_MMIO(0xe184)
 #define   HSW_SAMPLE_C_PERFORMANCE	(1<<9)
 #define   GEN8_CENTROID_PIXEL_OPT_DIS	(1<<8)
 #define   GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC	(1<<5)
 #define   GEN8_SAMPLER_POWER_BYPASS_DIS	(1<<1)
 
-#define GEN9_HALF_SLICE_CHICKEN7	0xe194
+#define GEN9_HALF_SLICE_CHICKEN7	_MMIO(0xe194)
 #define   GEN9_ENABLE_YV12_BUGFIX	(1<<4)
 
 /* Audio */
-#define G4X_AUD_VID_DID			(dev_priv->info.display_mmio_offset + 0x62020)
+#define G4X_AUD_VID_DID			_MMIO(dev_priv->info.display_mmio_offset + 0x62020)
 #define   INTEL_AUDIO_DEVCL		0x808629FB
 #define   INTEL_AUDIO_DEVBLC		0x80862801
 #define   INTEL_AUDIO_DEVCTG		0x80862802
 
-#define G4X_AUD_CNTL_ST			0x620B4
+#define G4X_AUD_CNTL_ST			_MMIO(0x620B4)
 #define   G4X_ELDV_DEVCL_DEVBLC		(1 << 13)
 #define   G4X_ELDV_DEVCTG		(1 << 14)
 #define   G4X_ELD_ADDR_MASK		(0xf << 5)
 #define   G4X_ELD_ACK			(1 << 4)
-#define G4X_HDMIW_HDMIEDID		0x6210C
+#define G4X_HDMIW_HDMIEDID		_MMIO(0x6210C)
 
 #define _IBX_HDMIW_HDMIEDID_A		0xE2050
 #define _IBX_HDMIW_HDMIEDID_B		0xE2150
-#define IBX_HDMIW_HDMIEDID(pipe) _PIPE(pipe, \
-					_IBX_HDMIW_HDMIEDID_A, \
-					_IBX_HDMIW_HDMIEDID_B)
+#define IBX_HDMIW_HDMIEDID(pipe)	_MMIO_PIPE(pipe, _IBX_HDMIW_HDMIEDID_A, \
+						  _IBX_HDMIW_HDMIEDID_B)
 #define _IBX_AUD_CNTL_ST_A		0xE20B4
 #define _IBX_AUD_CNTL_ST_B		0xE21B4
-#define IBX_AUD_CNTL_ST(pipe) _PIPE(pipe, \
-					_IBX_AUD_CNTL_ST_A, \
-					_IBX_AUD_CNTL_ST_B)
+#define IBX_AUD_CNTL_ST(pipe)		_MMIO_PIPE(pipe, _IBX_AUD_CNTL_ST_A, \
+						  _IBX_AUD_CNTL_ST_B)
 #define   IBX_ELD_BUFFER_SIZE_MASK	(0x1f << 10)
 #define   IBX_ELD_ADDRESS_MASK		(0x1f << 5)
 #define   IBX_ELD_ACK			(1 << 4)
-#define IBX_AUD_CNTL_ST2		0xE20C0
+#define IBX_AUD_CNTL_ST2		_MMIO(0xE20C0)
 #define   IBX_CP_READY(port)		((1 << 1) << (((port) - 1) * 4))
 #define   IBX_ELD_VALID(port)		((1 << 0) << (((port) - 1) * 4))
 
 #define _CPT_HDMIW_HDMIEDID_A		0xE5050
 #define _CPT_HDMIW_HDMIEDID_B		0xE5150
-#define CPT_HDMIW_HDMIEDID(pipe) _PIPE(pipe, \
-					_CPT_HDMIW_HDMIEDID_A, \
-					_CPT_HDMIW_HDMIEDID_B)
+#define CPT_HDMIW_HDMIEDID(pipe)	_MMIO_PIPE(pipe, _CPT_HDMIW_HDMIEDID_A, _CPT_HDMIW_HDMIEDID_B)
 #define _CPT_AUD_CNTL_ST_A		0xE50B4
 #define _CPT_AUD_CNTL_ST_B		0xE51B4
-#define CPT_AUD_CNTL_ST(pipe) _PIPE(pipe, \
-					_CPT_AUD_CNTL_ST_A, \
-					_CPT_AUD_CNTL_ST_B)
-#define CPT_AUD_CNTRL_ST2		0xE50C0
+#define CPT_AUD_CNTL_ST(pipe)		_MMIO_PIPE(pipe, _CPT_AUD_CNTL_ST_A, _CPT_AUD_CNTL_ST_B)
+#define CPT_AUD_CNTRL_ST2		_MMIO(0xE50C0)
 
 #define _VLV_HDMIW_HDMIEDID_A		(VLV_DISPLAY_BASE + 0x62050)
 #define _VLV_HDMIW_HDMIEDID_B		(VLV_DISPLAY_BASE + 0x62150)
-#define VLV_HDMIW_HDMIEDID(pipe) _PIPE(pipe, \
-					_VLV_HDMIW_HDMIEDID_A, \
-					_VLV_HDMIW_HDMIEDID_B)
+#define VLV_HDMIW_HDMIEDID(pipe)	_MMIO_PIPE(pipe, _VLV_HDMIW_HDMIEDID_A, _VLV_HDMIW_HDMIEDID_B)
 #define _VLV_AUD_CNTL_ST_A		(VLV_DISPLAY_BASE + 0x620B4)
 #define _VLV_AUD_CNTL_ST_B		(VLV_DISPLAY_BASE + 0x621B4)
-#define VLV_AUD_CNTL_ST(pipe) _PIPE(pipe, \
-					_VLV_AUD_CNTL_ST_A, \
-					_VLV_AUD_CNTL_ST_B)
-#define VLV_AUD_CNTL_ST2		(VLV_DISPLAY_BASE + 0x620C0)
+#define VLV_AUD_CNTL_ST(pipe)		_MMIO_PIPE(pipe, _VLV_AUD_CNTL_ST_A, _VLV_AUD_CNTL_ST_B)
+#define VLV_AUD_CNTL_ST2		_MMIO(VLV_DISPLAY_BASE + 0x620C0)
 
 /* These are the 4 32-bit write offset registers for each stream
  * output buffer.  It determines the offset from the
  * 3DSTATE_SO_BUFFERs that the next streamed vertex output goes to.
  */
-#define GEN7_SO_WRITE_OFFSET(n)		(0x5280 + (n) * 4)
+#define GEN7_SO_WRITE_OFFSET(n)		_MMIO(0x5280 + (n) * 4)
 
 #define _IBX_AUD_CONFIG_A		0xe2000
 #define _IBX_AUD_CONFIG_B		0xe2100
-#define IBX_AUD_CFG(pipe) _PIPE(pipe, \
-					_IBX_AUD_CONFIG_A, \
-					_IBX_AUD_CONFIG_B)
+#define IBX_AUD_CFG(pipe)		_MMIO_PIPE(pipe, _IBX_AUD_CONFIG_A, _IBX_AUD_CONFIG_B)
 #define _CPT_AUD_CONFIG_A		0xe5000
 #define _CPT_AUD_CONFIG_B		0xe5100
-#define CPT_AUD_CFG(pipe) _PIPE(pipe, \
-					_CPT_AUD_CONFIG_A, \
-					_CPT_AUD_CONFIG_B)
+#define CPT_AUD_CFG(pipe)		_MMIO_PIPE(pipe, _CPT_AUD_CONFIG_A, _CPT_AUD_CONFIG_B)
 #define _VLV_AUD_CONFIG_A		(VLV_DISPLAY_BASE + 0x62000)
 #define _VLV_AUD_CONFIG_B		(VLV_DISPLAY_BASE + 0x62100)
-#define VLV_AUD_CFG(pipe) _PIPE(pipe, \
-					_VLV_AUD_CONFIG_A, \
-					_VLV_AUD_CONFIG_B)
+#define VLV_AUD_CFG(pipe)		_MMIO_PIPE(pipe, _VLV_AUD_CONFIG_A, _VLV_AUD_CONFIG_B)
 
 #define   AUD_CONFIG_N_VALUE_INDEX		(1 << 29)
 #define   AUD_CONFIG_N_PROG_ENABLE		(1 << 28)
@@ -7112,72 +7174,62 @@
 /* HSW Audio */
 #define _HSW_AUD_CONFIG_A		0x65000
 #define _HSW_AUD_CONFIG_B		0x65100
-#define HSW_AUD_CFG(pipe) _PIPE(pipe, \
-					_HSW_AUD_CONFIG_A, \
-					_HSW_AUD_CONFIG_B)
+#define HSW_AUD_CFG(pipe)		_MMIO_PIPE(pipe, _HSW_AUD_CONFIG_A, _HSW_AUD_CONFIG_B)
 
 #define _HSW_AUD_MISC_CTRL_A		0x65010
 #define _HSW_AUD_MISC_CTRL_B		0x65110
-#define HSW_AUD_MISC_CTRL(pipe) _PIPE(pipe, \
-					_HSW_AUD_MISC_CTRL_A, \
-					_HSW_AUD_MISC_CTRL_B)
+#define HSW_AUD_MISC_CTRL(pipe)		_MMIO_PIPE(pipe, _HSW_AUD_MISC_CTRL_A, _HSW_AUD_MISC_CTRL_B)
 
 #define _HSW_AUD_DIP_ELD_CTRL_ST_A	0x650b4
 #define _HSW_AUD_DIP_ELD_CTRL_ST_B	0x651b4
-#define HSW_AUD_DIP_ELD_CTRL(pipe) _PIPE(pipe, \
-					_HSW_AUD_DIP_ELD_CTRL_ST_A, \
-					_HSW_AUD_DIP_ELD_CTRL_ST_B)
+#define HSW_AUD_DIP_ELD_CTRL(pipe)	_MMIO_PIPE(pipe, _HSW_AUD_DIP_ELD_CTRL_ST_A, _HSW_AUD_DIP_ELD_CTRL_ST_B)
 
 /* Audio Digital Converter */
 #define _HSW_AUD_DIG_CNVT_1		0x65080
 #define _HSW_AUD_DIG_CNVT_2		0x65180
-#define AUD_DIG_CNVT(pipe) _PIPE(pipe, \
-					_HSW_AUD_DIG_CNVT_1, \
-					_HSW_AUD_DIG_CNVT_2)
+#define AUD_DIG_CNVT(pipe)		_MMIO_PIPE(pipe, _HSW_AUD_DIG_CNVT_1, _HSW_AUD_DIG_CNVT_2)
 #define DIP_PORT_SEL_MASK		0x3
 
 #define _HSW_AUD_EDID_DATA_A		0x65050
 #define _HSW_AUD_EDID_DATA_B		0x65150
-#define HSW_AUD_EDID_DATA(pipe) _PIPE(pipe, \
-					_HSW_AUD_EDID_DATA_A, \
-					_HSW_AUD_EDID_DATA_B)
+#define HSW_AUD_EDID_DATA(pipe)		_MMIO_PIPE(pipe, _HSW_AUD_EDID_DATA_A, _HSW_AUD_EDID_DATA_B)
 
-#define HSW_AUD_PIPE_CONV_CFG		0x6507c
-#define HSW_AUD_PIN_ELD_CP_VLD		0x650c0
+#define HSW_AUD_PIPE_CONV_CFG		_MMIO(0x6507c)
+#define HSW_AUD_PIN_ELD_CP_VLD		_MMIO(0x650c0)
 #define   AUDIO_INACTIVE(trans)		((1 << 3) << ((trans) * 4))
 #define   AUDIO_OUTPUT_ENABLE(trans)	((1 << 2) << ((trans) * 4))
 #define   AUDIO_CP_READY(trans)		((1 << 1) << ((trans) * 4))
 #define   AUDIO_ELD_VALID(trans)	((1 << 0) << ((trans) * 4))
 
-#define HSW_AUD_CHICKENBIT			0x65f10
+#define HSW_AUD_CHICKENBIT			_MMIO(0x65f10)
 #define   SKL_AUD_CODEC_WAKE_SIGNAL		(1 << 15)
 
 /* HSW Power Wells */
-#define HSW_PWR_WELL_BIOS			0x45400 /* CTL1 */
-#define HSW_PWR_WELL_DRIVER			0x45404 /* CTL2 */
-#define HSW_PWR_WELL_KVMR			0x45408 /* CTL3 */
-#define HSW_PWR_WELL_DEBUG			0x4540C /* CTL4 */
+#define HSW_PWR_WELL_BIOS			_MMIO(0x45400) /* CTL1 */
+#define HSW_PWR_WELL_DRIVER			_MMIO(0x45404) /* CTL2 */
+#define HSW_PWR_WELL_KVMR			_MMIO(0x45408) /* CTL3 */
+#define HSW_PWR_WELL_DEBUG			_MMIO(0x4540C) /* CTL4 */
 #define   HSW_PWR_WELL_ENABLE_REQUEST		(1<<31)
 #define   HSW_PWR_WELL_STATE_ENABLED		(1<<30)
-#define HSW_PWR_WELL_CTL5			0x45410
+#define HSW_PWR_WELL_CTL5			_MMIO(0x45410)
 #define   HSW_PWR_WELL_ENABLE_SINGLE_STEP	(1<<31)
 #define   HSW_PWR_WELL_PWR_GATE_OVERRIDE	(1<<20)
 #define   HSW_PWR_WELL_FORCE_ON			(1<<19)
-#define HSW_PWR_WELL_CTL6			0x45414
+#define HSW_PWR_WELL_CTL6			_MMIO(0x45414)
 
 /* SKL Fuse Status */
-#define SKL_FUSE_STATUS				0x42000
+#define SKL_FUSE_STATUS				_MMIO(0x42000)
 #define  SKL_FUSE_DOWNLOAD_STATUS              (1<<31)
 #define  SKL_FUSE_PG0_DIST_STATUS              (1<<27)
 #define  SKL_FUSE_PG1_DIST_STATUS              (1<<26)
 #define  SKL_FUSE_PG2_DIST_STATUS              (1<<25)
 
 /* Per-pipe DDI Function Control */
-#define TRANS_DDI_FUNC_CTL_A		0x60400
-#define TRANS_DDI_FUNC_CTL_B		0x61400
-#define TRANS_DDI_FUNC_CTL_C		0x62400
-#define TRANS_DDI_FUNC_CTL_EDP		0x6F400
-#define TRANS_DDI_FUNC_CTL(tran) _TRANSCODER2(tran, TRANS_DDI_FUNC_CTL_A)
+#define _TRANS_DDI_FUNC_CTL_A		0x60400
+#define _TRANS_DDI_FUNC_CTL_B		0x61400
+#define _TRANS_DDI_FUNC_CTL_C		0x62400
+#define _TRANS_DDI_FUNC_CTL_EDP		0x6F400
+#define TRANS_DDI_FUNC_CTL(tran) _MMIO_TRANS2(tran, _TRANS_DDI_FUNC_CTL_A)
 
 #define  TRANS_DDI_FUNC_ENABLE		(1<<31)
 /* Those bits are ignored by pipe EDP since it can only connect to DDI A */
@@ -7207,9 +7259,9 @@
 #define  TRANS_DDI_BFI_ENABLE		(1<<4)
 
 /* DisplayPort Transport Control */
-#define DP_TP_CTL_A			0x64040
-#define DP_TP_CTL_B			0x64140
-#define DP_TP_CTL(port) _PORT(port, DP_TP_CTL_A, DP_TP_CTL_B)
+#define _DP_TP_CTL_A			0x64040
+#define _DP_TP_CTL_B			0x64140
+#define DP_TP_CTL(port) _MMIO_PORT(port, _DP_TP_CTL_A, _DP_TP_CTL_B)
 #define  DP_TP_CTL_ENABLE			(1<<31)
 #define  DP_TP_CTL_MODE_SST			(0<<27)
 #define  DP_TP_CTL_MODE_MST			(1<<27)
@@ -7225,9 +7277,9 @@
 #define  DP_TP_CTL_SCRAMBLE_DISABLE		(1<<7)
 
 /* DisplayPort Transport Status */
-#define DP_TP_STATUS_A			0x64044
-#define DP_TP_STATUS_B			0x64144
-#define DP_TP_STATUS(port) _PORT(port, DP_TP_STATUS_A, DP_TP_STATUS_B)
+#define _DP_TP_STATUS_A			0x64044
+#define _DP_TP_STATUS_B			0x64144
+#define DP_TP_STATUS(port) _MMIO_PORT(port, _DP_TP_STATUS_A, _DP_TP_STATUS_B)
 #define  DP_TP_STATUS_IDLE_DONE			(1<<25)
 #define  DP_TP_STATUS_ACT_SENT			(1<<24)
 #define  DP_TP_STATUS_MODE_STATUS_MST		(1<<23)
@@ -7237,9 +7289,9 @@
 #define  DP_TP_STATUS_PAYLOAD_MAPPING_VC0	(3 << 0)
 
 /* DDI Buffer Control */
-#define DDI_BUF_CTL_A				0x64000
-#define DDI_BUF_CTL_B				0x64100
-#define DDI_BUF_CTL(port) _PORT(port, DDI_BUF_CTL_A, DDI_BUF_CTL_B)
+#define _DDI_BUF_CTL_A				0x64000
+#define _DDI_BUF_CTL_B				0x64100
+#define DDI_BUF_CTL(port) _MMIO_PORT(port, _DDI_BUF_CTL_A, _DDI_BUF_CTL_B)
 #define  DDI_BUF_CTL_ENABLE			(1<<31)
 #define  DDI_BUF_TRANS_SELECT(n)	((n) << 24)
 #define  DDI_BUF_EMP_MASK			(0xf<<24)
@@ -7252,17 +7304,17 @@
 #define  DDI_INIT_DISPLAY_DETECTED		(1<<0)
 
 /* DDI Buffer Translations */
-#define DDI_BUF_TRANS_A				0x64E00
-#define DDI_BUF_TRANS_B				0x64E60
-#define DDI_BUF_TRANS_LO(port, i) (_PORT(port, DDI_BUF_TRANS_A, DDI_BUF_TRANS_B) + (i) * 8)
-#define DDI_BUF_TRANS_HI(port, i) (_PORT(port, DDI_BUF_TRANS_A, DDI_BUF_TRANS_B) + (i) * 8 + 4)
+#define _DDI_BUF_TRANS_A		0x64E00
+#define _DDI_BUF_TRANS_B		0x64E60
+#define DDI_BUF_TRANS_LO(port, i)	_MMIO(_PORT(port, _DDI_BUF_TRANS_A, _DDI_BUF_TRANS_B) + (i) * 8)
+#define DDI_BUF_TRANS_HI(port, i)	_MMIO(_PORT(port, _DDI_BUF_TRANS_A, _DDI_BUF_TRANS_B) + (i) * 8 + 4)
 
 /* Sideband Interface (SBI) is programmed indirectly, via
  * SBI_ADDR, which contains the register offset; and SBI_DATA,
  * which contains the payload */
-#define SBI_ADDR			0xC6000
-#define SBI_DATA			0xC6004
-#define SBI_CTL_STAT			0xC6008
+#define SBI_ADDR			_MMIO(0xC6000)
+#define SBI_DATA			_MMIO(0xC6004)
+#define SBI_CTL_STAT			_MMIO(0xC6008)
 #define  SBI_CTL_DEST_ICLK		(0x0<<16)
 #define  SBI_CTL_DEST_MPHY		(0x1<<16)
 #define  SBI_CTL_OP_IORD		(0x2<<8)
@@ -7293,12 +7345,12 @@
 #define   SBI_GEN0_CFG_BUFFENABLE_DISABLE	(1<<0)
 
 /* LPT PIXCLK_GATE */
-#define PIXCLK_GATE			0xC6020
+#define PIXCLK_GATE			_MMIO(0xC6020)
 #define  PIXCLK_GATE_UNGATE		(1<<0)
 #define  PIXCLK_GATE_GATE		(0<<0)
 
 /* SPLL */
-#define SPLL_CTL			0x46020
+#define SPLL_CTL			_MMIO(0x46020)
 #define  SPLL_PLL_ENABLE		(1<<31)
 #define  SPLL_PLL_SSC			(1<<28)
 #define  SPLL_PLL_NON_SSC		(2<<28)
@@ -7310,9 +7362,9 @@
 #define  SPLL_PLL_FREQ_MASK		(3<<26)
 
 /* WRPLL */
-#define WRPLL_CTL1			0x46040
-#define WRPLL_CTL2			0x46060
-#define WRPLL_CTL(pll)			(pll == 0 ? WRPLL_CTL1 : WRPLL_CTL2)
+#define _WRPLL_CTL1			0x46040
+#define _WRPLL_CTL2			0x46060
+#define WRPLL_CTL(pll)			_MMIO_PIPE(pll, _WRPLL_CTL1, _WRPLL_CTL2)
 #define  WRPLL_PLL_ENABLE		(1<<31)
 #define  WRPLL_PLL_SSC			(1<<28)
 #define  WRPLL_PLL_NON_SSC		(2<<28)
@@ -7329,9 +7381,9 @@
 #define  WRPLL_DIVIDER_FB_MASK		(0xff<<16)
 
 /* Port clock selection */
-#define PORT_CLK_SEL_A			0x46100
-#define PORT_CLK_SEL_B			0x46104
-#define PORT_CLK_SEL(port) _PORT(port, PORT_CLK_SEL_A, PORT_CLK_SEL_B)
+#define _PORT_CLK_SEL_A			0x46100
+#define _PORT_CLK_SEL_B			0x46104
+#define PORT_CLK_SEL(port) _MMIO_PORT(port, _PORT_CLK_SEL_A, _PORT_CLK_SEL_B)
 #define  PORT_CLK_SEL_LCPLL_2700	(0<<29)
 #define  PORT_CLK_SEL_LCPLL_1350	(1<<29)
 #define  PORT_CLK_SEL_LCPLL_810		(2<<29)
@@ -7343,18 +7395,18 @@
 #define  PORT_CLK_SEL_MASK		(7<<29)
 
 /* Transcoder clock selection */
-#define TRANS_CLK_SEL_A			0x46140
-#define TRANS_CLK_SEL_B			0x46144
-#define TRANS_CLK_SEL(tran) _TRANSCODER(tran, TRANS_CLK_SEL_A, TRANS_CLK_SEL_B)
+#define _TRANS_CLK_SEL_A		0x46140
+#define _TRANS_CLK_SEL_B		0x46144
+#define TRANS_CLK_SEL(tran) _MMIO_TRANS(tran, _TRANS_CLK_SEL_A, _TRANS_CLK_SEL_B)
 /* For each transcoder, we need to select the corresponding port clock */
 #define  TRANS_CLK_SEL_DISABLED		(0x0<<29)
 #define  TRANS_CLK_SEL_PORT(x)		(((x)+1)<<29)
 
-#define TRANSA_MSA_MISC			0x60410
-#define TRANSB_MSA_MISC			0x61410
-#define TRANSC_MSA_MISC			0x62410
-#define TRANS_EDP_MSA_MISC		0x6f410
-#define TRANS_MSA_MISC(tran) _TRANSCODER2(tran, TRANSA_MSA_MISC)
+#define _TRANSA_MSA_MISC		0x60410
+#define _TRANSB_MSA_MISC		0x61410
+#define _TRANSC_MSA_MISC		0x62410
+#define _TRANS_EDP_MSA_MISC		0x6f410
+#define TRANS_MSA_MISC(tran) _MMIO_TRANS2(tran, _TRANSA_MSA_MISC)
 
 #define  TRANS_MSA_SYNC_CLK		(1<<0)
 #define  TRANS_MSA_6_BPC		(0<<5)
@@ -7364,7 +7416,7 @@
 #define  TRANS_MSA_16_BPC		(4<<5)
 
 /* LCPLL Control */
-#define LCPLL_CTL			0x130040
+#define LCPLL_CTL			_MMIO(0x130040)
 #define  LCPLL_PLL_DISABLE		(1<<31)
 #define  LCPLL_PLL_LOCK			(1<<30)
 #define  LCPLL_CLK_FREQ_MASK		(3<<26)
@@ -7384,7 +7436,7 @@
  */
 
 /* CDCLK_CTL */
-#define CDCLK_CTL			0x46000
+#define CDCLK_CTL			_MMIO(0x46000)
 #define  CDCLK_FREQ_SEL_MASK		(3<<26)
 #define  CDCLK_FREQ_450_432		(0<<26)
 #define  CDCLK_FREQ_540			(1<<26)
@@ -7400,12 +7452,12 @@
 #define  BXT_CDCLK_SSA_PRECHARGE_ENABLE	(1<<16)
 
 /* LCPLL_CTL */
-#define LCPLL1_CTL		0x46010
-#define LCPLL2_CTL		0x46014
+#define LCPLL1_CTL		_MMIO(0x46010)
+#define LCPLL2_CTL		_MMIO(0x46014)
 #define  LCPLL_PLL_ENABLE	(1<<31)
 
 /* DPLL control1 */
-#define DPLL_CTRL1		0x6C058
+#define DPLL_CTRL1		_MMIO(0x6C058)
 #define  DPLL_CTRL1_HDMI_MODE(id)		(1<<((id)*6+5))
 #define  DPLL_CTRL1_SSC(id)			(1<<((id)*6+4))
 #define  DPLL_CTRL1_LINK_RATE_MASK(id)		(7<<((id)*6+1))
@@ -7420,7 +7472,7 @@
 #define  DPLL_CTRL1_LINK_RATE_2160		5
 
 /* DPLL control2 */
-#define DPLL_CTRL2				0x6C05C
+#define DPLL_CTRL2				_MMIO(0x6C05C)
 #define  DPLL_CTRL2_DDI_CLK_OFF(port)		(1<<((port)+15))
 #define  DPLL_CTRL2_DDI_CLK_SEL_MASK(port)	(3<<((port)*3+1))
 #define  DPLL_CTRL2_DDI_CLK_SEL_SHIFT(port)    ((port)*3+1)
@@ -7428,21 +7480,21 @@
 #define  DPLL_CTRL2_DDI_SEL_OVERRIDE(port)     (1<<((port)*3))
 
 /* DPLL Status */
-#define DPLL_STATUS	0x6C060
+#define DPLL_STATUS	_MMIO(0x6C060)
 #define  DPLL_LOCK(id) (1<<((id)*8))
 
 /* DPLL cfg */
-#define DPLL1_CFGCR1	0x6C040
-#define DPLL2_CFGCR1	0x6C048
-#define DPLL3_CFGCR1	0x6C050
+#define _DPLL1_CFGCR1	0x6C040
+#define _DPLL2_CFGCR1	0x6C048
+#define _DPLL3_CFGCR1	0x6C050
 #define  DPLL_CFGCR1_FREQ_ENABLE	(1<<31)
 #define  DPLL_CFGCR1_DCO_FRACTION_MASK	(0x7fff<<9)
 #define  DPLL_CFGCR1_DCO_FRACTION(x)	((x)<<9)
 #define  DPLL_CFGCR1_DCO_INTEGER_MASK	(0x1ff)
 
-#define DPLL1_CFGCR2	0x6C044
-#define DPLL2_CFGCR2	0x6C04C
-#define DPLL3_CFGCR2	0x6C054
+#define _DPLL1_CFGCR2	0x6C044
+#define _DPLL2_CFGCR2	0x6C04C
+#define _DPLL3_CFGCR2	0x6C054
 #define  DPLL_CFGCR2_QDIV_RATIO_MASK	(0xff<<8)
 #define  DPLL_CFGCR2_QDIV_RATIO(x)	((x)<<8)
 #define  DPLL_CFGCR2_QDIV_MODE(x)	((x)<<7)
@@ -7460,58 +7512,59 @@
 #define  DPLL_CFGCR2_PDIV_7 (4<<2)
 #define  DPLL_CFGCR2_CENTRAL_FREQ_MASK	(3)
 
-#define DPLL_CFGCR1(id) (DPLL1_CFGCR1 + ((id) - SKL_DPLL1) * 8)
-#define DPLL_CFGCR2(id) (DPLL1_CFGCR2 + ((id) - SKL_DPLL1) * 8)
+#define DPLL_CFGCR1(id)	_MMIO_PIPE((id) - SKL_DPLL1, _DPLL1_CFGCR1, _DPLL2_CFGCR2)
+#define DPLL_CFGCR2(id)	_MMIO_PIPE((id) - SKL_DPLL1, _DPLL1_CFGCR2, _DPLL2_CFGCR2)
 
 /* BXT display engine PLL */
-#define BXT_DE_PLL_CTL			0x6d000
+#define BXT_DE_PLL_CTL			_MMIO(0x6d000)
 #define   BXT_DE_PLL_RATIO(x)		(x)	/* {60,65,100} * 19.2MHz */
 #define   BXT_DE_PLL_RATIO_MASK		0xff
 
-#define BXT_DE_PLL_ENABLE		0x46070
+#define BXT_DE_PLL_ENABLE		_MMIO(0x46070)
 #define   BXT_DE_PLL_PLL_ENABLE		(1 << 31)
 #define   BXT_DE_PLL_LOCK		(1 << 30)
 
 /* GEN9 DC */
-#define DC_STATE_EN			0x45504
+#define DC_STATE_EN			_MMIO(0x45504)
+#define  DC_STATE_DISABLE		0
 #define  DC_STATE_EN_UPTO_DC5		(1<<0)
 #define  DC_STATE_EN_DC9		(1<<3)
 #define  DC_STATE_EN_UPTO_DC6		(2<<0)
 #define  DC_STATE_EN_UPTO_DC5_DC6_MASK   0x3
 
-#define  DC_STATE_DEBUG                  0x45520
+#define  DC_STATE_DEBUG                  _MMIO(0x45520)
 #define  DC_STATE_DEBUG_MASK_MEMORY_UP	(1<<1)
 
 /* Please see hsw_read_dcomp() and hsw_write_dcomp() before using this register,
  * since on HSW we can't write to it using I915_WRITE. */
-#define D_COMP_HSW			(MCHBAR_MIRROR_BASE_SNB + 0x5F0C)
-#define D_COMP_BDW			0x138144
+#define D_COMP_HSW			_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5F0C)
+#define D_COMP_BDW			_MMIO(0x138144)
 #define  D_COMP_RCOMP_IN_PROGRESS	(1<<9)
 #define  D_COMP_COMP_FORCE		(1<<8)
 #define  D_COMP_COMP_DISABLE		(1<<0)
 
 /* Pipe WM_LINETIME - watermark line time */
-#define PIPE_WM_LINETIME_A		0x45270
-#define PIPE_WM_LINETIME_B		0x45274
-#define PIPE_WM_LINETIME(pipe) _PIPE(pipe, PIPE_WM_LINETIME_A, \
-					   PIPE_WM_LINETIME_B)
+#define _PIPE_WM_LINETIME_A		0x45270
+#define _PIPE_WM_LINETIME_B		0x45274
+#define PIPE_WM_LINETIME(pipe) _MMIO_PIPE(pipe, _PIPE_WM_LINETIME_A, _PIPE_WM_LINETIME_B)
 #define   PIPE_WM_LINETIME_MASK			(0x1ff)
 #define   PIPE_WM_LINETIME_TIME(x)		((x))
 #define   PIPE_WM_LINETIME_IPS_LINETIME_MASK	(0x1ff<<16)
 #define   PIPE_WM_LINETIME_IPS_LINETIME(x)	((x)<<16)
 
 /* SFUSE_STRAP */
-#define SFUSE_STRAP			0xc2014
+#define SFUSE_STRAP			_MMIO(0xc2014)
 #define  SFUSE_STRAP_FUSE_LOCK		(1<<13)
 #define  SFUSE_STRAP_DISPLAY_DISABLED	(1<<7)
+#define  SFUSE_STRAP_CRT_DISABLED	(1<<6)
 #define  SFUSE_STRAP_DDIB_DETECTED	(1<<2)
 #define  SFUSE_STRAP_DDIC_DETECTED	(1<<1)
 #define  SFUSE_STRAP_DDID_DETECTED	(1<<0)
 
-#define WM_MISC				0x45260
+#define WM_MISC				_MMIO(0x45260)
 #define  WM_MISC_DATA_PARTITION_5_6	(1 << 0)
 
-#define WM_DBG				0x45280
+#define WM_DBG				_MMIO(0x45280)
 #define  WM_DBG_DISALLOW_MULTIPLE_LP	(1<<0)
 #define  WM_DBG_DISALLOW_MAXFIFO	(1<<1)
 #define  WM_DBG_DISALLOW_SPRITE		(1<<2)
@@ -7548,28 +7601,29 @@
 #define _PIPE_B_CSC_POSTOFF_ME	0x49144
 #define _PIPE_B_CSC_POSTOFF_LO	0x49148
 
-#define PIPE_CSC_COEFF_RY_GY(pipe) _PIPE(pipe, _PIPE_A_CSC_COEFF_RY_GY, _PIPE_B_CSC_COEFF_RY_GY)
-#define PIPE_CSC_COEFF_BY(pipe) _PIPE(pipe, _PIPE_A_CSC_COEFF_BY, _PIPE_B_CSC_COEFF_BY)
-#define PIPE_CSC_COEFF_RU_GU(pipe) _PIPE(pipe, _PIPE_A_CSC_COEFF_RU_GU, _PIPE_B_CSC_COEFF_RU_GU)
-#define PIPE_CSC_COEFF_BU(pipe) _PIPE(pipe, _PIPE_A_CSC_COEFF_BU, _PIPE_B_CSC_COEFF_BU)
-#define PIPE_CSC_COEFF_RV_GV(pipe) _PIPE(pipe, _PIPE_A_CSC_COEFF_RV_GV, _PIPE_B_CSC_COEFF_RV_GV)
-#define PIPE_CSC_COEFF_BV(pipe) _PIPE(pipe, _PIPE_A_CSC_COEFF_BV, _PIPE_B_CSC_COEFF_BV)
-#define PIPE_CSC_MODE(pipe) _PIPE(pipe, _PIPE_A_CSC_MODE, _PIPE_B_CSC_MODE)
-#define PIPE_CSC_PREOFF_HI(pipe) _PIPE(pipe, _PIPE_A_CSC_PREOFF_HI, _PIPE_B_CSC_PREOFF_HI)
-#define PIPE_CSC_PREOFF_ME(pipe) _PIPE(pipe, _PIPE_A_CSC_PREOFF_ME, _PIPE_B_CSC_PREOFF_ME)
-#define PIPE_CSC_PREOFF_LO(pipe) _PIPE(pipe, _PIPE_A_CSC_PREOFF_LO, _PIPE_B_CSC_PREOFF_LO)
-#define PIPE_CSC_POSTOFF_HI(pipe) _PIPE(pipe, _PIPE_A_CSC_POSTOFF_HI, _PIPE_B_CSC_POSTOFF_HI)
-#define PIPE_CSC_POSTOFF_ME(pipe) _PIPE(pipe, _PIPE_A_CSC_POSTOFF_ME, _PIPE_B_CSC_POSTOFF_ME)
-#define PIPE_CSC_POSTOFF_LO(pipe) _PIPE(pipe, _PIPE_A_CSC_POSTOFF_LO, _PIPE_B_CSC_POSTOFF_LO)
+#define PIPE_CSC_COEFF_RY_GY(pipe)	_MMIO_PIPE(pipe, _PIPE_A_CSC_COEFF_RY_GY, _PIPE_B_CSC_COEFF_RY_GY)
+#define PIPE_CSC_COEFF_BY(pipe)		_MMIO_PIPE(pipe, _PIPE_A_CSC_COEFF_BY, _PIPE_B_CSC_COEFF_BY)
+#define PIPE_CSC_COEFF_RU_GU(pipe)	_MMIO_PIPE(pipe, _PIPE_A_CSC_COEFF_RU_GU, _PIPE_B_CSC_COEFF_RU_GU)
+#define PIPE_CSC_COEFF_BU(pipe)		_MMIO_PIPE(pipe, _PIPE_A_CSC_COEFF_BU, _PIPE_B_CSC_COEFF_BU)
+#define PIPE_CSC_COEFF_RV_GV(pipe)	_MMIO_PIPE(pipe, _PIPE_A_CSC_COEFF_RV_GV, _PIPE_B_CSC_COEFF_RV_GV)
+#define PIPE_CSC_COEFF_BV(pipe)		_MMIO_PIPE(pipe, _PIPE_A_CSC_COEFF_BV, _PIPE_B_CSC_COEFF_BV)
+#define PIPE_CSC_MODE(pipe)		_MMIO_PIPE(pipe, _PIPE_A_CSC_MODE, _PIPE_B_CSC_MODE)
+#define PIPE_CSC_PREOFF_HI(pipe)	_MMIO_PIPE(pipe, _PIPE_A_CSC_PREOFF_HI, _PIPE_B_CSC_PREOFF_HI)
+#define PIPE_CSC_PREOFF_ME(pipe)	_MMIO_PIPE(pipe, _PIPE_A_CSC_PREOFF_ME, _PIPE_B_CSC_PREOFF_ME)
+#define PIPE_CSC_PREOFF_LO(pipe)	_MMIO_PIPE(pipe, _PIPE_A_CSC_PREOFF_LO, _PIPE_B_CSC_PREOFF_LO)
+#define PIPE_CSC_POSTOFF_HI(pipe)	_MMIO_PIPE(pipe, _PIPE_A_CSC_POSTOFF_HI, _PIPE_B_CSC_POSTOFF_HI)
+#define PIPE_CSC_POSTOFF_ME(pipe)	_MMIO_PIPE(pipe, _PIPE_A_CSC_POSTOFF_ME, _PIPE_B_CSC_POSTOFF_ME)
+#define PIPE_CSC_POSTOFF_LO(pipe)	_MMIO_PIPE(pipe, _PIPE_A_CSC_POSTOFF_LO, _PIPE_B_CSC_POSTOFF_LO)
 
 /* MIPI DSI registers */
 
 #define _MIPI_PORT(port, a, c)	_PORT3(port, a, 0, c)	/* ports A and C only */
+#define _MMIO_MIPI(port, a, c)	_MMIO(_MIPI_PORT(port, a, c))
 
 /* BXT MIPI clock controls */
 #define BXT_MAX_VAR_OUTPUT_KHZ			39500
 
-#define BXT_MIPI_CLOCK_CTL			0x46090
+#define BXT_MIPI_CLOCK_CTL			_MMIO(0x46090)
 #define  BXT_MIPI1_DIV_SHIFT			26
 #define  BXT_MIPI2_DIV_SHIFT			10
 #define  BXT_MIPI_DIV_SHIFT(port)		\
@@ -7631,20 +7685,20 @@
 /* BXT MIPI mode configure */
 #define  _BXT_MIPIA_TRANS_HACTIVE			0x6B0F8
 #define  _BXT_MIPIC_TRANS_HACTIVE			0x6B8F8
-#define  BXT_MIPI_TRANS_HACTIVE(tc)	_MIPI_PORT(tc, \
+#define  BXT_MIPI_TRANS_HACTIVE(tc)	_MMIO_MIPI(tc, \
 		_BXT_MIPIA_TRANS_HACTIVE, _BXT_MIPIC_TRANS_HACTIVE)
 
 #define  _BXT_MIPIA_TRANS_VACTIVE			0x6B0FC
 #define  _BXT_MIPIC_TRANS_VACTIVE			0x6B8FC
-#define  BXT_MIPI_TRANS_VACTIVE(tc)	_MIPI_PORT(tc, \
+#define  BXT_MIPI_TRANS_VACTIVE(tc)	_MMIO_MIPI(tc, \
 		_BXT_MIPIA_TRANS_VACTIVE, _BXT_MIPIC_TRANS_VACTIVE)
 
 #define  _BXT_MIPIA_TRANS_VTOTAL			0x6B100
 #define  _BXT_MIPIC_TRANS_VTOTAL			0x6B900
-#define  BXT_MIPI_TRANS_VTOTAL(tc)	_MIPI_PORT(tc, \
+#define  BXT_MIPI_TRANS_VTOTAL(tc)	_MMIO_MIPI(tc, \
 		_BXT_MIPIA_TRANS_VTOTAL, _BXT_MIPIC_TRANS_VTOTAL)
 
-#define BXT_DSI_PLL_CTL			0x161000
+#define BXT_DSI_PLL_CTL			_MMIO(0x161000)
 #define  BXT_DSI_PLL_PVD_RATIO_SHIFT	16
 #define  BXT_DSI_PLL_PVD_RATIO_MASK	(3 << BXT_DSI_PLL_PVD_RATIO_SHIFT)
 #define  BXT_DSI_PLL_PVD_RATIO_1	(1 << BXT_DSI_PLL_PVD_RATIO_SHIFT)
@@ -7660,21 +7714,20 @@
 #define BXT_DSI_PLL_RATIO_MAX		0x7D
 #define BXT_DSI_PLL_RATIO_MIN		0x22
 #define BXT_DSI_PLL_RATIO_MASK		0xFF
-#define BXT_REF_CLOCK_KHZ		19500
+#define BXT_REF_CLOCK_KHZ		19200
 
-#define BXT_DSI_PLL_ENABLE		0x46080
+#define BXT_DSI_PLL_ENABLE		_MMIO(0x46080)
 #define  BXT_DSI_PLL_DO_ENABLE		(1 << 31)
 #define  BXT_DSI_PLL_LOCKED		(1 << 30)
 
 #define _MIPIA_PORT_CTRL			(VLV_DISPLAY_BASE + 0x61190)
 #define _MIPIC_PORT_CTRL			(VLV_DISPLAY_BASE + 0x61700)
-#define MIPI_PORT_CTRL(port)	_MIPI_PORT(port, _MIPIA_PORT_CTRL, _MIPIC_PORT_CTRL)
+#define MIPI_PORT_CTRL(port)	_MMIO_MIPI(port, _MIPIA_PORT_CTRL, _MIPIC_PORT_CTRL)
 
  /* BXT port control */
 #define _BXT_MIPIA_PORT_CTRL				0x6B0C0
 #define _BXT_MIPIC_PORT_CTRL				0x6B8C0
-#define BXT_MIPI_PORT_CTRL(tc)	_MIPI_PORT(tc, _BXT_MIPIA_PORT_CTRL, \
-						_BXT_MIPIC_PORT_CTRL)
+#define BXT_MIPI_PORT_CTRL(tc)	_MMIO_MIPI(tc, _BXT_MIPIA_PORT_CTRL, _BXT_MIPIC_PORT_CTRL)
 
 #define  DPI_ENABLE					(1 << 31) /* A + C */
 #define  MIPIA_MIPI4DPHY_DELAY_COUNT_SHIFT		27
@@ -7718,8 +7771,7 @@
 
 #define _MIPIA_TEARING_CTRL			(VLV_DISPLAY_BASE + 0x61194)
 #define _MIPIC_TEARING_CTRL			(VLV_DISPLAY_BASE + 0x61704)
-#define MIPI_TEARING_CTRL(port)			_MIPI_PORT(port, \
-				_MIPIA_TEARING_CTRL, _MIPIC_TEARING_CTRL)
+#define MIPI_TEARING_CTRL(port)			_MMIO_MIPI(port, _MIPIA_TEARING_CTRL, _MIPIC_TEARING_CTRL)
 #define  TEARING_EFFECT_DELAY_SHIFT			0
 #define  TEARING_EFFECT_DELAY_MASK			(0xffff << 0)
 
@@ -7730,8 +7782,7 @@
 
 #define _MIPIA_DEVICE_READY		(dev_priv->mipi_mmio_base + 0xb000)
 #define _MIPIC_DEVICE_READY		(dev_priv->mipi_mmio_base + 0xb800)
-#define MIPI_DEVICE_READY(port)		_MIPI_PORT(port, _MIPIA_DEVICE_READY, \
-						_MIPIC_DEVICE_READY)
+#define MIPI_DEVICE_READY(port)		_MMIO_MIPI(port, _MIPIA_DEVICE_READY, _MIPIC_DEVICE_READY)
 #define  BUS_POSSESSION					(1 << 3) /* set to give bus to receiver */
 #define  ULPS_STATE_MASK				(3 << 1)
 #define  ULPS_STATE_ENTER				(2 << 1)
@@ -7741,12 +7792,10 @@
 
 #define _MIPIA_INTR_STAT		(dev_priv->mipi_mmio_base + 0xb004)
 #define _MIPIC_INTR_STAT		(dev_priv->mipi_mmio_base + 0xb804)
-#define MIPI_INTR_STAT(port)		_MIPI_PORT(port, _MIPIA_INTR_STAT, \
-					_MIPIC_INTR_STAT)
+#define MIPI_INTR_STAT(port)		_MMIO_MIPI(port, _MIPIA_INTR_STAT, _MIPIC_INTR_STAT)
 #define _MIPIA_INTR_EN			(dev_priv->mipi_mmio_base + 0xb008)
 #define _MIPIC_INTR_EN			(dev_priv->mipi_mmio_base + 0xb808)
-#define MIPI_INTR_EN(port)		_MIPI_PORT(port, _MIPIA_INTR_EN, \
-					_MIPIC_INTR_EN)
+#define MIPI_INTR_EN(port)		_MMIO_MIPI(port, _MIPIA_INTR_EN, _MIPIC_INTR_EN)
 #define  TEARING_EFFECT					(1 << 31)
 #define  SPL_PKT_SENT_INTERRUPT				(1 << 30)
 #define  GEN_READ_DATA_AVAIL				(1 << 29)
@@ -7782,8 +7831,7 @@
 
 #define _MIPIA_DSI_FUNC_PRG		(dev_priv->mipi_mmio_base + 0xb00c)
 #define _MIPIC_DSI_FUNC_PRG		(dev_priv->mipi_mmio_base + 0xb80c)
-#define MIPI_DSI_FUNC_PRG(port)		_MIPI_PORT(port, _MIPIA_DSI_FUNC_PRG, \
-						_MIPIC_DSI_FUNC_PRG)
+#define MIPI_DSI_FUNC_PRG(port)		_MMIO_MIPI(port, _MIPIA_DSI_FUNC_PRG, _MIPIC_DSI_FUNC_PRG)
 #define  CMD_MODE_DATA_WIDTH_MASK			(7 << 13)
 #define  CMD_MODE_NOT_SUPPORTED				(0 << 13)
 #define  CMD_MODE_DATA_WIDTH_16_BIT			(1 << 13)
@@ -7806,32 +7854,27 @@
 
 #define _MIPIA_HS_TX_TIMEOUT		(dev_priv->mipi_mmio_base + 0xb010)
 #define _MIPIC_HS_TX_TIMEOUT		(dev_priv->mipi_mmio_base + 0xb810)
-#define MIPI_HS_TX_TIMEOUT(port)	_MIPI_PORT(port, _MIPIA_HS_TX_TIMEOUT, \
-					_MIPIC_HS_TX_TIMEOUT)
+#define MIPI_HS_TX_TIMEOUT(port)	_MMIO_MIPI(port, _MIPIA_HS_TX_TIMEOUT, _MIPIC_HS_TX_TIMEOUT)
 #define  HIGH_SPEED_TX_TIMEOUT_COUNTER_MASK		0xffffff
 
 #define _MIPIA_LP_RX_TIMEOUT		(dev_priv->mipi_mmio_base + 0xb014)
 #define _MIPIC_LP_RX_TIMEOUT		(dev_priv->mipi_mmio_base + 0xb814)
-#define MIPI_LP_RX_TIMEOUT(port)	_MIPI_PORT(port, _MIPIA_LP_RX_TIMEOUT, \
-					_MIPIC_LP_RX_TIMEOUT)
+#define MIPI_LP_RX_TIMEOUT(port)	_MMIO_MIPI(port, _MIPIA_LP_RX_TIMEOUT, _MIPIC_LP_RX_TIMEOUT)
 #define  LOW_POWER_RX_TIMEOUT_COUNTER_MASK		0xffffff
 
 #define _MIPIA_TURN_AROUND_TIMEOUT	(dev_priv->mipi_mmio_base + 0xb018)
 #define _MIPIC_TURN_AROUND_TIMEOUT	(dev_priv->mipi_mmio_base + 0xb818)
-#define MIPI_TURN_AROUND_TIMEOUT(port)	_MIPI_PORT(port, \
-			_MIPIA_TURN_AROUND_TIMEOUT, _MIPIC_TURN_AROUND_TIMEOUT)
+#define MIPI_TURN_AROUND_TIMEOUT(port)	_MMIO_MIPI(port, _MIPIA_TURN_AROUND_TIMEOUT, _MIPIC_TURN_AROUND_TIMEOUT)
 #define  TURN_AROUND_TIMEOUT_MASK			0x3f
 
 #define _MIPIA_DEVICE_RESET_TIMER	(dev_priv->mipi_mmio_base + 0xb01c)
 #define _MIPIC_DEVICE_RESET_TIMER	(dev_priv->mipi_mmio_base + 0xb81c)
-#define MIPI_DEVICE_RESET_TIMER(port)	_MIPI_PORT(port, \
-			_MIPIA_DEVICE_RESET_TIMER, _MIPIC_DEVICE_RESET_TIMER)
+#define MIPI_DEVICE_RESET_TIMER(port)	_MMIO_MIPI(port, _MIPIA_DEVICE_RESET_TIMER, _MIPIC_DEVICE_RESET_TIMER)
 #define  DEVICE_RESET_TIMER_MASK			0xffff
 
 #define _MIPIA_DPI_RESOLUTION		(dev_priv->mipi_mmio_base + 0xb020)
 #define _MIPIC_DPI_RESOLUTION		(dev_priv->mipi_mmio_base + 0xb820)
-#define MIPI_DPI_RESOLUTION(port)	_MIPI_PORT(port, _MIPIA_DPI_RESOLUTION, \
-					_MIPIC_DPI_RESOLUTION)
+#define MIPI_DPI_RESOLUTION(port)	_MMIO_MIPI(port, _MIPIA_DPI_RESOLUTION, _MIPIC_DPI_RESOLUTION)
 #define  VERTICAL_ADDRESS_SHIFT				16
 #define  VERTICAL_ADDRESS_MASK				(0xffff << 16)
 #define  HORIZONTAL_ADDRESS_SHIFT			0
@@ -7839,8 +7882,7 @@
 
 #define _MIPIA_DBI_FIFO_THROTTLE	(dev_priv->mipi_mmio_base + 0xb024)
 #define _MIPIC_DBI_FIFO_THROTTLE	(dev_priv->mipi_mmio_base + 0xb824)
-#define MIPI_DBI_FIFO_THROTTLE(port)	_MIPI_PORT(port, \
-			_MIPIA_DBI_FIFO_THROTTLE, _MIPIC_DBI_FIFO_THROTTLE)
+#define MIPI_DBI_FIFO_THROTTLE(port)	_MMIO_MIPI(port, _MIPIA_DBI_FIFO_THROTTLE, _MIPIC_DBI_FIFO_THROTTLE)
 #define  DBI_FIFO_EMPTY_HALF				(0 << 0)
 #define  DBI_FIFO_EMPTY_QUARTER				(1 << 0)
 #define  DBI_FIFO_EMPTY_7_LOCATIONS			(2 << 0)
@@ -7848,50 +7890,41 @@
 /* regs below are bits 15:0 */
 #define _MIPIA_HSYNC_PADDING_COUNT	(dev_priv->mipi_mmio_base + 0xb028)
 #define _MIPIC_HSYNC_PADDING_COUNT	(dev_priv->mipi_mmio_base + 0xb828)
-#define MIPI_HSYNC_PADDING_COUNT(port)	_MIPI_PORT(port, \
-			_MIPIA_HSYNC_PADDING_COUNT, _MIPIC_HSYNC_PADDING_COUNT)
+#define MIPI_HSYNC_PADDING_COUNT(port)	_MMIO_MIPI(port, _MIPIA_HSYNC_PADDING_COUNT, _MIPIC_HSYNC_PADDING_COUNT)
 
 #define _MIPIA_HBP_COUNT		(dev_priv->mipi_mmio_base + 0xb02c)
 #define _MIPIC_HBP_COUNT		(dev_priv->mipi_mmio_base + 0xb82c)
-#define MIPI_HBP_COUNT(port)		_MIPI_PORT(port, _MIPIA_HBP_COUNT, \
-					_MIPIC_HBP_COUNT)
+#define MIPI_HBP_COUNT(port)		_MMIO_MIPI(port, _MIPIA_HBP_COUNT, _MIPIC_HBP_COUNT)
 
 #define _MIPIA_HFP_COUNT		(dev_priv->mipi_mmio_base + 0xb030)
 #define _MIPIC_HFP_COUNT		(dev_priv->mipi_mmio_base + 0xb830)
-#define MIPI_HFP_COUNT(port)		_MIPI_PORT(port, _MIPIA_HFP_COUNT, \
-					_MIPIC_HFP_COUNT)
+#define MIPI_HFP_COUNT(port)		_MMIO_MIPI(port, _MIPIA_HFP_COUNT, _MIPIC_HFP_COUNT)
 
 #define _MIPIA_HACTIVE_AREA_COUNT	(dev_priv->mipi_mmio_base + 0xb034)
 #define _MIPIC_HACTIVE_AREA_COUNT	(dev_priv->mipi_mmio_base + 0xb834)
-#define MIPI_HACTIVE_AREA_COUNT(port)	_MIPI_PORT(port, \
-			_MIPIA_HACTIVE_AREA_COUNT, _MIPIC_HACTIVE_AREA_COUNT)
+#define MIPI_HACTIVE_AREA_COUNT(port)	_MMIO_MIPI(port, _MIPIA_HACTIVE_AREA_COUNT, _MIPIC_HACTIVE_AREA_COUNT)
 
 #define _MIPIA_VSYNC_PADDING_COUNT	(dev_priv->mipi_mmio_base + 0xb038)
 #define _MIPIC_VSYNC_PADDING_COUNT	(dev_priv->mipi_mmio_base + 0xb838)
-#define MIPI_VSYNC_PADDING_COUNT(port)	_MIPI_PORT(port, \
-			_MIPIA_VSYNC_PADDING_COUNT, _MIPIC_VSYNC_PADDING_COUNT)
+#define MIPI_VSYNC_PADDING_COUNT(port)	_MMIO_MIPI(port, _MIPIA_VSYNC_PADDING_COUNT, _MIPIC_VSYNC_PADDING_COUNT)
 
 #define _MIPIA_VBP_COUNT		(dev_priv->mipi_mmio_base + 0xb03c)
 #define _MIPIC_VBP_COUNT		(dev_priv->mipi_mmio_base + 0xb83c)
-#define MIPI_VBP_COUNT(port)		_MIPI_PORT(port, _MIPIA_VBP_COUNT, \
-					_MIPIC_VBP_COUNT)
+#define MIPI_VBP_COUNT(port)		_MMIO_MIPI(port, _MIPIA_VBP_COUNT, _MIPIC_VBP_COUNT)
 
 #define _MIPIA_VFP_COUNT		(dev_priv->mipi_mmio_base + 0xb040)
 #define _MIPIC_VFP_COUNT		(dev_priv->mipi_mmio_base + 0xb840)
-#define MIPI_VFP_COUNT(port)		_MIPI_PORT(port, _MIPIA_VFP_COUNT, \
-					_MIPIC_VFP_COUNT)
+#define MIPI_VFP_COUNT(port)		_MMIO_MIPI(port, _MIPIA_VFP_COUNT, _MIPIC_VFP_COUNT)
 
 #define _MIPIA_HIGH_LOW_SWITCH_COUNT	(dev_priv->mipi_mmio_base + 0xb044)
 #define _MIPIC_HIGH_LOW_SWITCH_COUNT	(dev_priv->mipi_mmio_base + 0xb844)
-#define MIPI_HIGH_LOW_SWITCH_COUNT(port)	_MIPI_PORT(port,	\
-		_MIPIA_HIGH_LOW_SWITCH_COUNT, _MIPIC_HIGH_LOW_SWITCH_COUNT)
+#define MIPI_HIGH_LOW_SWITCH_COUNT(port)	_MMIO_MIPI(port,	_MIPIA_HIGH_LOW_SWITCH_COUNT, _MIPIC_HIGH_LOW_SWITCH_COUNT)
 
 /* regs above are bits 15:0 */
 
 #define _MIPIA_DPI_CONTROL		(dev_priv->mipi_mmio_base + 0xb048)
 #define _MIPIC_DPI_CONTROL		(dev_priv->mipi_mmio_base + 0xb848)
-#define MIPI_DPI_CONTROL(port)		_MIPI_PORT(port, _MIPIA_DPI_CONTROL, \
-					_MIPIC_DPI_CONTROL)
+#define MIPI_DPI_CONTROL(port)		_MMIO_MIPI(port, _MIPIA_DPI_CONTROL, _MIPIC_DPI_CONTROL)
 #define  DPI_LP_MODE					(1 << 6)
 #define  BACKLIGHT_OFF					(1 << 5)
 #define  BACKLIGHT_ON					(1 << 4)
@@ -7902,29 +7935,26 @@
 
 #define _MIPIA_DPI_DATA			(dev_priv->mipi_mmio_base + 0xb04c)
 #define _MIPIC_DPI_DATA			(dev_priv->mipi_mmio_base + 0xb84c)
-#define MIPI_DPI_DATA(port)		_MIPI_PORT(port, _MIPIA_DPI_DATA, \
-					_MIPIC_DPI_DATA)
+#define MIPI_DPI_DATA(port)		_MMIO_MIPI(port, _MIPIA_DPI_DATA, _MIPIC_DPI_DATA)
 #define  COMMAND_BYTE_SHIFT				0
 #define  COMMAND_BYTE_MASK				(0x3f << 0)
 
 #define _MIPIA_INIT_COUNT		(dev_priv->mipi_mmio_base + 0xb050)
 #define _MIPIC_INIT_COUNT		(dev_priv->mipi_mmio_base + 0xb850)
-#define MIPI_INIT_COUNT(port)		_MIPI_PORT(port, _MIPIA_INIT_COUNT, \
-					_MIPIC_INIT_COUNT)
+#define MIPI_INIT_COUNT(port)		_MMIO_MIPI(port, _MIPIA_INIT_COUNT, _MIPIC_INIT_COUNT)
 #define  MASTER_INIT_TIMER_SHIFT			0
 #define  MASTER_INIT_TIMER_MASK				(0xffff << 0)
 
 #define _MIPIA_MAX_RETURN_PKT_SIZE	(dev_priv->mipi_mmio_base + 0xb054)
 #define _MIPIC_MAX_RETURN_PKT_SIZE	(dev_priv->mipi_mmio_base + 0xb854)
-#define MIPI_MAX_RETURN_PKT_SIZE(port)	_MIPI_PORT(port, \
+#define MIPI_MAX_RETURN_PKT_SIZE(port)	_MMIO_MIPI(port, \
 			_MIPIA_MAX_RETURN_PKT_SIZE, _MIPIC_MAX_RETURN_PKT_SIZE)
 #define  MAX_RETURN_PKT_SIZE_SHIFT			0
 #define  MAX_RETURN_PKT_SIZE_MASK			(0x3ff << 0)
 
 #define _MIPIA_VIDEO_MODE_FORMAT	(dev_priv->mipi_mmio_base + 0xb058)
 #define _MIPIC_VIDEO_MODE_FORMAT	(dev_priv->mipi_mmio_base + 0xb858)
-#define MIPI_VIDEO_MODE_FORMAT(port)	_MIPI_PORT(port, \
-			_MIPIA_VIDEO_MODE_FORMAT, _MIPIC_VIDEO_MODE_FORMAT)
+#define MIPI_VIDEO_MODE_FORMAT(port)	_MMIO_MIPI(port, _MIPIA_VIDEO_MODE_FORMAT, _MIPIC_VIDEO_MODE_FORMAT)
 #define  RANDOM_DPI_DISPLAY_RESOLUTION			(1 << 4)
 #define  DISABLE_VIDEO_BTA				(1 << 3)
 #define  IP_TG_CONFIG					(1 << 2)
@@ -7934,8 +7964,7 @@
 
 #define _MIPIA_EOT_DISABLE		(dev_priv->mipi_mmio_base + 0xb05c)
 #define _MIPIC_EOT_DISABLE		(dev_priv->mipi_mmio_base + 0xb85c)
-#define MIPI_EOT_DISABLE(port)		_MIPI_PORT(port, _MIPIA_EOT_DISABLE, \
-					_MIPIC_EOT_DISABLE)
+#define MIPI_EOT_DISABLE(port)		_MMIO_MIPI(port, _MIPIA_EOT_DISABLE, _MIPIC_EOT_DISABLE)
 #define  LP_RX_TIMEOUT_ERROR_RECOVERY_DISABLE		(1 << 7)
 #define  HS_RX_TIMEOUT_ERROR_RECOVERY_DISABLE		(1 << 6)
 #define  LOW_CONTENTION_RECOVERY_DISABLE		(1 << 5)
@@ -7947,31 +7976,26 @@
 
 #define _MIPIA_LP_BYTECLK		(dev_priv->mipi_mmio_base + 0xb060)
 #define _MIPIC_LP_BYTECLK		(dev_priv->mipi_mmio_base + 0xb860)
-#define MIPI_LP_BYTECLK(port)		_MIPI_PORT(port, _MIPIA_LP_BYTECLK, \
-					_MIPIC_LP_BYTECLK)
+#define MIPI_LP_BYTECLK(port)		_MMIO_MIPI(port, _MIPIA_LP_BYTECLK, _MIPIC_LP_BYTECLK)
 #define  LP_BYTECLK_SHIFT				0
 #define  LP_BYTECLK_MASK				(0xffff << 0)
 
 /* bits 31:0 */
 #define _MIPIA_LP_GEN_DATA		(dev_priv->mipi_mmio_base + 0xb064)
 #define _MIPIC_LP_GEN_DATA		(dev_priv->mipi_mmio_base + 0xb864)
-#define MIPI_LP_GEN_DATA(port)		_MIPI_PORT(port, _MIPIA_LP_GEN_DATA, \
-					_MIPIC_LP_GEN_DATA)
+#define MIPI_LP_GEN_DATA(port)		_MMIO_MIPI(port, _MIPIA_LP_GEN_DATA, _MIPIC_LP_GEN_DATA)
 
 /* bits 31:0 */
 #define _MIPIA_HS_GEN_DATA		(dev_priv->mipi_mmio_base + 0xb068)
 #define _MIPIC_HS_GEN_DATA		(dev_priv->mipi_mmio_base + 0xb868)
-#define MIPI_HS_GEN_DATA(port)		_MIPI_PORT(port, _MIPIA_HS_GEN_DATA, \
-					_MIPIC_HS_GEN_DATA)
+#define MIPI_HS_GEN_DATA(port)		_MMIO_MIPI(port, _MIPIA_HS_GEN_DATA, _MIPIC_HS_GEN_DATA)
 
 #define _MIPIA_LP_GEN_CTRL		(dev_priv->mipi_mmio_base + 0xb06c)
 #define _MIPIC_LP_GEN_CTRL		(dev_priv->mipi_mmio_base + 0xb86c)
-#define MIPI_LP_GEN_CTRL(port)		_MIPI_PORT(port, _MIPIA_LP_GEN_CTRL, \
-					_MIPIC_LP_GEN_CTRL)
+#define MIPI_LP_GEN_CTRL(port)		_MMIO_MIPI(port, _MIPIA_LP_GEN_CTRL, _MIPIC_LP_GEN_CTRL)
 #define _MIPIA_HS_GEN_CTRL		(dev_priv->mipi_mmio_base + 0xb070)
 #define _MIPIC_HS_GEN_CTRL		(dev_priv->mipi_mmio_base + 0xb870)
-#define MIPI_HS_GEN_CTRL(port)		_MIPI_PORT(port, _MIPIA_HS_GEN_CTRL, \
-					_MIPIC_HS_GEN_CTRL)
+#define MIPI_HS_GEN_CTRL(port)		_MMIO_MIPI(port, _MIPIA_HS_GEN_CTRL, _MIPIC_HS_GEN_CTRL)
 #define  LONG_PACKET_WORD_COUNT_SHIFT			8
 #define  LONG_PACKET_WORD_COUNT_MASK			(0xffff << 8)
 #define  SHORT_PACKET_PARAM_SHIFT			8
@@ -7984,8 +8008,7 @@
 
 #define _MIPIA_GEN_FIFO_STAT		(dev_priv->mipi_mmio_base + 0xb074)
 #define _MIPIC_GEN_FIFO_STAT		(dev_priv->mipi_mmio_base + 0xb874)
-#define MIPI_GEN_FIFO_STAT(port)	_MIPI_PORT(port, _MIPIA_GEN_FIFO_STAT, \
-					_MIPIC_GEN_FIFO_STAT)
+#define MIPI_GEN_FIFO_STAT(port)	_MMIO_MIPI(port, _MIPIA_GEN_FIFO_STAT, _MIPIC_GEN_FIFO_STAT)
 #define  DPI_FIFO_EMPTY					(1 << 28)
 #define  DBI_FIFO_EMPTY					(1 << 27)
 #define  LP_CTRL_FIFO_EMPTY				(1 << 26)
@@ -8003,16 +8026,14 @@
 
 #define _MIPIA_HS_LS_DBI_ENABLE		(dev_priv->mipi_mmio_base + 0xb078)
 #define _MIPIC_HS_LS_DBI_ENABLE		(dev_priv->mipi_mmio_base + 0xb878)
-#define MIPI_HS_LP_DBI_ENABLE(port)	_MIPI_PORT(port, \
-			_MIPIA_HS_LS_DBI_ENABLE, _MIPIC_HS_LS_DBI_ENABLE)
+#define MIPI_HS_LP_DBI_ENABLE(port)	_MMIO_MIPI(port, _MIPIA_HS_LS_DBI_ENABLE, _MIPIC_HS_LS_DBI_ENABLE)
 #define  DBI_HS_LP_MODE_MASK				(1 << 0)
 #define  DBI_LP_MODE					(1 << 0)
 #define  DBI_HS_MODE					(0 << 0)
 
 #define _MIPIA_DPHY_PARAM		(dev_priv->mipi_mmio_base + 0xb080)
 #define _MIPIC_DPHY_PARAM		(dev_priv->mipi_mmio_base + 0xb880)
-#define MIPI_DPHY_PARAM(port)		_MIPI_PORT(port, _MIPIA_DPHY_PARAM, \
-					_MIPIC_DPHY_PARAM)
+#define MIPI_DPHY_PARAM(port)		_MMIO_MIPI(port, _MIPIA_DPHY_PARAM, _MIPIC_DPHY_PARAM)
 #define  EXIT_ZERO_COUNT_SHIFT				24
 #define  EXIT_ZERO_COUNT_MASK				(0x3f << 24)
 #define  TRAIL_COUNT_SHIFT				16
@@ -8025,15 +8046,11 @@
 /* bits 31:0 */
 #define _MIPIA_DBI_BW_CTRL		(dev_priv->mipi_mmio_base + 0xb084)
 #define _MIPIC_DBI_BW_CTRL		(dev_priv->mipi_mmio_base + 0xb884)
-#define MIPI_DBI_BW_CTRL(port)		_MIPI_PORT(port, _MIPIA_DBI_BW_CTRL, \
-					_MIPIC_DBI_BW_CTRL)
+#define MIPI_DBI_BW_CTRL(port)		_MMIO_MIPI(port, _MIPIA_DBI_BW_CTRL, _MIPIC_DBI_BW_CTRL)
 
-#define _MIPIA_CLK_LANE_SWITCH_TIME_CNT		(dev_priv->mipi_mmio_base \
-							+ 0xb088)
-#define _MIPIC_CLK_LANE_SWITCH_TIME_CNT		(dev_priv->mipi_mmio_base \
-							+ 0xb888)
-#define MIPI_CLK_LANE_SWITCH_TIME_CNT(port)	_MIPI_PORT(port, \
-	_MIPIA_CLK_LANE_SWITCH_TIME_CNT, _MIPIC_CLK_LANE_SWITCH_TIME_CNT)
+#define _MIPIA_CLK_LANE_SWITCH_TIME_CNT		(dev_priv->mipi_mmio_base + 0xb088)
+#define _MIPIC_CLK_LANE_SWITCH_TIME_CNT		(dev_priv->mipi_mmio_base + 0xb888)
+#define MIPI_CLK_LANE_SWITCH_TIME_CNT(port)	_MMIO_MIPI(port, _MIPIA_CLK_LANE_SWITCH_TIME_CNT, _MIPIC_CLK_LANE_SWITCH_TIME_CNT)
 #define  LP_HS_SSW_CNT_SHIFT				16
 #define  LP_HS_SSW_CNT_MASK				(0xffff << 16)
 #define  HS_LP_PWR_SW_CNT_SHIFT				0
@@ -8041,19 +8058,16 @@
 
 #define _MIPIA_STOP_STATE_STALL		(dev_priv->mipi_mmio_base + 0xb08c)
 #define _MIPIC_STOP_STATE_STALL		(dev_priv->mipi_mmio_base + 0xb88c)
-#define MIPI_STOP_STATE_STALL(port)	_MIPI_PORT(port, \
-			_MIPIA_STOP_STATE_STALL, _MIPIC_STOP_STATE_STALL)
+#define MIPI_STOP_STATE_STALL(port)	_MMIO_MIPI(port, _MIPIA_STOP_STATE_STALL, _MIPIC_STOP_STATE_STALL)
 #define  STOP_STATE_STALL_COUNTER_SHIFT			0
 #define  STOP_STATE_STALL_COUNTER_MASK			(0xff << 0)
 
 #define _MIPIA_INTR_STAT_REG_1		(dev_priv->mipi_mmio_base + 0xb090)
 #define _MIPIC_INTR_STAT_REG_1		(dev_priv->mipi_mmio_base + 0xb890)
-#define MIPI_INTR_STAT_REG_1(port)	_MIPI_PORT(port, \
-				_MIPIA_INTR_STAT_REG_1, _MIPIC_INTR_STAT_REG_1)
+#define MIPI_INTR_STAT_REG_1(port)	_MMIO_MIPI(port, _MIPIA_INTR_STAT_REG_1, _MIPIC_INTR_STAT_REG_1)
 #define _MIPIA_INTR_EN_REG_1		(dev_priv->mipi_mmio_base + 0xb094)
 #define _MIPIC_INTR_EN_REG_1		(dev_priv->mipi_mmio_base + 0xb894)
-#define MIPI_INTR_EN_REG_1(port)	_MIPI_PORT(port, _MIPIA_INTR_EN_REG_1, \
-					_MIPIC_INTR_EN_REG_1)
+#define MIPI_INTR_EN_REG_1(port)	_MMIO_MIPI(port, _MIPIA_INTR_EN_REG_1, _MIPIC_INTR_EN_REG_1)
 #define  RX_CONTENTION_DETECTED				(1 << 0)
 
 /* XXX: only pipe A ?!? */
@@ -8073,8 +8087,7 @@
 
 #define _MIPIA_CTRL			(dev_priv->mipi_mmio_base + 0xb104)
 #define _MIPIC_CTRL			(dev_priv->mipi_mmio_base + 0xb904)
-#define MIPI_CTRL(port)			_MIPI_PORT(port, _MIPIA_CTRL, \
-					_MIPIC_CTRL)
+#define MIPI_CTRL(port)			_MMIO_MIPI(port, _MIPIA_CTRL, _MIPIC_CTRL)
 #define  ESCAPE_CLOCK_DIVIDER_SHIFT			5 /* A only */
 #define  ESCAPE_CLOCK_DIVIDER_MASK			(3 << 5)
 #define  ESCAPE_CLOCK_DIVIDER_1				(0 << 5)
@@ -8093,23 +8106,20 @@
 
 #define _MIPIA_DATA_ADDRESS		(dev_priv->mipi_mmio_base + 0xb108)
 #define _MIPIC_DATA_ADDRESS		(dev_priv->mipi_mmio_base + 0xb908)
-#define MIPI_DATA_ADDRESS(port)		_MIPI_PORT(port, _MIPIA_DATA_ADDRESS, \
-					_MIPIC_DATA_ADDRESS)
+#define MIPI_DATA_ADDRESS(port)		_MMIO_MIPI(port, _MIPIA_DATA_ADDRESS, _MIPIC_DATA_ADDRESS)
 #define  DATA_MEM_ADDRESS_SHIFT				5
 #define  DATA_MEM_ADDRESS_MASK				(0x7ffffff << 5)
 #define  DATA_VALID					(1 << 0)
 
 #define _MIPIA_DATA_LENGTH		(dev_priv->mipi_mmio_base + 0xb10c)
 #define _MIPIC_DATA_LENGTH		(dev_priv->mipi_mmio_base + 0xb90c)
-#define MIPI_DATA_LENGTH(port)		_MIPI_PORT(port, _MIPIA_DATA_LENGTH, \
-					_MIPIC_DATA_LENGTH)
+#define MIPI_DATA_LENGTH(port)		_MMIO_MIPI(port, _MIPIA_DATA_LENGTH, _MIPIC_DATA_LENGTH)
 #define  DATA_LENGTH_SHIFT				0
 #define  DATA_LENGTH_MASK				(0xfffff << 0)
 
 #define _MIPIA_COMMAND_ADDRESS		(dev_priv->mipi_mmio_base + 0xb110)
 #define _MIPIC_COMMAND_ADDRESS		(dev_priv->mipi_mmio_base + 0xb910)
-#define MIPI_COMMAND_ADDRESS(port)	_MIPI_PORT(port, \
-				_MIPIA_COMMAND_ADDRESS, _MIPIC_COMMAND_ADDRESS)
+#define MIPI_COMMAND_ADDRESS(port)	_MMIO_MIPI(port, _MIPIA_COMMAND_ADDRESS, _MIPIC_COMMAND_ADDRESS)
 #define  COMMAND_MEM_ADDRESS_SHIFT			5
 #define  COMMAND_MEM_ADDRESS_MASK			(0x7ffffff << 5)
 #define  AUTO_PWG_ENABLE				(1 << 2)
@@ -8118,21 +8128,17 @@
 
 #define _MIPIA_COMMAND_LENGTH		(dev_priv->mipi_mmio_base + 0xb114)
 #define _MIPIC_COMMAND_LENGTH		(dev_priv->mipi_mmio_base + 0xb914)
-#define MIPI_COMMAND_LENGTH(port)	_MIPI_PORT(port, _MIPIA_COMMAND_LENGTH, \
-					_MIPIC_COMMAND_LENGTH)
+#define MIPI_COMMAND_LENGTH(port)	_MMIO_MIPI(port, _MIPIA_COMMAND_LENGTH, _MIPIC_COMMAND_LENGTH)
 #define  COMMAND_LENGTH_SHIFT(n)			(8 * (n)) /* n: 0...3 */
 #define  COMMAND_LENGTH_MASK(n)				(0xff << (8 * (n)))
 
 #define _MIPIA_READ_DATA_RETURN0	(dev_priv->mipi_mmio_base + 0xb118)
 #define _MIPIC_READ_DATA_RETURN0	(dev_priv->mipi_mmio_base + 0xb918)
-#define MIPI_READ_DATA_RETURN(port, n) \
-	(_MIPI_PORT(port, _MIPIA_READ_DATA_RETURN0, _MIPIC_READ_DATA_RETURN0) \
-					+ 4 * (n)) /* n: 0...7 */
+#define MIPI_READ_DATA_RETURN(port, n) _MMIO(_MIPI(port, _MIPIA_READ_DATA_RETURN0, _MIPIC_READ_DATA_RETURN0) + 4 * (n)) /* n: 0...7 */
 
 #define _MIPIA_READ_DATA_VALID		(dev_priv->mipi_mmio_base + 0xb138)
 #define _MIPIC_READ_DATA_VALID		(dev_priv->mipi_mmio_base + 0xb938)
-#define MIPI_READ_DATA_VALID(port)	_MIPI_PORT(port, \
-				_MIPIA_READ_DATA_VALID, _MIPIC_READ_DATA_VALID)
+#define MIPI_READ_DATA_VALID(port)	_MMIO_MIPI(port, _MIPIA_READ_DATA_VALID, _MIPIC_READ_DATA_VALID)
 #define  READ_DATA_VALID(n)				(1 << (n))
 
 /* For UMS only (deprecated): */
@@ -8140,12 +8146,12 @@
 #define _PALETTE_B (dev_priv->info.display_mmio_offset + 0xa800)
 
 /* MOCS (Memory Object Control State) registers */
-#define GEN9_LNCFCMOCS0		0xb020	/* L3 Cache Control base */
+#define GEN9_LNCFCMOCS(i)	_MMIO(0xb020 + (i) * 4)	/* L3 Cache Control */
 
-#define GEN9_GFX_MOCS_0		0xc800	/* Graphics MOCS base register*/
-#define GEN9_MFX0_MOCS_0	0xc900	/* Media 0 MOCS base register*/
-#define GEN9_MFX1_MOCS_0	0xca00	/* Media 1 MOCS base register*/
-#define GEN9_VEBOX_MOCS_0	0xcb00	/* Video MOCS base register*/
-#define GEN9_BLT_MOCS_0		0xcc00	/* Blitter MOCS base register*/
+#define GEN9_GFX_MOCS(i)	_MMIO(0xc800 + (i) * 4)	/* Graphics MOCS registers */
+#define GEN9_MFX0_MOCS(i)	_MMIO(0xc900 + (i) * 4)	/* Media 0 MOCS registers */
+#define GEN9_MFX1_MOCS(i)	_MMIO(0xca00 + (i) * 4)	/* Media 1 MOCS registers */
+#define GEN9_VEBOX_MOCS(i)	_MMIO(0xcb00 + (i) * 4)	/* Video MOCS registers */
+#define GEN9_BLT_MOCS(i)	_MMIO(0xcc00 + (i) * 4)	/* Blitter MOCS registers */
 
 #endif /* _I915_REG_H_ */
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 50ce9ce..f929c61 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -35,7 +35,8 @@
 #define dev_to_drm_minor(d) dev_get_drvdata((d))
 
 #ifdef CONFIG_PM
-static u32 calc_residency(struct drm_device *dev, const u32 reg)
+static u32 calc_residency(struct drm_device *dev,
+			  i915_reg_t reg)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u64 raw_time; /* 32b value may overflow during fixed point math */
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 04fe849..52b2d40 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -664,7 +664,7 @@
 );
 
 TRACE_EVENT_CONDITION(i915_reg_rw,
-	TP_PROTO(bool write, u32 reg, u64 val, int len, bool trace),
+	TP_PROTO(bool write, i915_reg_t reg, u64 val, int len, bool trace),
 
 	TP_ARGS(write, reg, val, len, trace),
 
@@ -679,7 +679,7 @@
 
 	TP_fast_assign(
 		__entry->val = (u64)val;
-		__entry->reg = reg;
+		__entry->reg = i915_mmio_reg_offset(reg);
 		__entry->write = write;
 		__entry->len = len;
 		),
diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c
index 5eee75b..dea7429 100644
--- a/drivers/gpu/drm/i915/i915_vgpu.c
+++ b/drivers/gpu/drm/i915/i915_vgpu.c
@@ -69,13 +69,13 @@
 	if (!IS_HASWELL(dev))
 		return;
 
-	magic = readq(dev_priv->regs + vgtif_reg(magic));
+	magic = __raw_i915_read64(dev_priv, vgtif_reg(magic));
 	if (magic != VGT_MAGIC)
 		return;
 
 	version = INTEL_VGT_IF_VERSION_ENCODE(
-		readw(dev_priv->regs + vgtif_reg(version_major)),
-		readw(dev_priv->regs + vgtif_reg(version_minor)));
+		__raw_i915_read16(dev_priv, vgtif_reg(version_major)),
+		__raw_i915_read16(dev_priv, vgtif_reg(version_minor)));
 	if (version != INTEL_VGT_IF_VERSION) {
 		DRM_INFO("VGT interface version mismatch!\n");
 		return;
diff --git a/drivers/gpu/drm/i915/i915_vgpu.h b/drivers/gpu/drm/i915/i915_vgpu.h
index 21c97f4..3c83b47 100644
--- a/drivers/gpu/drm/i915/i915_vgpu.h
+++ b/drivers/gpu/drm/i915/i915_vgpu.h
@@ -92,14 +92,10 @@
 	uint32_t g2v_notify;
 	uint32_t rsv6[7];
 
-	uint32_t pdp0_lo;
-	uint32_t pdp0_hi;
-	uint32_t pdp1_lo;
-	uint32_t pdp1_hi;
-	uint32_t pdp2_lo;
-	uint32_t pdp2_hi;
-	uint32_t pdp3_lo;
-	uint32_t pdp3_hi;
+	struct {
+		uint32_t lo;
+		uint32_t hi;
+	} pdp[4];
 
 	uint32_t execlist_context_descriptor_lo;
 	uint32_t execlist_context_descriptor_hi;
@@ -108,7 +104,7 @@
 } __packed;
 
 #define vgtif_reg(x) \
-	(VGT_PVINFO_PAGE + (long)&((struct vgt_if *)NULL)->x)
+	_MMIO((VGT_PVINFO_PAGE + (long)&((struct vgt_if *)NULL)->x))
 
 /* vGPU display status to be used by the host side */
 #define VGT_DRV_DISPLAY_NOT_READY 0
diff --git a/drivers/gpu/drm/i915/intel_atomic.c b/drivers/gpu/drm/i915/intel_atomic.c
index f1975f2..643f342 100644
--- a/drivers/gpu/drm/i915/intel_atomic.c
+++ b/drivers/gpu/drm/i915/intel_atomic.c
@@ -94,6 +94,7 @@
 	__drm_atomic_helper_crtc_duplicate_state(crtc, &crtc_state->base);
 
 	crtc_state->update_pipe = false;
+	crtc_state->disable_lp_wm = false;
 
 	return &crtc_state->base;
 }
@@ -205,8 +206,6 @@
 				 * but since this plane is unchanged just do the
 				 * minimum required validation.
 				 */
-				if (plane->type == DRM_PLANE_TYPE_PRIMARY)
-					intel_crtc->atomic.wait_for_flips = true;
 				crtc_state->base.planes_changed = true;
 			}
 
diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c
index a119806..c6bb0fc 100644
--- a/drivers/gpu/drm/i915/intel_atomic_plane.c
+++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
@@ -84,6 +84,7 @@
 	state = &intel_state->base;
 
 	__drm_atomic_helper_plane_duplicate_state(plane, state);
+	intel_state->wait_req = NULL;
 
 	return state;
 }
@@ -100,6 +101,7 @@
 intel_plane_destroy_state(struct drm_plane *plane,
 			  struct drm_plane_state *state)
 {
+	WARN_ON(state && to_intel_plane_state(state)->wait_req);
 	drm_atomic_helper_plane_destroy_state(plane, state);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c
index 4dccd9b..9aa83e7 100644
--- a/drivers/gpu/drm/i915/intel_audio.c
+++ b/drivers/gpu/drm/i915/intel_audio.c
@@ -161,9 +161,9 @@
 }
 
 static bool intel_eld_uptodate(struct drm_connector *connector,
-			       int reg_eldv, uint32_t bits_eldv,
-			       int reg_elda, uint32_t bits_elda,
-			       int reg_edid)
+			       i915_reg_t reg_eldv, uint32_t bits_eldv,
+			       i915_reg_t reg_elda, uint32_t bits_elda,
+			       i915_reg_t reg_edid)
 {
 	struct drm_i915_private *dev_priv = connector->dev->dev_private;
 	uint8_t *eld = connector->eld;
@@ -364,8 +364,7 @@
 	enum port port = intel_dig_port->port;
 	enum pipe pipe = intel_crtc->pipe;
 	uint32_t tmp, eldv;
-	int aud_config;
-	int aud_cntrl_st2;
+	i915_reg_t aud_config, aud_cntrl_st2;
 
 	DRM_DEBUG_KMS("Disable audio codec on port %c, pipe %c\n",
 		      port_name(port), pipe_name(pipe));
@@ -416,10 +415,7 @@
 	uint32_t eldv;
 	uint32_t tmp;
 	int len, i;
-	int hdmiw_hdmiedid;
-	int aud_config;
-	int aud_cntl_st;
-	int aud_cntrl_st2;
+	i915_reg_t hdmiw_hdmiedid, aud_config, aud_cntl_st, aud_cntrl_st2;
 
 	DRM_DEBUG_KMS("Enable audio codec on port %c, pipe %c, %u bytes ELD\n",
 		      port_name(port), pipe_name(pipe), drm_eld_size(eld));
@@ -591,7 +587,7 @@
 	struct drm_i915_private *dev_priv = dev_to_i915(dev);
 	u32 tmp;
 
-	if (!IS_SKYLAKE(dev_priv))
+	if (!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv))
 		return;
 
 	/*
@@ -642,10 +638,11 @@
 	u32 tmp;
 	int n;
 
-	/* HSW, BDW SKL need this fix */
+	/* HSW, BDW, SKL, KBL need this fix */
 	if (!IS_SKYLAKE(dev_priv) &&
-		!IS_BROADWELL(dev_priv) &&
-		!IS_HASWELL(dev_priv))
+	    !IS_KABYLAKE(dev_priv) &&
+	    !IS_BROADWELL(dev_priv) &&
+	    !IS_HASWELL(dev_priv))
 		return 0;
 
 	mutex_lock(&dev_priv->av_mutex);
diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c
index ce82f9c..070470f 100644
--- a/drivers/gpu/drm/i915/intel_bios.c
+++ b/drivers/gpu/drm/i915/intel_bios.c
@@ -356,7 +356,10 @@
 	general = find_section(bdb, BDB_GENERAL_FEATURES);
 	if (general) {
 		dev_priv->vbt.int_tv_support = general->int_tv_support;
-		dev_priv->vbt.int_crt_support = general->int_crt_support;
+		/* int_crt_support can't be trusted on earlier platforms */
+		if (bdb->version >= 155 &&
+		    (HAS_DDI(dev_priv) || IS_VALLEYVIEW(dev_priv)))
+			dev_priv->vbt.int_crt_support = general->int_crt_support;
 		dev_priv->vbt.lvds_use_ssc = general->enable_ssc;
 		dev_priv->vbt.lvds_ssc_freq =
 			intel_bios_ssc_frequency(dev, general->ssc_freq);
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index b84aaa0..9285fc1 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -50,7 +50,7 @@
 	 * encoder's enable/disable callbacks */
 	struct intel_connector *connector;
 	bool force_hotplug_required;
-	u32 adpa_reg;
+	i915_reg_t adpa_reg;
 };
 
 static struct intel_crt *intel_encoder_to_crt(struct intel_encoder *encoder)
@@ -138,18 +138,6 @@
 	pipe_config->base.adjusted_mode.flags |= intel_crt_get_flags(encoder);
 }
 
-static void hsw_crt_pre_enable(struct intel_encoder *encoder)
-{
-	struct drm_device *dev = encoder->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-
-	WARN(I915_READ(SPLL_CTL) & SPLL_PLL_ENABLE, "SPLL already enabled\n");
-	I915_WRITE(SPLL_CTL,
-		   SPLL_PLL_ENABLE | SPLL_PLL_FREQ_1350MHz | SPLL_PLL_SSC);
-	POSTING_READ(SPLL_CTL);
-	udelay(20);
-}
-
 /* Note: The caller is required to filter out dpms modes not supported by the
  * platform. */
 static void intel_crt_set_dpms(struct intel_encoder *encoder, int mode)
@@ -216,19 +204,6 @@
 	intel_disable_crt(encoder);
 }
 
-static void hsw_crt_post_disable(struct intel_encoder *encoder)
-{
-	struct drm_device *dev = encoder->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	uint32_t val;
-
-	DRM_DEBUG_KMS("Disabling SPLL\n");
-	val = I915_READ(SPLL_CTL);
-	WARN_ON(!(val & SPLL_PLL_ENABLE));
-	I915_WRITE(SPLL_CTL, val & ~SPLL_PLL_ENABLE);
-	POSTING_READ(SPLL_CTL);
-}
-
 static void intel_enable_crt(struct intel_encoder *encoder)
 {
 	struct intel_crt *crt = intel_encoder_to_crt(encoder);
@@ -280,6 +255,10 @@
 	if (HAS_DDI(dev)) {
 		pipe_config->ddi_pll_sel = PORT_CLK_SEL_SPLL;
 		pipe_config->port_clock = 135000 * 2;
+
+		pipe_config->dpll_hw_state.wrpll = 0;
+		pipe_config->dpll_hw_state.spll =
+			SPLL_PLL_ENABLE | SPLL_PLL_FREQ_1350MHz | SPLL_PLL_SSC;
 	}
 
 	return true;
@@ -501,12 +480,8 @@
 	uint32_t vsample;
 	uint32_t vblank, vblank_start, vblank_end;
 	uint32_t dsl;
-	uint32_t bclrpat_reg;
-	uint32_t vtotal_reg;
-	uint32_t vblank_reg;
-	uint32_t vsync_reg;
-	uint32_t pipeconf_reg;
-	uint32_t pipe_dsl_reg;
+	i915_reg_t bclrpat_reg, vtotal_reg,
+		vblank_reg, vsync_reg, pipeconf_reg, pipe_dsl_reg;
 	uint8_t	st00;
 	enum drm_connector_status status;
 
@@ -539,7 +514,7 @@
 		/* Wait for next Vblank to substitue
 		 * border color for Color info */
 		intel_wait_for_vblank(dev, pipe);
-		st00 = I915_READ8(VGA_MSR_WRITE);
+		st00 = I915_READ8(_VGA_MSR_WRITE);
 		status = ((st00 & (1 << 4)) != 0) ?
 			connector_status_connected :
 			connector_status_disconnected;
@@ -584,7 +559,7 @@
 		do {
 			count++;
 			/* Read the ST00 VGA status register */
-			st00 = I915_READ8(VGA_MSR_WRITE);
+			st00 = I915_READ8(_VGA_MSR_WRITE);
 			if (st00 & (1 << 4))
 				detect++;
 		} while ((I915_READ(pipe_dsl_reg) == dsl));
@@ -802,11 +777,37 @@
 	struct intel_crt *crt;
 	struct intel_connector *intel_connector;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	i915_reg_t adpa_reg;
+	u32 adpa;
 
 	/* Skip machines without VGA that falsely report hotplug events */
 	if (dmi_check_system(intel_no_crt))
 		return;
 
+	if (HAS_PCH_SPLIT(dev))
+		adpa_reg = PCH_ADPA;
+	else if (IS_VALLEYVIEW(dev))
+		adpa_reg = VLV_ADPA;
+	else
+		adpa_reg = ADPA;
+
+	adpa = I915_READ(adpa_reg);
+	if ((adpa & ADPA_DAC_ENABLE) == 0) {
+		/*
+		 * On some machines (some IVB at least) CRT can be
+		 * fused off, but there's no known fuse bit to
+		 * indicate that. On these machine the ADPA register
+		 * works normally, except the DAC enable bit won't
+		 * take. So the only way to tell is attempt to enable
+		 * it and see what happens.
+		 */
+		I915_WRITE(adpa_reg, adpa | ADPA_DAC_ENABLE |
+			   ADPA_HSYNC_CNTL_DISABLE | ADPA_VSYNC_CNTL_DISABLE);
+		if ((I915_READ(adpa_reg) & ADPA_DAC_ENABLE) == 0)
+			return;
+		I915_WRITE(adpa_reg, adpa);
+	}
+
 	crt = kzalloc(sizeof(struct intel_crt), GFP_KERNEL);
 	if (!crt)
 		return;
@@ -823,7 +824,7 @@
 			   &intel_crt_connector_funcs, DRM_MODE_CONNECTOR_VGA);
 
 	drm_encoder_init(dev, &crt->base.base, &intel_crt_enc_funcs,
-			 DRM_MODE_ENCODER_DAC);
+			 DRM_MODE_ENCODER_DAC, NULL);
 
 	intel_connector_attach_encoder(intel_connector, &crt->base);
 
@@ -840,12 +841,7 @@
 		connector->interlace_allowed = 1;
 	connector->doublescan_allowed = 0;
 
-	if (HAS_PCH_SPLIT(dev))
-		crt->adpa_reg = PCH_ADPA;
-	else if (IS_VALLEYVIEW(dev))
-		crt->adpa_reg = VLV_ADPA;
-	else
-		crt->adpa_reg = ADPA;
+	crt->adpa_reg = adpa_reg;
 
 	crt->base.compute_config = intel_crt_compute_config;
 	if (HAS_PCH_SPLIT(dev) && !HAS_DDI(dev)) {
@@ -860,8 +856,6 @@
 	if (HAS_DDI(dev)) {
 		crt->base.get_config = hsw_crt_get_config;
 		crt->base.get_hw_state = intel_ddi_get_hw_state;
-		crt->base.pre_enable = hsw_crt_pre_enable;
-		crt->base.post_disable = hsw_crt_post_disable;
 	} else {
 		crt->base.get_config = intel_crt_get_config;
 		crt->base.get_hw_state = intel_crt_get_hw_state;
diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
index 9e530a7..6c6a669 100644
--- a/drivers/gpu/drm/i915/intel_csr.c
+++ b/drivers/gpu/drm/i915/intel_csr.c
@@ -47,21 +47,10 @@
 MODULE_FIRMWARE(I915_CSR_SKL);
 MODULE_FIRMWARE(I915_CSR_BXT);
 
-/*
-* SKL CSR registers for DC5 and DC6
-*/
-#define CSR_PROGRAM(i)			(0x80000 + (i) * 4)
-#define CSR_SSP_BASE_ADDR_GEN9		0x00002FC0
-#define CSR_HTP_ADDR_SKL		0x00500034
-#define CSR_SSP_BASE			0x8F074
-#define CSR_HTP_SKL			0x8F004
-#define CSR_LAST_WRITE			0x8F034
-#define CSR_LAST_WRITE_VALUE		0xc003b400
-/* MMIO address range for CSR program (0x80000 - 0x82FFF) */
+#define SKL_CSR_VERSION_REQUIRED	CSR_VERSION(1, 23)
+
 #define CSR_MAX_FW_SIZE			0x2FFF
 #define CSR_DEFAULT_FW_OFFSET		0xFFFFFFFF
-#define CSR_MMIO_START_RANGE	0x80000
-#define CSR_MMIO_END_RANGE		0x8FFFF
 
 struct intel_css_header {
 	/* 0x09 for DMC */
@@ -178,166 +167,134 @@
 };
 
 static const struct stepping_info skl_stepping_info[] = {
-		{'A', '0'}, {'B', '0'}, {'C', '0'},
-		{'D', '0'}, {'E', '0'}, {'F', '0'},
-		{'G', '0'}, {'H', '0'}, {'I', '0'}
+	{'A', '0'}, {'B', '0'}, {'C', '0'},
+	{'D', '0'}, {'E', '0'}, {'F', '0'},
+	{'G', '0'}, {'H', '0'}, {'I', '0'}
 };
 
-static struct stepping_info bxt_stepping_info[] = {
+static const struct stepping_info bxt_stepping_info[] = {
 	{'A', '0'}, {'A', '1'}, {'A', '2'},
 	{'B', '0'}, {'B', '1'}, {'B', '2'}
 };
 
-static char intel_get_stepping(struct drm_device *dev)
+static const struct stepping_info *intel_get_stepping_info(struct drm_device *dev)
 {
-	if (IS_SKYLAKE(dev) && (dev->pdev->revision <
-			ARRAY_SIZE(skl_stepping_info)))
-		return skl_stepping_info[dev->pdev->revision].stepping;
-	else if (IS_BROXTON(dev) && (dev->pdev->revision <
-				ARRAY_SIZE(bxt_stepping_info)))
-		return bxt_stepping_info[dev->pdev->revision].stepping;
-	else
-		return -ENODATA;
-}
+	const struct stepping_info *si;
+	unsigned int size;
 
-static char intel_get_substepping(struct drm_device *dev)
-{
-	if (IS_SKYLAKE(dev) && (dev->pdev->revision <
-			ARRAY_SIZE(skl_stepping_info)))
-		return skl_stepping_info[dev->pdev->revision].substepping;
-	else if (IS_BROXTON(dev) && (dev->pdev->revision <
-			ARRAY_SIZE(bxt_stepping_info)))
-		return bxt_stepping_info[dev->pdev->revision].substepping;
-	else
-		return -ENODATA;
-}
+	if (IS_SKYLAKE(dev)) {
+		size = ARRAY_SIZE(skl_stepping_info);
+		si = skl_stepping_info;
+	} else if (IS_BROXTON(dev)) {
+		size = ARRAY_SIZE(bxt_stepping_info);
+		si = bxt_stepping_info;
+	} else {
+		return NULL;
+	}
 
-/**
- * intel_csr_load_status_get() - to get firmware loading status.
- * @dev_priv: i915 device.
- *
- * This function helps to get the firmware loading status.
- *
- * Return: Firmware loading status.
- */
-enum csr_state intel_csr_load_status_get(struct drm_i915_private *dev_priv)
-{
-	enum csr_state state;
+	if (INTEL_REVID(dev) < size)
+		return si + INTEL_REVID(dev);
 
-	mutex_lock(&dev_priv->csr_lock);
-	state = dev_priv->csr.state;
-	mutex_unlock(&dev_priv->csr_lock);
-
-	return state;
-}
-
-/**
- * intel_csr_load_status_set() - help to set firmware loading status.
- * @dev_priv: i915 device.
- * @state: enumeration of firmware loading status.
- *
- * Set the firmware loading status.
- */
-void intel_csr_load_status_set(struct drm_i915_private *dev_priv,
-			enum csr_state state)
-{
-	mutex_lock(&dev_priv->csr_lock);
-	dev_priv->csr.state = state;
-	mutex_unlock(&dev_priv->csr_lock);
+	return NULL;
 }
 
 /**
  * intel_csr_load_program() - write the firmware from memory to register.
- * @dev: drm device.
+ * @dev_priv: i915 drm device.
  *
  * CSR firmware is read from a .bin file and kept in internal memory one time.
  * Everytime display comes back from low power state this function is called to
  * copy the firmware from internal memory to registers.
  */
-void intel_csr_load_program(struct drm_device *dev)
+void intel_csr_load_program(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 *payload = dev_priv->csr.dmc_payload;
 	uint32_t i, fw_size;
 
-	if (!IS_GEN9(dev)) {
+	if (!IS_GEN9(dev_priv)) {
 		DRM_ERROR("No CSR support available for this platform\n");
 		return;
 	}
 
-	/*
-	 * FIXME: Firmware gets lost on S3/S4, but not when entering system
-	 * standby or suspend-to-idle (which is just like forced runtime pm).
-	 * Unfortunately the ACPI subsystem doesn't yet give us a way to
-	 * differentiate this, hence figure it out with this hack.
-	 */
-	if (I915_READ(CSR_PROGRAM(0)))
+	if (!dev_priv->csr.dmc_payload) {
+		DRM_ERROR("Tried to program CSR with empty payload\n");
 		return;
+	}
 
-	mutex_lock(&dev_priv->csr_lock);
 	fw_size = dev_priv->csr.dmc_fw_size;
 	for (i = 0; i < fw_size; i++)
 		I915_WRITE(CSR_PROGRAM(i), payload[i]);
 
 	for (i = 0; i < dev_priv->csr.mmio_count; i++) {
 		I915_WRITE(dev_priv->csr.mmioaddr[i],
-			dev_priv->csr.mmiodata[i]);
+			   dev_priv->csr.mmiodata[i]);
 	}
-
-	dev_priv->csr.state = FW_LOADED;
-	mutex_unlock(&dev_priv->csr_lock);
 }
 
-static void finish_csr_load(const struct firmware *fw, void *context)
+static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv,
+			      const struct firmware *fw)
 {
-	struct drm_i915_private *dev_priv = context;
 	struct drm_device *dev = dev_priv->dev;
 	struct intel_css_header *css_header;
 	struct intel_package_header *package_header;
 	struct intel_dmc_header *dmc_header;
 	struct intel_csr *csr = &dev_priv->csr;
-	char stepping = intel_get_stepping(dev);
-	char substepping = intel_get_substepping(dev);
+	const struct stepping_info *stepping_info = intel_get_stepping_info(dev);
+	char stepping, substepping;
 	uint32_t dmc_offset = CSR_DEFAULT_FW_OFFSET, readcount = 0, nbytes;
 	uint32_t i;
 	uint32_t *dmc_payload;
-	bool fw_loaded = false;
 
-	if (!fw) {
-		i915_firmware_load_error_print(csr->fw_path, 0);
-		goto out;
-	}
+	if (!fw)
+		return NULL;
 
-	if ((stepping == -ENODATA) || (substepping == -ENODATA)) {
+	if (!stepping_info) {
 		DRM_ERROR("Unknown stepping info, firmware loading failed\n");
-		goto out;
+		return NULL;
 	}
 
+	stepping = stepping_info->stepping;
+	substepping = stepping_info->substepping;
+
 	/* Extract CSS Header information*/
 	css_header = (struct intel_css_header *)fw->data;
 	if (sizeof(struct intel_css_header) !=
-		(css_header->header_len * 4)) {
+	    (css_header->header_len * 4)) {
 		DRM_ERROR("Firmware has wrong CSS header length %u bytes\n",
-			(css_header->header_len * 4));
-		goto out;
+			  (css_header->header_len * 4));
+		return NULL;
 	}
+
+	csr->version = css_header->version;
+
+	if (IS_SKYLAKE(dev) && csr->version < SKL_CSR_VERSION_REQUIRED) {
+		DRM_INFO("Refusing to load old Skylake DMC firmware v%u.%u,"
+			 " please upgrade to v%u.%u or later"
+			 " [https://01.org/linuxgraphics/intel-linux-graphics-firmwares].\n",
+			 CSR_VERSION_MAJOR(csr->version),
+			 CSR_VERSION_MINOR(csr->version),
+			 CSR_VERSION_MAJOR(SKL_CSR_VERSION_REQUIRED),
+			 CSR_VERSION_MINOR(SKL_CSR_VERSION_REQUIRED));
+		return NULL;
+	}
+
 	readcount += sizeof(struct intel_css_header);
 
 	/* Extract Package Header information*/
 	package_header = (struct intel_package_header *)
-					&fw->data[readcount];
+		&fw->data[readcount];
 	if (sizeof(struct intel_package_header) !=
-		(package_header->header_len * 4)) {
+	    (package_header->header_len * 4)) {
 		DRM_ERROR("Firmware has wrong package header length %u bytes\n",
-			(package_header->header_len * 4));
-		goto out;
+			  (package_header->header_len * 4));
+		return NULL;
 	}
 	readcount += sizeof(struct intel_package_header);
 
 	/* Search for dmc_offset to find firware binary. */
 	for (i = 0; i < package_header->num_entries; i++) {
 		if (package_header->fw_info[i].substepping == '*' &&
-			stepping == package_header->fw_info[i].stepping) {
+		    stepping == package_header->fw_info[i].stepping) {
 			dmc_offset = package_header->fw_info[i].offset;
 			break;
 		} else if (stepping == package_header->fw_info[i].stepping &&
@@ -345,12 +302,12 @@
 			dmc_offset = package_header->fw_info[i].offset;
 			break;
 		} else if (package_header->fw_info[i].stepping == '*' &&
-			package_header->fw_info[i].substepping == '*')
+			   package_header->fw_info[i].substepping == '*')
 			dmc_offset = package_header->fw_info[i].offset;
 	}
 	if (dmc_offset == CSR_DEFAULT_FW_OFFSET) {
 		DRM_ERROR("Firmware not supported for %c stepping\n", stepping);
-		goto out;
+		return NULL;
 	}
 	readcount += dmc_offset;
 
@@ -358,26 +315,26 @@
 	dmc_header = (struct intel_dmc_header *)&fw->data[readcount];
 	if (sizeof(struct intel_dmc_header) != (dmc_header->header_len)) {
 		DRM_ERROR("Firmware has wrong dmc header length %u bytes\n",
-				(dmc_header->header_len));
-		goto out;
+			  (dmc_header->header_len));
+		return NULL;
 	}
 	readcount += sizeof(struct intel_dmc_header);
 
 	/* Cache the dmc header info. */
 	if (dmc_header->mmio_count > ARRAY_SIZE(csr->mmioaddr)) {
 		DRM_ERROR("Firmware has wrong mmio count %u\n",
-						dmc_header->mmio_count);
-		goto out;
+			  dmc_header->mmio_count);
+		return NULL;
 	}
 	csr->mmio_count = dmc_header->mmio_count;
 	for (i = 0; i < dmc_header->mmio_count; i++) {
 		if (dmc_header->mmioaddr[i] < CSR_MMIO_START_RANGE ||
-			dmc_header->mmioaddr[i] > CSR_MMIO_END_RANGE) {
+		    dmc_header->mmioaddr[i] > CSR_MMIO_END_RANGE) {
 			DRM_ERROR(" Firmware has wrong mmio address 0x%x\n",
-						dmc_header->mmioaddr[i]);
-			goto out;
+				  dmc_header->mmioaddr[i]);
+			return NULL;
 		}
-		csr->mmioaddr[i] = dmc_header->mmioaddr[i];
+		csr->mmioaddr[i] = _MMIO(dmc_header->mmioaddr[i]);
 		csr->mmiodata[i] = dmc_header->mmiodata[i];
 	}
 
@@ -385,56 +342,80 @@
 	nbytes = dmc_header->fw_size * 4;
 	if (nbytes > CSR_MAX_FW_SIZE) {
 		DRM_ERROR("CSR firmware too big (%u) bytes\n", nbytes);
-		goto out;
+		return NULL;
 	}
 	csr->dmc_fw_size = dmc_header->fw_size;
 
-	csr->dmc_payload = kmalloc(nbytes, GFP_KERNEL);
-	if (!csr->dmc_payload) {
+	dmc_payload = kmalloc(nbytes, GFP_KERNEL);
+	if (!dmc_payload) {
 		DRM_ERROR("Memory allocation failed for dmc payload\n");
-		goto out;
+		return NULL;
 	}
 
-	dmc_payload = csr->dmc_payload;
 	memcpy(dmc_payload, &fw->data[readcount], nbytes);
 
-	/* load csr program during system boot, as needed for DC states */
-	intel_csr_load_program(dev);
-	fw_loaded = true;
+	return dmc_payload;
+}
 
-	DRM_DEBUG_KMS("Finished loading %s\n", dev_priv->csr.fw_path);
+static void csr_load_work_fn(struct work_struct *work)
+{
+	struct drm_i915_private *dev_priv;
+	struct intel_csr *csr;
+	const struct firmware *fw;
+	int ret;
+
+	dev_priv = container_of(work, typeof(*dev_priv), csr.work);
+	csr = &dev_priv->csr;
+
+	ret = request_firmware(&fw, dev_priv->csr.fw_path,
+			       &dev_priv->dev->pdev->dev);
+	if (!fw)
+		goto out;
+
+	dev_priv->csr.dmc_payload = parse_csr_fw(dev_priv, fw);
+	if (!dev_priv->csr.dmc_payload)
+		goto out;
+
+	/* load csr program during system boot, as needed for DC states */
+	intel_csr_load_program(dev_priv);
+
 out:
-	if (fw_loaded)
-		intel_runtime_pm_put(dev_priv);
-	else
-		intel_csr_load_status_set(dev_priv, FW_FAILED);
+	if (dev_priv->csr.dmc_payload) {
+		intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
+
+		DRM_INFO("Finished loading %s (v%u.%u)\n",
+			 dev_priv->csr.fw_path,
+			 CSR_VERSION_MAJOR(csr->version),
+			 CSR_VERSION_MINOR(csr->version));
+	} else {
+		DRM_ERROR("Failed to load DMC firmware, disabling rpm\n");
+	}
 
 	release_firmware(fw);
 }
 
 /**
  * intel_csr_ucode_init() - initialize the firmware loading.
- * @dev: drm device.
+ * @dev_priv: i915 drm device.
  *
  * This function is called at the time of loading the display driver to read
  * firmware from a .bin file and copied into a internal memory.
  */
-void intel_csr_ucode_init(struct drm_device *dev)
+void intel_csr_ucode_init(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_csr *csr = &dev_priv->csr;
-	int ret;
 
-	if (!HAS_CSR(dev))
+	INIT_WORK(&dev_priv->csr.work, csr_load_work_fn);
+
+	if (!HAS_CSR(dev_priv))
 		return;
 
-	if (IS_SKYLAKE(dev))
+	if (IS_SKYLAKE(dev_priv))
 		csr->fw_path = I915_CSR_SKL;
 	else if (IS_BROXTON(dev_priv))
 		csr->fw_path = I915_CSR_BXT;
 	else {
 		DRM_ERROR("Unexpected: no known CSR firmware for platform\n");
-		intel_csr_load_status_set(dev_priv, FW_FAILED);
 		return;
 	}
 
@@ -444,43 +425,24 @@
 	 * Obtain a runtime pm reference, until CSR is loaded,
 	 * to avoid entering runtime-suspend.
 	 */
-	intel_runtime_pm_get(dev_priv);
+	intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
 
-	/* CSR supported for platform, load firmware */
-	ret = request_firmware_nowait(THIS_MODULE, true, csr->fw_path,
-				&dev_priv->dev->pdev->dev,
-				GFP_KERNEL, dev_priv,
-				finish_csr_load);
-	if (ret) {
-		i915_firmware_load_error_print(csr->fw_path, ret);
-		intel_csr_load_status_set(dev_priv, FW_FAILED);
-	}
+	schedule_work(&dev_priv->csr.work);
 }
 
 /**
  * intel_csr_ucode_fini() - unload the CSR firmware.
- * @dev: drm device.
+ * @dev_priv: i915 drm device.
  *
  * Firmmware unloading includes freeing the internal momory and reset the
  * firmware loading status.
  */
-void intel_csr_ucode_fini(struct drm_device *dev)
+void intel_csr_ucode_fini(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-
-	if (!HAS_CSR(dev))
+	if (!HAS_CSR(dev_priv))
 		return;
 
-	intel_csr_load_status_set(dev_priv, FW_FAILED);
-	kfree(dev_priv->csr.dmc_payload);
-}
+	flush_work(&dev_priv->csr.work);
 
-void assert_csr_loaded(struct drm_i915_private *dev_priv)
-{
-	WARN_ONCE(intel_csr_load_status_get(dev_priv) != FW_LOADED,
-		  "CSR is not loaded.\n");
-	WARN_ONCE(!I915_READ(CSR_PROGRAM(0)),
-		  "CSR program storage start is NULL\n");
-	WARN_ONCE(!I915_READ(CSR_SSP_BASE), "CSR SSP Base Not fine\n");
-	WARN_ONCE(!I915_READ(CSR_HTP_SKL), "CSR HTP Not fine\n");
+	kfree(dev_priv->csr.dmc_payload);
 }
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index b25e99a..4afb310 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -133,12 +133,12 @@
 	{ 0x00002016, 0x000000A0, 0x0 },
 	{ 0x00005012, 0x0000009B, 0x0 },
 	{ 0x00007011, 0x00000088, 0x0 },
-	{ 0x00009010, 0x000000C7, 0x0 },
+	{ 0x80009010, 0x000000C0, 0x1 },	/* Uses I_boost level 0x1 */
 	{ 0x00002016, 0x0000009B, 0x0 },
 	{ 0x00005012, 0x00000088, 0x0 },
-	{ 0x00007011, 0x000000C7, 0x0 },
+	{ 0x80007011, 0x000000C0, 0x1 },	/* Uses I_boost level 0x1 */
 	{ 0x00002016, 0x000000DF, 0x0 },
-	{ 0x00005012, 0x000000C7, 0x0 },
+	{ 0x80005012, 0x000000C0, 0x1 },	/* Uses I_boost level 0x1 */
 };
 
 /* Skylake U */
@@ -146,12 +146,12 @@
 	{ 0x0000201B, 0x000000A2, 0x0 },
 	{ 0x00005012, 0x00000088, 0x0 },
 	{ 0x00007011, 0x00000087, 0x0 },
-	{ 0x80009010, 0x000000C7, 0x1 },	/* Uses I_boost level 0x1 */
+	{ 0x80009010, 0x000000C0, 0x1 },	/* Uses I_boost level 0x1 */
 	{ 0x0000201B, 0x0000009D, 0x0 },
-	{ 0x00005012, 0x000000C7, 0x0 },
-	{ 0x00007011, 0x000000C7, 0x0 },
+	{ 0x80005012, 0x000000C0, 0x1 },	/* Uses I_boost level 0x1 */
+	{ 0x80007011, 0x000000C0, 0x1 },	/* Uses I_boost level 0x1 */
 	{ 0x00002016, 0x00000088, 0x0 },
-	{ 0x00005012, 0x000000C7, 0x0 },
+	{ 0x80005012, 0x000000C0, 0x1 },	/* Uses I_boost level 0x1 */
 };
 
 /* Skylake Y */
@@ -159,12 +159,12 @@
 	{ 0x00000018, 0x000000A2, 0x0 },
 	{ 0x00005012, 0x00000088, 0x0 },
 	{ 0x00007011, 0x00000087, 0x0 },
-	{ 0x80009010, 0x000000C7, 0x3 },	/* Uses I_boost level 0x3 */
+	{ 0x80009010, 0x000000C0, 0x3 },	/* Uses I_boost level 0x3 */
 	{ 0x00000018, 0x0000009D, 0x0 },
-	{ 0x00005012, 0x000000C7, 0x0 },
-	{ 0x00007011, 0x000000C7, 0x0 },
+	{ 0x80005012, 0x000000C0, 0x3 },	/* Uses I_boost level 0x3 */
+	{ 0x80007011, 0x000000C0, 0x3 },	/* Uses I_boost level 0x3 */
 	{ 0x00000018, 0x00000088, 0x0 },
-	{ 0x00005012, 0x000000C7, 0x0 },
+	{ 0x80005012, 0x000000C0, 0x3 },	/* Uses I_boost level 0x3 */
 };
 
 /*
@@ -345,7 +345,7 @@
 static bool
 intel_dig_port_supports_hdmi(const struct intel_digital_port *intel_dig_port)
 {
-	return intel_dig_port->hdmi.hdmi_reg;
+	return i915_mmio_reg_valid(intel_dig_port->hdmi.hdmi_reg);
 }
 
 static const struct ddi_buf_trans *skl_get_buf_trans_dp(struct drm_device *dev,
@@ -448,7 +448,7 @@
 		bxt_ddi_vswing_sequence(dev, hdmi_level, port,
 					INTEL_OUTPUT_HDMI);
 		return;
-	} else if (IS_SKYLAKE(dev)) {
+	} else if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
 		ddi_translations_fdi = NULL;
 		ddi_translations_dp =
 				skl_get_buf_trans_dp(dev, &n_dp_entries);
@@ -576,7 +576,7 @@
 static void intel_wait_ddi_buf_idle(struct drm_i915_private *dev_priv,
 				    enum port port)
 {
-	uint32_t reg = DDI_BUF_CTL(port);
+	i915_reg_t reg = DDI_BUF_CTL(port);
 	int i;
 
 	for (i = 0; i < 16; i++) {
@@ -931,7 +931,8 @@
 	/* Otherwise a < c && b >= d, do nothing */
 }
 
-static int hsw_ddi_calc_wrpll_link(struct drm_i915_private *dev_priv, int reg)
+static int hsw_ddi_calc_wrpll_link(struct drm_i915_private *dev_priv,
+				   i915_reg_t reg)
 {
 	int refclk = LC_FREQ;
 	int n, p, r;
@@ -967,7 +968,7 @@
 static int skl_calc_wrpll_link(struct drm_i915_private *dev_priv,
 			       uint32_t dpll)
 {
-	uint32_t cfgcr1_reg, cfgcr2_reg;
+	i915_reg_t cfgcr1_reg, cfgcr2_reg;
 	uint32_t cfgcr1_val, cfgcr2_val;
 	uint32_t p0, p1, p2, dco_freq;
 
@@ -1112,10 +1113,10 @@
 		link_clock = 270000;
 		break;
 	case PORT_CLK_SEL_WRPLL1:
-		link_clock = hsw_ddi_calc_wrpll_link(dev_priv, WRPLL_CTL1);
+		link_clock = hsw_ddi_calc_wrpll_link(dev_priv, WRPLL_CTL(0));
 		break;
 	case PORT_CLK_SEL_WRPLL2:
-		link_clock = hsw_ddi_calc_wrpll_link(dev_priv, WRPLL_CTL2);
+		link_clock = hsw_ddi_calc_wrpll_link(dev_priv, WRPLL_CTL(1));
 		break;
 	case PORT_CLK_SEL_SPLL:
 		pll = I915_READ(SPLL_CTL) & SPLL_PLL_FREQ_MASK;
@@ -1184,7 +1185,7 @@
 
 	if (INTEL_INFO(dev)->gen <= 8)
 		hsw_ddi_clock_get(encoder, pipe_config);
-	else if (IS_SKYLAKE(dev))
+	else if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev))
 		skl_ddi_clock_get(encoder, pipe_config);
 	else if (IS_BROXTON(dev))
 		bxt_ddi_clock_get(encoder, pipe_config);
@@ -1286,6 +1287,18 @@
 		}
 
 		crtc_state->ddi_pll_sel = PORT_CLK_SEL_WRPLL(pll->id);
+	} else if (crtc_state->ddi_pll_sel == PORT_CLK_SEL_SPLL) {
+		struct drm_atomic_state *state = crtc_state->base.state;
+		struct intel_shared_dpll_config *spll =
+			&intel_atomic_get_shared_dpll_state(state)[DPLL_ID_SPLL];
+
+		if (spll->crtc_mask &&
+		    WARN_ON(spll->hw_state.spll != crtc_state->dpll_hw_state.spll))
+			return false;
+
+		crtc_state->shared_dpll = DPLL_ID_SPLL;
+		spll->hw_state.spll = crtc_state->dpll_hw_state.spll;
+		spll->crtc_mask |= 1 << intel_crtc->pipe;
 	}
 
 	return true;
@@ -1768,7 +1781,7 @@
 	struct intel_encoder *intel_encoder =
 		intel_ddi_get_crtc_new_encoder(crtc_state);
 
-	if (IS_SKYLAKE(dev))
+	if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev))
 		return skl_ddi_pll_select(intel_crtc, crtc_state,
 					  intel_encoder);
 	else if (IS_BROXTON(dev))
@@ -1930,7 +1943,7 @@
 void intel_ddi_disable_transcoder_func(struct drm_i915_private *dev_priv,
 				       enum transcoder cpu_transcoder)
 {
-	uint32_t reg = TRANS_DDI_FUNC_CTL(cpu_transcoder);
+	i915_reg_t reg = TRANS_DDI_FUNC_CTL(cpu_transcoder);
 	uint32_t val = I915_READ(reg);
 
 	val &= ~(TRANS_DDI_FUNC_ENABLE | TRANS_DDI_PORT_MASK | TRANS_DDI_DP_VC_PAYLOAD_ALLOC);
@@ -2085,21 +2098,21 @@
 			iboost = dp_iboost;
 		} else {
 			ddi_translations = skl_get_buf_trans_dp(dev, &n_entries);
-			iboost = ddi_translations[port].i_boost;
+			iboost = ddi_translations[level].i_boost;
 		}
 	} else if (type == INTEL_OUTPUT_EDP) {
 		if (dp_iboost) {
 			iboost = dp_iboost;
 		} else {
 			ddi_translations = skl_get_buf_trans_edp(dev, &n_entries);
-			iboost = ddi_translations[port].i_boost;
+			iboost = ddi_translations[level].i_boost;
 		}
 	} else if (type == INTEL_OUTPUT_HDMI) {
 		if (hdmi_iboost) {
 			iboost = hdmi_iboost;
 		} else {
 			ddi_translations = skl_get_buf_trans_hdmi(dev, &n_entries);
-			iboost = ddi_translations[port].i_boost;
+			iboost = ddi_translations[level].i_boost;
 		}
 	} else {
 		return;
@@ -2251,7 +2264,7 @@
 
 	level = translate_signal_level(signal_levels);
 
-	if (IS_SKYLAKE(dev))
+	if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev))
 		skl_ddi_set_iboost(dev, level, port, encoder->type);
 	else if (IS_BROXTON(dev))
 		bxt_ddi_vswing_sequence(dev, level, port, encoder->type);
@@ -2259,6 +2272,50 @@
 	return DDI_BUF_TRANS_SELECT(level);
 }
 
+void intel_ddi_clk_select(struct intel_encoder *encoder,
+			  const struct intel_crtc_state *pipe_config)
+{
+	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+	enum port port = intel_ddi_get_encoder_port(encoder);
+
+	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
+		uint32_t dpll = pipe_config->ddi_pll_sel;
+		uint32_t val;
+
+		/*
+		 * DPLL0 is used for eDP and is the only "private" DPLL (as
+		 * opposed to shared) on SKL
+		 */
+		if (encoder->type == INTEL_OUTPUT_EDP) {
+			WARN_ON(dpll != SKL_DPLL0);
+
+			val = I915_READ(DPLL_CTRL1);
+
+			val &= ~(DPLL_CTRL1_HDMI_MODE(dpll) |
+				 DPLL_CTRL1_SSC(dpll) |
+				 DPLL_CTRL1_LINK_RATE_MASK(dpll));
+			val |= pipe_config->dpll_hw_state.ctrl1 << (dpll * 6);
+
+			I915_WRITE(DPLL_CTRL1, val);
+			POSTING_READ(DPLL_CTRL1);
+		}
+
+		/* DDI -> PLL mapping  */
+		val = I915_READ(DPLL_CTRL2);
+
+		val &= ~(DPLL_CTRL2_DDI_CLK_OFF(port) |
+			DPLL_CTRL2_DDI_CLK_SEL_MASK(port));
+		val |= (DPLL_CTRL2_DDI_CLK_SEL(dpll, port) |
+			DPLL_CTRL2_DDI_SEL_OVERRIDE(port));
+
+		I915_WRITE(DPLL_CTRL2, val);
+
+	} else if (INTEL_INFO(dev_priv)->gen < 9) {
+		WARN_ON(pipe_config->ddi_pll_sel == PORT_CLK_SEL_NONE);
+		I915_WRITE(PORT_CLK_SEL(port), pipe_config->ddi_pll_sel);
+	}
+}
+
 static void intel_ddi_pre_enable(struct intel_encoder *intel_encoder)
 {
 	struct drm_encoder *encoder = &intel_encoder->base;
@@ -2274,42 +2331,7 @@
 		intel_edp_panel_on(intel_dp);
 	}
 
-	if (IS_SKYLAKE(dev)) {
-		uint32_t dpll = crtc->config->ddi_pll_sel;
-		uint32_t val;
-
-		/*
-		 * DPLL0 is used for eDP and is the only "private" DPLL (as
-		 * opposed to shared) on SKL
-		 */
-		if (type == INTEL_OUTPUT_EDP) {
-			WARN_ON(dpll != SKL_DPLL0);
-
-			val = I915_READ(DPLL_CTRL1);
-
-			val &= ~(DPLL_CTRL1_HDMI_MODE(dpll) |
-				 DPLL_CTRL1_SSC(dpll) |
-				 DPLL_CTRL1_LINK_RATE_MASK(dpll));
-			val |= crtc->config->dpll_hw_state.ctrl1 << (dpll * 6);
-
-			I915_WRITE(DPLL_CTRL1, val);
-			POSTING_READ(DPLL_CTRL1);
-		}
-
-		/* DDI -> PLL mapping  */
-		val = I915_READ(DPLL_CTRL2);
-
-		val &= ~(DPLL_CTRL2_DDI_CLK_OFF(port) |
-			DPLL_CTRL2_DDI_CLK_SEL_MASK(port));
-		val |= (DPLL_CTRL2_DDI_CLK_SEL(dpll, port) |
-			DPLL_CTRL2_DDI_SEL_OVERRIDE(port));
-
-		I915_WRITE(DPLL_CTRL2, val);
-
-	} else if (INTEL_INFO(dev)->gen < 9) {
-		WARN_ON(crtc->config->ddi_pll_sel == PORT_CLK_SEL_NONE);
-		I915_WRITE(PORT_CLK_SEL(port), crtc->config->ddi_pll_sel);
-	}
+	intel_ddi_clk_select(intel_encoder, crtc->config);
 
 	if (type == INTEL_OUTPUT_DISPLAYPORT || type == INTEL_OUTPUT_EDP) {
 		struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
@@ -2369,7 +2391,7 @@
 		intel_edp_panel_off(intel_dp);
 	}
 
-	if (IS_SKYLAKE(dev))
+	if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev))
 		I915_WRITE(DPLL_CTRL2, (I915_READ(DPLL_CTRL2) |
 					DPLL_CTRL2_DDI_CLK_OFF(port)));
 	else if (INTEL_INFO(dev)->gen < 9)
@@ -2437,7 +2459,7 @@
 	}
 }
 
-static void hsw_ddi_pll_enable(struct drm_i915_private *dev_priv,
+static void hsw_ddi_wrpll_enable(struct drm_i915_private *dev_priv,
 			       struct intel_shared_dpll *pll)
 {
 	I915_WRITE(WRPLL_CTL(pll->id), pll->config.hw_state.wrpll);
@@ -2445,9 +2467,17 @@
 	udelay(20);
 }
 
-static void hsw_ddi_pll_disable(struct drm_i915_private *dev_priv,
+static void hsw_ddi_spll_enable(struct drm_i915_private *dev_priv,
 				struct intel_shared_dpll *pll)
 {
+	I915_WRITE(SPLL_CTL, pll->config.hw_state.spll);
+	POSTING_READ(SPLL_CTL);
+	udelay(20);
+}
+
+static void hsw_ddi_wrpll_disable(struct drm_i915_private *dev_priv,
+				  struct intel_shared_dpll *pll)
+{
 	uint32_t val;
 
 	val = I915_READ(WRPLL_CTL(pll->id));
@@ -2455,9 +2485,19 @@
 	POSTING_READ(WRPLL_CTL(pll->id));
 }
 
-static bool hsw_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv,
-				     struct intel_shared_dpll *pll,
-				     struct intel_dpll_hw_state *hw_state)
+static void hsw_ddi_spll_disable(struct drm_i915_private *dev_priv,
+				 struct intel_shared_dpll *pll)
+{
+	uint32_t val;
+
+	val = I915_READ(SPLL_CTL);
+	I915_WRITE(SPLL_CTL, val & ~SPLL_PLL_ENABLE);
+	POSTING_READ(SPLL_CTL);
+}
+
+static bool hsw_ddi_wrpll_get_hw_state(struct drm_i915_private *dev_priv,
+				       struct intel_shared_dpll *pll,
+				       struct intel_dpll_hw_state *hw_state)
 {
 	uint32_t val;
 
@@ -2470,25 +2510,50 @@
 	return val & WRPLL_PLL_ENABLE;
 }
 
+static bool hsw_ddi_spll_get_hw_state(struct drm_i915_private *dev_priv,
+				      struct intel_shared_dpll *pll,
+				      struct intel_dpll_hw_state *hw_state)
+{
+	uint32_t val;
+
+	if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PLLS))
+		return false;
+
+	val = I915_READ(SPLL_CTL);
+	hw_state->spll = val;
+
+	return val & SPLL_PLL_ENABLE;
+}
+
+
 static const char * const hsw_ddi_pll_names[] = {
 	"WRPLL 1",
 	"WRPLL 2",
+	"SPLL"
 };
 
 static void hsw_shared_dplls_init(struct drm_i915_private *dev_priv)
 {
 	int i;
 
-	dev_priv->num_shared_dpll = 2;
+	dev_priv->num_shared_dpll = 3;
 
-	for (i = 0; i < dev_priv->num_shared_dpll; i++) {
+	for (i = 0; i < 2; i++) {
 		dev_priv->shared_dplls[i].id = i;
 		dev_priv->shared_dplls[i].name = hsw_ddi_pll_names[i];
-		dev_priv->shared_dplls[i].disable = hsw_ddi_pll_disable;
-		dev_priv->shared_dplls[i].enable = hsw_ddi_pll_enable;
+		dev_priv->shared_dplls[i].disable = hsw_ddi_wrpll_disable;
+		dev_priv->shared_dplls[i].enable = hsw_ddi_wrpll_enable;
 		dev_priv->shared_dplls[i].get_hw_state =
-			hsw_ddi_pll_get_hw_state;
+			hsw_ddi_wrpll_get_hw_state;
 	}
+
+	/* SPLL is special, but needs to be initialized anyway.. */
+	dev_priv->shared_dplls[i].id = i;
+	dev_priv->shared_dplls[i].name = hsw_ddi_pll_names[i];
+	dev_priv->shared_dplls[i].disable = hsw_ddi_spll_disable;
+	dev_priv->shared_dplls[i].enable = hsw_ddi_spll_enable;
+	dev_priv->shared_dplls[i].get_hw_state = hsw_ddi_spll_get_hw_state;
+
 }
 
 static const char * const skl_ddi_pll_names[] = {
@@ -2498,7 +2563,7 @@
 };
 
 struct skl_dpll_regs {
-	u32 ctl, cfgcr1, cfgcr2;
+	i915_reg_t ctl, cfgcr1, cfgcr2;
 };
 
 /* this array is indexed by the *shared* pll id */
@@ -2511,13 +2576,13 @@
 	},
 	{
 		/* DPLL 2 */
-		.ctl = WRPLL_CTL1,
+		.ctl = WRPLL_CTL(0),
 		.cfgcr1 = DPLL_CFGCR1(SKL_DPLL2),
 		.cfgcr2 = DPLL_CFGCR2(SKL_DPLL2),
 	},
 	{
 		/* DPLL 3 */
-		.ctl = WRPLL_CTL2,
+		.ctl = WRPLL_CTL(1),
 		.cfgcr1 = DPLL_CFGCR1(SKL_DPLL3),
 		.cfgcr2 = DPLL_CFGCR2(SKL_DPLL3),
 	},
@@ -2937,22 +3002,22 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	uint32_t val = I915_READ(LCPLL_CTL);
 
-	if (IS_SKYLAKE(dev))
+	if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev))
 		skl_shared_dplls_init(dev_priv);
 	else if (IS_BROXTON(dev))
 		bxt_shared_dplls_init(dev_priv);
 	else
 		hsw_shared_dplls_init(dev_priv);
 
-	if (IS_SKYLAKE(dev)) {
+	if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
 		int cdclk_freq;
 
 		cdclk_freq = dev_priv->display.get_display_clock_speed(dev);
 		dev_priv->skl_boot_cdclk = cdclk_freq;
+		if (skl_sanitize_cdclk(dev_priv))
+			DRM_DEBUG_KMS("Sanitized cdclk programmed by pre-os\n");
 		if (!(I915_READ(LCPLL1_CTL) & LCPLL_PLL_ENABLE))
 			DRM_ERROR("LCPLL1 is disabled\n");
-		else
-			intel_display_power_get(dev_priv, POWER_DOMAIN_PLLS);
 	} else if (IS_BROXTON(dev)) {
 		broxton_init_cdclk(dev);
 		broxton_ddi_phy_init(dev);
@@ -2971,11 +3036,11 @@
 	}
 }
 
-void intel_ddi_prepare_link_retrain(struct drm_encoder *encoder)
+void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp)
 {
-	struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
-	struct intel_dp *intel_dp = &intel_dig_port->dp;
-	struct drm_i915_private *dev_priv = encoder->dev->dev_private;
+	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
+	struct drm_i915_private *dev_priv =
+		to_i915(intel_dig_port->base.base.dev);
 	enum port port = intel_dig_port->port;
 	uint32_t val;
 	bool wait = false;
@@ -3086,7 +3151,7 @@
 		pipe_config->has_hdmi_sink = true;
 		intel_hdmi = enc_to_intel_hdmi(&encoder->base);
 
-		if (intel_hdmi->infoframe_enabled(&encoder->base))
+		if (intel_hdmi->infoframe_enabled(&encoder->base, pipe_config))
 			pipe_config->has_infoframe = true;
 		break;
 	case TRANS_DDI_MODE_SELECT_DVI:
@@ -3219,7 +3284,7 @@
 	encoder = &intel_encoder->base;
 
 	drm_encoder_init(dev, encoder, &intel_ddi_funcs,
-			 DRM_MODE_ENCODER_TMDS);
+			 DRM_MODE_ENCODER_TMDS, NULL);
 
 	intel_encoder->compute_config = intel_ddi_compute_config;
 	intel_encoder->enable = intel_enable_ddi;
@@ -3234,6 +3299,20 @@
 					  (DDI_BUF_PORT_REVERSAL |
 					   DDI_A_4_LANES);
 
+	/*
+	 * Bspec says that DDI_A_4_LANES is the only supported configuration
+	 * for Broxton.  Yet some BIOS fail to set this bit on port A if eDP
+	 * wasn't lit up at boot.  Force this bit on in our internal
+	 * configuration so that we use the proper lane count for our
+	 * calculations.
+	 */
+	if (IS_BROXTON(dev) && port == PORT_A) {
+		if (!(intel_dig_port->saved_port_bits & DDI_A_4_LANES)) {
+			DRM_DEBUG_KMS("BXT BIOS forgot to set DDI_A_4_LANES for port A; fixing\n");
+			intel_dig_port->saved_port_bits |= DDI_A_4_LANES;
+		}
+	}
+
 	intel_encoder->type = INTEL_OUTPUT_UNKNOWN;
 	intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2);
 	intel_encoder->cloneable = 0;
@@ -3247,8 +3326,7 @@
 		 * On BXT A0/A1, sw needs to activate DDIA HPD logic and
 		 * interrupts to check the external panel connection.
 		 */
-		if (IS_BROXTON(dev_priv) && (INTEL_REVID(dev) < BXT_REVID_B0)
-					 && port == PORT_B)
+		if (IS_BXT_REVID(dev, 0, BXT_REVID_A1) && port == PORT_B)
 			dev_priv->hotplug.irq_port[PORT_A] = intel_dig_port;
 		else
 			dev_priv->hotplug.irq_port[port] = intel_dig_port;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index f62ffc0..bda6b9c 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -44,6 +44,8 @@
 #include <drm/drm_plane_helper.h>
 #include <drm/drm_rect.h>
 #include <linux/dma_remapping.h>
+#include <linux/reservation.h>
+#include <linux/dma-buf.h>
 
 /* Primary plane formats for gen <= 3 */
 static const uint32_t i8xx_primary_formats[] = {
@@ -1095,7 +1097,7 @@
 static bool pipe_dsl_stopped(struct drm_device *dev, enum pipe pipe)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 reg = PIPEDSL(pipe);
+	i915_reg_t reg = PIPEDSL(pipe);
 	u32 line1, line2;
 	u32 line_mask;
 
@@ -1135,7 +1137,7 @@
 	enum pipe pipe = crtc->pipe;
 
 	if (INTEL_INFO(dev)->gen >= 4) {
-		int reg = PIPECONF(cpu_transcoder);
+		i915_reg_t reg = PIPECONF(cpu_transcoder);
 
 		/* Wait for the Pipe State to go off */
 		if (wait_for((I915_READ(reg) & I965_PIPECONF_ACTIVE) == 0,
@@ -1285,7 +1287,7 @@
 			   enum pipe pipe)
 {
 	struct drm_device *dev = dev_priv->dev;
-	int pp_reg;
+	i915_reg_t pp_reg;
 	u32 val;
 	enum pipe panel_pipe = PIPE_A;
 	bool locked = true;
@@ -1480,8 +1482,7 @@
 		return false;
 
 	if (HAS_PCH_CPT(dev_priv->dev)) {
-		u32	trans_dp_ctl_reg = TRANS_DP_CTL(pipe);
-		u32	trans_dp_ctl = I915_READ(trans_dp_ctl_reg);
+		u32 trans_dp_ctl = I915_READ(TRANS_DP_CTL(pipe));
 		if ((trans_dp_ctl & TRANS_DP_PORT_SEL_MASK) != port_sel)
 			return false;
 	} else if (IS_CHERRYVIEW(dev_priv->dev)) {
@@ -1545,12 +1546,13 @@
 }
 
 static void assert_pch_dp_disabled(struct drm_i915_private *dev_priv,
-				   enum pipe pipe, int reg, u32 port_sel)
+				   enum pipe pipe, i915_reg_t reg,
+				   u32 port_sel)
 {
 	u32 val = I915_READ(reg);
 	I915_STATE_WARN(dp_pipe_enabled(dev_priv, pipe, port_sel, val),
 	     "PCH DP (0x%08x) enabled on transcoder %c, should be disabled\n",
-	     reg, pipe_name(pipe));
+	     i915_mmio_reg_offset(reg), pipe_name(pipe));
 
 	I915_STATE_WARN(HAS_PCH_IBX(dev_priv->dev) && (val & DP_PORT_EN) == 0
 	     && (val & DP_PIPEB_SELECT),
@@ -1558,12 +1560,12 @@
 }
 
 static void assert_pch_hdmi_disabled(struct drm_i915_private *dev_priv,
-				     enum pipe pipe, int reg)
+				     enum pipe pipe, i915_reg_t reg)
 {
 	u32 val = I915_READ(reg);
 	I915_STATE_WARN(hdmi_pipe_enabled(dev_priv, pipe, val),
 	     "PCH HDMI (0x%08x) enabled on transcoder %c, should be disabled\n",
-	     reg, pipe_name(pipe));
+	     i915_mmio_reg_offset(reg), pipe_name(pipe));
 
 	I915_STATE_WARN(HAS_PCH_IBX(dev_priv->dev) && (val & SDVO_ENABLE) == 0
 	     && (val & SDVO_PIPE_B_SELECT),
@@ -1599,7 +1601,7 @@
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	int reg = DPLL(crtc->pipe);
+	i915_reg_t reg = DPLL(crtc->pipe);
 	u32 dpll = pipe_config->dpll_hw_state.dpll;
 
 	assert_pipe_disabled(dev_priv, crtc->pipe);
@@ -1688,7 +1690,7 @@
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	int reg = DPLL(crtc->pipe);
+	i915_reg_t reg = DPLL(crtc->pipe);
 	u32 dpll = crtc->config->dpll_hw_state.dpll;
 
 	assert_pipe_disabled(dev_priv, crtc->pipe);
@@ -1837,7 +1839,7 @@
 			 unsigned int expected_mask)
 {
 	u32 port_mask;
-	int dpll_reg;
+	i915_reg_t dpll_reg;
 
 	switch (dport->port) {
 	case PORT_B:
@@ -1962,7 +1964,8 @@
 	struct drm_device *dev = dev_priv->dev;
 	struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe];
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	uint32_t reg, val, pipeconf_val;
+	i915_reg_t reg;
+	uint32_t val, pipeconf_val;
 
 	/* PCH only available on ILK+ */
 	BUG_ON(!HAS_PCH_SPLIT(dev));
@@ -2051,7 +2054,8 @@
 					    enum pipe pipe)
 {
 	struct drm_device *dev = dev_priv->dev;
-	uint32_t reg, val;
+	i915_reg_t reg;
+	uint32_t val;
 
 	/* FDI relies on the transcoder */
 	assert_fdi_tx_disabled(dev_priv, pipe);
@@ -2068,7 +2072,7 @@
 	if (wait_for((I915_READ(reg) & TRANS_STATE_ENABLE) == 0, 50))
 		DRM_ERROR("failed to disable transcoder %c\n", pipe_name(pipe));
 
-	if (!HAS_PCH_IBX(dev)) {
+	if (HAS_PCH_CPT(dev)) {
 		/* Workaround: Clear the timing override chicken bit again. */
 		reg = TRANS_CHICKEN2(pipe);
 		val = I915_READ(reg);
@@ -2106,10 +2110,9 @@
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	enum pipe pipe = crtc->pipe;
-	enum transcoder cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv,
-								      pipe);
+	enum transcoder cpu_transcoder = crtc->config->cpu_transcoder;
 	enum pipe pch_transcoder;
-	int reg;
+	i915_reg_t reg;
 	u32 val;
 
 	DRM_DEBUG_KMS("enabling pipe %c\n", pipe_name(pipe));
@@ -2129,7 +2132,7 @@
 	 * need the check.
 	 */
 	if (HAS_GMCH_DISPLAY(dev_priv->dev))
-		if (intel_pipe_has_type(crtc, INTEL_OUTPUT_DSI))
+		if (crtc->config->has_dsi_encoder)
 			assert_dsi_pll_enabled(dev_priv);
 		else
 			assert_pll_enabled(dev_priv, pipe);
@@ -2170,7 +2173,7 @@
 	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
 	enum transcoder cpu_transcoder = crtc->config->cpu_transcoder;
 	enum pipe pipe = crtc->pipe;
-	int reg;
+	i915_reg_t reg;
 	u32 val;
 
 	DRM_DEBUG_KMS("disabling pipe %c\n", pipe_name(pipe));
@@ -2269,20 +2272,20 @@
 					       fb_format_modifier, 0));
 }
 
-static int
+static void
 intel_fill_fb_ggtt_view(struct i915_ggtt_view *view, struct drm_framebuffer *fb,
 			const struct drm_plane_state *plane_state)
 {
-	struct intel_rotation_info *info = &view->rotation_info;
+	struct intel_rotation_info *info = &view->params.rotation_info;
 	unsigned int tile_height, tile_pitch;
 
 	*view = i915_ggtt_view_normal;
 
 	if (!plane_state)
-		return 0;
+		return;
 
 	if (!intel_rotation_90_or_270(plane_state->rotation))
-		return 0;
+		return;
 
 	*view = i915_ggtt_view_rotated;
 
@@ -2309,8 +2312,6 @@
 		info->size_uv = info->width_pages_uv * info->height_pages_uv *
 				PAGE_SIZE;
 	}
-
-	return 0;
 }
 
 static unsigned int intel_linear_alignment(struct drm_i915_private *dev_priv)
@@ -2329,9 +2330,7 @@
 int
 intel_pin_and_fence_fb_obj(struct drm_plane *plane,
 			   struct drm_framebuffer *fb,
-			   const struct drm_plane_state *plane_state,
-			   struct intel_engine_cs *pipelined,
-			   struct drm_i915_gem_request **pipelined_request)
+			   const struct drm_plane_state *plane_state)
 {
 	struct drm_device *dev = fb->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -2366,9 +2365,7 @@
 		return -EINVAL;
 	}
 
-	ret = intel_fill_fb_ggtt_view(&view, fb, plane_state);
-	if (ret)
-		return ret;
+	intel_fill_fb_ggtt_view(&view, fb, plane_state);
 
 	/* Note that the w/a also requires 64 PTE of padding following the
 	 * bo. We currently fill all unused PTE with the shadow page and so
@@ -2387,11 +2384,10 @@
 	 */
 	intel_runtime_pm_get(dev_priv);
 
-	dev_priv->mm.interruptible = false;
-	ret = i915_gem_object_pin_to_display_plane(obj, alignment, pipelined,
-						   pipelined_request, &view);
+	ret = i915_gem_object_pin_to_display_plane(obj, alignment,
+						   &view);
 	if (ret)
-		goto err_interruptible;
+		goto err_pm;
 
 	/* Install a fence for tiled scan-out. Pre-i965 always needs a
 	 * fence, whereas 965+ only requires a fence if using
@@ -2417,14 +2413,12 @@
 		i915_gem_object_pin_fence(obj);
 	}
 
-	dev_priv->mm.interruptible = true;
 	intel_runtime_pm_put(dev_priv);
 	return 0;
 
 err_unpin:
 	i915_gem_object_unpin_from_display_plane(obj, &view);
-err_interruptible:
-	dev_priv->mm.interruptible = true;
+err_pm:
 	intel_runtime_pm_put(dev_priv);
 	return ret;
 }
@@ -2434,12 +2428,10 @@
 {
 	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	struct i915_ggtt_view view;
-	int ret;
 
 	WARN_ON(!mutex_is_locked(&obj->base.dev->struct_mutex));
 
-	ret = intel_fill_fb_ggtt_view(&view, fb, plane_state);
-	WARN_ONCE(ret, "Couldn't get view from plane state!");
+	intel_fill_fb_ggtt_view(&view, fb, plane_state);
 
 	if (view.type == I915_GGTT_VIEW_NORMAL)
 		i915_gem_object_unpin_fence(obj);
@@ -2646,11 +2638,13 @@
 	return;
 
 valid_fb:
-	plane_state->src_x = plane_state->src_y = 0;
+	plane_state->src_x = 0;
+	plane_state->src_y = 0;
 	plane_state->src_w = fb->width << 16;
 	plane_state->src_h = fb->height << 16;
 
-	plane_state->crtc_x = plane_state->src_y = 0;
+	plane_state->crtc_x = 0;
+	plane_state->crtc_y = 0;
 	plane_state->crtc_w = fb->width;
 	plane_state->crtc_h = fb->height;
 
@@ -2678,7 +2672,7 @@
 	int plane = intel_crtc->plane;
 	unsigned long linear_offset;
 	u32 dspcntr;
-	u32 reg = DSPCNTR(plane);
+	i915_reg_t reg = DSPCNTR(plane);
 	int pixel_size;
 
 	if (!visible || !fb) {
@@ -2808,7 +2802,7 @@
 	int plane = intel_crtc->plane;
 	unsigned long linear_offset;
 	u32 dspcntr;
-	u32 reg = DSPCNTR(plane);
+	i915_reg_t reg = DSPCNTR(plane);
 	int pixel_size;
 
 	if (!visible || !fb) {
@@ -2933,30 +2927,32 @@
 	}
 }
 
-unsigned long intel_plane_obj_offset(struct intel_plane *intel_plane,
-				     struct drm_i915_gem_object *obj,
-				     unsigned int plane)
+u32 intel_plane_obj_offset(struct intel_plane *intel_plane,
+			   struct drm_i915_gem_object *obj,
+			   unsigned int plane)
 {
-	const struct i915_ggtt_view *view = &i915_ggtt_view_normal;
+	struct i915_ggtt_view view;
 	struct i915_vma *vma;
-	unsigned char *offset;
+	u64 offset;
 
-	if (intel_rotation_90_or_270(intel_plane->base.state->rotation))
-		view = &i915_ggtt_view_rotated;
+	intel_fill_fb_ggtt_view(&view, intel_plane->base.fb,
+				intel_plane->base.state);
 
-	vma = i915_gem_obj_to_ggtt_view(obj, view);
+	vma = i915_gem_obj_to_ggtt_view(obj, &view);
 	if (WARN(!vma, "ggtt vma for display object not found! (view=%u)\n",
-		view->type))
+		view.type))
 		return -1;
 
-	offset = (unsigned char *)vma->node.start;
+	offset = vma->node.start;
 
 	if (plane == 1) {
-		offset += vma->ggtt_view.rotation_info.uv_start_page *
+		offset += vma->ggtt_view.params.rotation_info.uv_start_page *
 			  PAGE_SIZE;
 	}
 
-	return (unsigned long)offset;
+	WARN_ON(upper_32_bits(offset));
+
+	return lower_32_bits(offset);
 }
 
 static void skl_detach_scaler(struct intel_crtc *intel_crtc, int id)
@@ -3082,7 +3078,7 @@
 	u32 tile_height, plane_offset, plane_size;
 	unsigned int rotation;
 	int x_offset, y_offset;
-	unsigned long surf_addr;
+	u32 surf_addr;
 	struct intel_crtc_state *crtc_state = intel_crtc->config;
 	struct intel_plane_state *plane_state;
 	int src_x = 0, src_y = 0, src_w = 0, src_h = 0;
@@ -3180,8 +3176,8 @@
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	if (dev_priv->fbc.disable_fbc)
-		dev_priv->fbc.disable_fbc(dev_priv);
+	if (dev_priv->fbc.deactivate)
+		dev_priv->fbc.deactivate(dev_priv);
 
 	dev_priv->display.update_primary_plane(crtc, fb, x, y);
 
@@ -3210,10 +3206,9 @@
 		struct intel_plane_state *plane_state;
 
 		drm_modeset_lock_crtc(crtc, &plane->base);
-
 		plane_state = to_intel_plane_state(plane->base.state);
 
-		if (plane_state->base.fb)
+		if (crtc->state->active && plane_state->base.fb)
 			plane->commit_plane(&plane->base, plane_state);
 
 		drm_modeset_unlock_crtc(crtc);
@@ -3289,32 +3284,6 @@
 	drm_modeset_unlock_all(dev);
 }
 
-static void
-intel_finish_fb(struct drm_framebuffer *old_fb)
-{
-	struct drm_i915_gem_object *obj = intel_fb_obj(old_fb);
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-	bool was_interruptible = dev_priv->mm.interruptible;
-	int ret;
-
-	/* Big Hammer, we also need to ensure that any pending
-	 * MI_WAIT_FOR_EVENT inside a user batch buffer on the
-	 * current scanout is retired before unpinning the old
-	 * framebuffer. Note that we rely on userspace rendering
-	 * into the buffer attached to the pipe they are waiting
-	 * on. If not, userspace generates a GPU hang with IPEHR
-	 * point to the MI_WAIT_FOR_EVENT.
-	 *
-	 * This should only fail upon a hung GPU, in which case we
-	 * can safely continue.
-	 */
-	dev_priv->mm.interruptible = false;
-	ret = i915_gem_object_wait_rendering(obj, true);
-	dev_priv->mm.interruptible = was_interruptible;
-
-	WARN_ON(ret);
-}
-
 static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
@@ -3384,7 +3353,8 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	int pipe = intel_crtc->pipe;
-	u32 reg, temp;
+	i915_reg_t reg;
+	u32 temp;
 
 	/* enable normal train */
 	reg = FDI_TX_CTL(pipe);
@@ -3426,7 +3396,8 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	int pipe = intel_crtc->pipe;
-	u32 reg, temp, tries;
+	i915_reg_t reg;
+	u32 temp, tries;
 
 	/* FDI needs bits from pipe first */
 	assert_pipe_enabled(dev_priv, pipe);
@@ -3526,7 +3497,8 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	int pipe = intel_crtc->pipe;
-	u32 reg, temp, i, retry;
+	i915_reg_t reg;
+	u32 temp, i, retry;
 
 	/* Train 1: umask FDI RX Interrupt symbol_lock and bit_lock bit
 	   for train result */
@@ -3658,7 +3630,8 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	int pipe = intel_crtc->pipe;
-	u32 reg, temp, i, j;
+	i915_reg_t reg;
+	u32 temp, i, j;
 
 	/* Train 1: umask FDI RX Interrupt symbol_lock and bit_lock bit
 	   for train result */
@@ -3775,8 +3748,8 @@
 	struct drm_device *dev = intel_crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int pipe = intel_crtc->pipe;
-	u32 reg, temp;
-
+	i915_reg_t reg;
+	u32 temp;
 
 	/* enable PCH FDI RX PLL, wait warmup plus DMI latency */
 	reg = FDI_RX_CTL(pipe);
@@ -3812,7 +3785,8 @@
 	struct drm_device *dev = intel_crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int pipe = intel_crtc->pipe;
-	u32 reg, temp;
+	i915_reg_t reg;
+	u32 temp;
 
 	/* Switch from PCDclk to Rawclk */
 	reg = FDI_RX_CTL(pipe);
@@ -3842,7 +3816,8 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	int pipe = intel_crtc->pipe;
-	u32 reg, temp;
+	i915_reg_t reg;
+	u32 temp;
 
 	/* disable CPU FDI tx and PCH FDI rx */
 	reg = FDI_TX_CTL(pipe);
@@ -3935,15 +3910,23 @@
 				 work->pending_flip_obj);
 }
 
-void intel_crtc_wait_for_pending_flips(struct drm_crtc *crtc)
+static int intel_crtc_wait_for_pending_flips(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	long ret;
 
 	WARN_ON(waitqueue_active(&dev_priv->pending_flip_queue));
-	if (WARN_ON(wait_event_timeout(dev_priv->pending_flip_queue,
-				       !intel_crtc_has_pending_flip(crtc),
-				       60*HZ) == 0)) {
+
+	ret = wait_event_interruptible_timeout(
+					dev_priv->pending_flip_queue,
+					!intel_crtc_has_pending_flip(crtc),
+					60*HZ);
+
+	if (ret < 0)
+		return ret;
+
+	if (ret == 0) {
 		struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 
 		spin_lock_irq(&dev->event_lock);
@@ -3954,11 +3937,7 @@
 		spin_unlock_irq(&dev->event_lock);
 	}
 
-	if (crtc->primary->fb) {
-		mutex_lock(&dev->struct_mutex);
-		intel_finish_fb(crtc->primary->fb);
-		mutex_unlock(&dev->struct_mutex);
-	}
+	return 0;
 }
 
 /* Program iCLKIP clock to the desired frequency */
@@ -4118,6 +4097,22 @@
 	}
 }
 
+/* Return which DP Port should be selected for Transcoder DP control */
+static enum port
+intel_trans_dp_port_sel(struct drm_crtc *crtc)
+{
+	struct drm_device *dev = crtc->dev;
+	struct intel_encoder *encoder;
+
+	for_each_encoder_on_crtc(dev, crtc, encoder) {
+		if (encoder->type == INTEL_OUTPUT_DISPLAYPORT ||
+		    encoder->type == INTEL_OUTPUT_EDP)
+			return enc_to_dig_port(&encoder->base)->port;
+	}
+
+	return -1;
+}
+
 /*
  * Enable PCH resources required for PCH ports:
  *   - PCH PLLs
@@ -4132,7 +4127,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	int pipe = intel_crtc->pipe;
-	u32 reg, temp;
+	u32 temp;
 
 	assert_pch_transcoder_disabled(dev_priv, pipe);
 
@@ -4144,6 +4139,12 @@
 	I915_WRITE(FDI_RX_TUSIZE1(pipe),
 		   I915_READ(PIPE_DATA_M1(pipe)) & TU_SIZE_MASK);
 
+	/*
+	 * Sometimes spurious CPU pipe underruns happen during FDI
+	 * training, at least with VGA+HDMI cloning. Suppress them.
+	 */
+	intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false);
+
 	/* For PCH output, training FDI link */
 	dev_priv->display.fdi_link_train(crtc);
 
@@ -4177,10 +4178,14 @@
 
 	intel_fdi_normal_train(crtc);
 
+	intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true);
+
 	/* For PCH DP, enable TRANS_DP_CTL */
 	if (HAS_PCH_CPT(dev) && intel_crtc->config->has_dp_encoder) {
+		const struct drm_display_mode *adjusted_mode =
+			&intel_crtc->config->base.adjusted_mode;
 		u32 bpc = (I915_READ(PIPECONF(pipe)) & PIPECONF_BPC_MASK) >> 5;
-		reg = TRANS_DP_CTL(pipe);
+		i915_reg_t reg = TRANS_DP_CTL(pipe);
 		temp = I915_READ(reg);
 		temp &= ~(TRANS_DP_PORT_SEL_MASK |
 			  TRANS_DP_SYNC_MASK |
@@ -4188,19 +4193,19 @@
 		temp |= TRANS_DP_OUTPUT_ENABLE;
 		temp |= bpc << 9; /* same format but at 11:9 */
 
-		if (crtc->mode.flags & DRM_MODE_FLAG_PHSYNC)
+		if (adjusted_mode->flags & DRM_MODE_FLAG_PHSYNC)
 			temp |= TRANS_DP_HSYNC_ACTIVE_HIGH;
-		if (crtc->mode.flags & DRM_MODE_FLAG_PVSYNC)
+		if (adjusted_mode->flags & DRM_MODE_FLAG_PVSYNC)
 			temp |= TRANS_DP_VSYNC_ACTIVE_HIGH;
 
 		switch (intel_trans_dp_port_sel(crtc)) {
-		case PCH_DP_B:
+		case PORT_B:
 			temp |= TRANS_DP_PORT_SEL_B;
 			break;
-		case PCH_DP_C:
+		case PORT_C:
 			temp |= TRANS_DP_PORT_SEL_C;
 			break;
-		case PCH_DP_D:
+		case PORT_D:
 			temp |= TRANS_DP_PORT_SEL_D;
 			break;
 		default:
@@ -4237,6 +4242,7 @@
 	struct intel_shared_dpll *pll;
 	struct intel_shared_dpll_config *shared_dpll;
 	enum intel_dpll_id i;
+	int max = dev_priv->num_shared_dpll;
 
 	shared_dpll = intel_atomic_get_shared_dpll_state(crtc_state->base.state);
 
@@ -4271,9 +4277,11 @@
 		WARN_ON(shared_dpll[i].crtc_mask);
 
 		goto found;
-	}
+	} else if (INTEL_INFO(dev_priv)->gen < 9 && HAS_DDI(dev_priv))
+		/* Do not consider SPLL */
+		max = 2;
 
-	for (i = 0; i < dev_priv->num_shared_dpll; i++) {
+	for (i = 0; i < max; i++) {
 		pll = &dev_priv->shared_dplls[i];
 
 		/* Only want to check enabled timings first */
@@ -4337,7 +4345,7 @@
 static void cpt_verify_modeset(struct drm_device *dev, int pipe)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	int dslreg = PIPEDSL(pipe);
+	i915_reg_t dslreg = PIPEDSL(pipe);
 	u32 temp;
 
 	temp = I915_READ(dslreg);
@@ -4630,7 +4638,7 @@
 		return;
 
 	if (HAS_GMCH_DISPLAY(dev_priv->dev)) {
-		if (intel_pipe_has_type(intel_crtc, INTEL_OUTPUT_DSI))
+		if (intel_crtc->config->has_dsi_encoder)
 			assert_dsi_pll_enabled(dev_priv);
 		else
 			assert_pll_enabled(dev_priv, pipe);
@@ -4647,7 +4655,7 @@
 	}
 
 	for (i = 0; i < 256; i++) {
-		u32 palreg;
+		i915_reg_t palreg;
 
 		if (HAS_GMCH_DISPLAY(dev))
 			palreg = PALETTE(pipe, i);
@@ -4726,9 +4734,9 @@
 	if (IS_GEN2(dev))
 		intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true);
 
-	/* Underruns don't raise interrupts, so check manually. */
-	if (HAS_GMCH_DISPLAY(dev))
-		i9xx_check_fifo_underruns(dev_priv);
+	/* Underruns don't always raise interrupts, so check manually. */
+	intel_check_cpu_fifo_underruns(dev_priv);
+	intel_check_pch_fifo_underruns(dev_priv);
 }
 
 /**
@@ -4786,8 +4794,6 @@
 {
 	struct intel_crtc_atomic_commit *atomic = &crtc->atomic;
 	struct drm_device *dev = crtc->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_plane *plane;
 
 	if (atomic->wait_vblank)
 		intel_wait_for_vblank(dev, crtc->pipe);
@@ -4801,15 +4807,11 @@
 		intel_update_watermarks(&crtc->base);
 
 	if (atomic->update_fbc)
-		intel_fbc_update(dev_priv);
+		intel_fbc_update(crtc);
 
 	if (atomic->post_enable_primary)
 		intel_post_enable_primary(&crtc->base);
 
-	drm_for_each_plane_mask(plane, dev, atomic->update_sprite_watermarks)
-		intel_update_sprite_watermarks(plane, &crtc->base,
-					       0, 0, 0, false, false);
-
 	memset(atomic, 0, sizeof(*atomic));
 }
 
@@ -4818,23 +4820,9 @@
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc_atomic_commit *atomic = &crtc->atomic;
-	struct drm_plane *p;
-
-	/* Track fb's for any planes being disabled */
-	drm_for_each_plane_mask(p, dev, atomic->disabled_planes) {
-		struct intel_plane *plane = to_intel_plane(p);
-
-		mutex_lock(&dev->struct_mutex);
-		i915_gem_track_fb(intel_fb_obj(plane->base.fb), NULL,
-				  plane->frontbuffer_bit);
-		mutex_unlock(&dev->struct_mutex);
-	}
-
-	if (atomic->wait_for_flips)
-		intel_crtc_wait_for_pending_flips(&crtc->base);
 
 	if (atomic->disable_fbc)
-		intel_fbc_disable_crtc(crtc);
+		intel_fbc_deactivate(crtc);
 
 	if (crtc->atomic.disable_ips)
 		hsw_disable_ips(crtc);
@@ -4880,6 +4868,9 @@
 		return;
 
 	if (intel_crtc->config->has_pch_encoder)
+		intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, false);
+
+	if (intel_crtc->config->has_pch_encoder)
 		intel_prepare_shared_dpll(intel_crtc);
 
 	if (intel_crtc->config->has_dp_encoder)
@@ -4897,7 +4888,6 @@
 	intel_crtc->active = true;
 
 	intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true);
-	intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, true);
 
 	for_each_encoder_on_crtc(dev, crtc, encoder)
 		if (encoder->pre_enable)
@@ -4935,6 +4925,13 @@
 
 	if (HAS_PCH_CPT(dev))
 		cpt_verify_modeset(dev, intel_crtc->pipe);
+
+	/* Must wait for vblank to avoid spurious PCH FIFO underruns */
+	if (intel_crtc->config->has_pch_encoder)
+		intel_wait_for_vblank(dev, pipe);
+	intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, true);
+
+	intel_fbc_enable(intel_crtc);
 }
 
 /* IPS only exists on ULT machines and is tied to pipe A. */
@@ -4952,11 +4949,14 @@
 	int pipe = intel_crtc->pipe, hsw_workaround_pipe;
 	struct intel_crtc_state *pipe_config =
 		to_intel_crtc_state(crtc->state);
-	bool is_dsi = intel_pipe_has_type(intel_crtc, INTEL_OUTPUT_DSI);
 
 	if (WARN_ON(intel_crtc->active))
 		return;
 
+	if (intel_crtc->config->has_pch_encoder)
+		intel_set_pch_fifo_underrun_reporting(dev_priv, TRANSCODER_A,
+						      false);
+
 	if (intel_crtc_to_shared_dpll(intel_crtc))
 		intel_enable_shared_dpll(intel_crtc);
 
@@ -4981,21 +4981,20 @@
 
 	intel_crtc->active = true;
 
-	intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true);
+	if (intel_crtc->config->has_pch_encoder)
+		intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false);
+	else
+		intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true);
+
 	for_each_encoder_on_crtc(dev, crtc, encoder) {
-		if (encoder->pre_pll_enable)
-			encoder->pre_pll_enable(encoder);
 		if (encoder->pre_enable)
 			encoder->pre_enable(encoder);
 	}
 
-	if (intel_crtc->config->has_pch_encoder) {
-		intel_set_pch_fifo_underrun_reporting(dev_priv, TRANSCODER_A,
-						      true);
+	if (intel_crtc->config->has_pch_encoder)
 		dev_priv->display.fdi_link_train(crtc);
-	}
 
-	if (!is_dsi)
+	if (!intel_crtc->config->has_dsi_encoder)
 		intel_ddi_enable_pipe_clock(intel_crtc);
 
 	if (INTEL_INFO(dev)->gen >= 9)
@@ -5010,7 +5009,7 @@
 	intel_crtc_load_lut(crtc);
 
 	intel_ddi_set_pipe_settings(crtc);
-	if (!is_dsi)
+	if (!intel_crtc->config->has_dsi_encoder)
 		intel_ddi_enable_transcoder_func(crtc);
 
 	intel_update_watermarks(crtc);
@@ -5019,7 +5018,7 @@
 	if (intel_crtc->config->has_pch_encoder)
 		lpt_pch_enable(crtc);
 
-	if (intel_crtc->config->dp_encoder_is_mst && !is_dsi)
+	if (intel_crtc->config->dp_encoder_is_mst)
 		intel_ddi_set_vc_payload_alloc(crtc, true);
 
 	assert_vblank_disabled(crtc);
@@ -5030,6 +5029,14 @@
 		intel_opregion_notify_encoder(encoder, true);
 	}
 
+	if (intel_crtc->config->has_pch_encoder) {
+		intel_wait_for_vblank(dev, pipe);
+		intel_wait_for_vblank(dev, pipe);
+		intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true);
+		intel_set_pch_fifo_underrun_reporting(dev_priv, TRANSCODER_A,
+						      true);
+	}
+
 	/* If we change the relative order between pipe/planes enabling, we need
 	 * to change the workaround. */
 	hsw_workaround_pipe = pipe_config->hsw_workaround_pipe;
@@ -5037,6 +5044,8 @@
 		intel_wait_for_vblank(dev, hsw_workaround_pipe);
 		intel_wait_for_vblank(dev, hsw_workaround_pipe);
 	}
+
+	intel_fbc_enable(intel_crtc);
 }
 
 static void ironlake_pfit_disable(struct intel_crtc *crtc, bool force)
@@ -5061,7 +5070,9 @@
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct intel_encoder *encoder;
 	int pipe = intel_crtc->pipe;
-	u32 reg, temp;
+
+	if (intel_crtc->config->has_pch_encoder)
+		intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, false);
 
 	for_each_encoder_on_crtc(dev, crtc, encoder)
 		encoder->disable(encoder);
@@ -5069,15 +5080,22 @@
 	drm_crtc_vblank_off(crtc);
 	assert_vblank_disabled(crtc);
 
+	/*
+	 * Sometimes spurious CPU pipe underruns happen when the
+	 * pipe is already disabled, but FDI RX/TX is still enabled.
+	 * Happens at least with VGA+HDMI cloning. Suppress them.
+	 */
 	if (intel_crtc->config->has_pch_encoder)
-		intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, false);
+		intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false);
 
 	intel_disable_pipe(intel_crtc);
 
 	ironlake_pfit_disable(intel_crtc, false);
 
-	if (intel_crtc->config->has_pch_encoder)
+	if (intel_crtc->config->has_pch_encoder) {
 		ironlake_fdi_disable(crtc);
+		intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true);
+	}
 
 	for_each_encoder_on_crtc(dev, crtc, encoder)
 		if (encoder->post_disable)
@@ -5087,6 +5105,9 @@
 		ironlake_disable_pch_transcoder(dev_priv, pipe);
 
 		if (HAS_PCH_CPT(dev)) {
+			i915_reg_t reg;
+			u32 temp;
+
 			/* disable TRANS_DP_CTL */
 			reg = TRANS_DP_CTL(pipe);
 			temp = I915_READ(reg);
@@ -5103,6 +5124,10 @@
 
 		ironlake_fdi_pll_disable(intel_crtc);
 	}
+
+	intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, true);
+
+	intel_fbc_disable_crtc(intel_crtc);
 }
 
 static void haswell_crtc_disable(struct drm_crtc *crtc)
@@ -5112,7 +5137,10 @@
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct intel_encoder *encoder;
 	enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder;
-	bool is_dsi = intel_pipe_has_type(intel_crtc, INTEL_OUTPUT_DSI);
+
+	if (intel_crtc->config->has_pch_encoder)
+		intel_set_pch_fifo_underrun_reporting(dev_priv, TRANSCODER_A,
+						      false);
 
 	for_each_encoder_on_crtc(dev, crtc, encoder) {
 		intel_opregion_notify_encoder(encoder, false);
@@ -5122,15 +5150,12 @@
 	drm_crtc_vblank_off(crtc);
 	assert_vblank_disabled(crtc);
 
-	if (intel_crtc->config->has_pch_encoder)
-		intel_set_pch_fifo_underrun_reporting(dev_priv, TRANSCODER_A,
-						      false);
 	intel_disable_pipe(intel_crtc);
 
 	if (intel_crtc->config->dp_encoder_is_mst)
 		intel_ddi_set_vc_payload_alloc(crtc, false);
 
-	if (!is_dsi)
+	if (!intel_crtc->config->has_dsi_encoder)
 		intel_ddi_disable_transcoder_func(dev_priv, cpu_transcoder);
 
 	if (INTEL_INFO(dev)->gen >= 9)
@@ -5138,7 +5163,7 @@
 	else
 		ironlake_pfit_disable(intel_crtc, false);
 
-	if (!is_dsi)
+	if (!intel_crtc->config->has_dsi_encoder)
 		intel_ddi_disable_pipe_clock(intel_crtc);
 
 	if (intel_crtc->config->has_pch_encoder) {
@@ -5149,6 +5174,12 @@
 	for_each_encoder_on_crtc(dev, crtc, encoder)
 		if (encoder->post_disable)
 			encoder->post_disable(encoder);
+
+	if (intel_crtc->config->has_pch_encoder)
+		intel_set_pch_fifo_underrun_reporting(dev_priv, TRANSCODER_A,
+						      true);
+
+	intel_fbc_disable_crtc(intel_crtc);
 }
 
 static void i9xx_pfit_enable(struct intel_crtc *crtc)
@@ -5179,24 +5210,40 @@
 {
 	switch (port) {
 	case PORT_A:
-		return POWER_DOMAIN_PORT_DDI_A_4_LANES;
+		return POWER_DOMAIN_PORT_DDI_A_LANES;
 	case PORT_B:
-		return POWER_DOMAIN_PORT_DDI_B_4_LANES;
+		return POWER_DOMAIN_PORT_DDI_B_LANES;
 	case PORT_C:
-		return POWER_DOMAIN_PORT_DDI_C_4_LANES;
+		return POWER_DOMAIN_PORT_DDI_C_LANES;
 	case PORT_D:
-		return POWER_DOMAIN_PORT_DDI_D_4_LANES;
+		return POWER_DOMAIN_PORT_DDI_D_LANES;
 	case PORT_E:
-		return POWER_DOMAIN_PORT_DDI_E_2_LANES;
+		return POWER_DOMAIN_PORT_DDI_E_LANES;
 	default:
-		WARN_ON_ONCE(1);
+		MISSING_CASE(port);
 		return POWER_DOMAIN_PORT_OTHER;
 	}
 }
 
-#define for_each_power_domain(domain, mask)				\
-	for ((domain) = 0; (domain) < POWER_DOMAIN_NUM; (domain)++)	\
-		if ((1 << (domain)) & (mask))
+static enum intel_display_power_domain port_to_aux_power_domain(enum port port)
+{
+	switch (port) {
+	case PORT_A:
+		return POWER_DOMAIN_AUX_A;
+	case PORT_B:
+		return POWER_DOMAIN_AUX_B;
+	case PORT_C:
+		return POWER_DOMAIN_AUX_C;
+	case PORT_D:
+		return POWER_DOMAIN_AUX_D;
+	case PORT_E:
+		/* FIXME: Check VBT for actual wiring of PORT E */
+		return POWER_DOMAIN_AUX_D;
+	default:
+		MISSING_CASE(port);
+		return POWER_DOMAIN_AUX_A;
+	}
+}
 
 enum intel_display_power_domain
 intel_display_port_power_domain(struct intel_encoder *intel_encoder)
@@ -5225,6 +5272,36 @@
 	}
 }
 
+enum intel_display_power_domain
+intel_display_port_aux_power_domain(struct intel_encoder *intel_encoder)
+{
+	struct drm_device *dev = intel_encoder->base.dev;
+	struct intel_digital_port *intel_dig_port;
+
+	switch (intel_encoder->type) {
+	case INTEL_OUTPUT_UNKNOWN:
+	case INTEL_OUTPUT_HDMI:
+		/*
+		 * Only DDI platforms should ever use these output types.
+		 * We can get here after the HDMI detect code has already set
+		 * the type of the shared encoder. Since we can't be sure
+		 * what's the status of the given connectors, play safe and
+		 * run the DP detection too.
+		 */
+		WARN_ON_ONCE(!HAS_DDI(dev));
+	case INTEL_OUTPUT_DISPLAYPORT:
+	case INTEL_OUTPUT_EDP:
+		intel_dig_port = enc_to_dig_port(&intel_encoder->base);
+		return port_to_aux_power_domain(intel_dig_port->port);
+	case INTEL_OUTPUT_DP_MST:
+		intel_dig_port = enc_to_mst(&intel_encoder->base)->primary;
+		return port_to_aux_power_domain(intel_dig_port->port);
+	default:
+		MISSING_CASE(intel_encoder->type);
+		return POWER_DOMAIN_AUX_A;
+	}
+}
+
 static unsigned long get_crtc_power_domains(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
@@ -5232,13 +5309,11 @@
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	enum pipe pipe = intel_crtc->pipe;
 	unsigned long mask;
-	enum transcoder transcoder;
+	enum transcoder transcoder = intel_crtc->config->cpu_transcoder;
 
 	if (!crtc->state->active)
 		return 0;
 
-	transcoder = intel_pipe_to_cpu_transcoder(dev->dev_private, pipe);
-
 	mask = BIT(POWER_DOMAIN_PIPE(pipe));
 	mask |= BIT(POWER_DOMAIN_TRANSCODER(transcoder));
 	if (intel_crtc->config->pch_pfit.enabled ||
@@ -5325,7 +5400,7 @@
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	if (IS_SKYLAKE(dev)) {
+	if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
 		u32 limit = I915_READ(SKL_DFSM) & SKL_DFSM_CDCLK_LIMIT_MASK;
 
 		if (limit == SKL_DFSM_CDCLK_LIMIT_675)
@@ -5742,32 +5817,16 @@
 	if (I915_READ(DBUF_CTL) & DBUF_POWER_STATE)
 		DRM_ERROR("DBuf power disable timeout\n");
 
-	/*
-	 * DMC assumes ownership of LCPLL and will get confused if we touch it.
-	 */
-	if (dev_priv->csr.dmc_payload) {
-		/* disable DPLL0 */
-		I915_WRITE(LCPLL1_CTL, I915_READ(LCPLL1_CTL) &
-					~LCPLL_PLL_ENABLE);
-		if (wait_for(!(I915_READ(LCPLL1_CTL) & LCPLL_PLL_LOCK), 1))
-			DRM_ERROR("Couldn't disable DPLL0\n");
-	}
-
-	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+	/* disable DPLL0 */
+	I915_WRITE(LCPLL1_CTL, I915_READ(LCPLL1_CTL) & ~LCPLL_PLL_ENABLE);
+	if (wait_for(!(I915_READ(LCPLL1_CTL) & LCPLL_PLL_LOCK), 1))
+		DRM_ERROR("Couldn't disable DPLL0\n");
 }
 
 void skl_init_cdclk(struct drm_i915_private *dev_priv)
 {
-	u32 val;
 	unsigned int required_vco;
 
-	/* enable PCH reset handshake */
-	val = I915_READ(HSW_NDE_RSTWRN_OPT);
-	I915_WRITE(HSW_NDE_RSTWRN_OPT, val | RESET_PCH_HANDSHAKE_ENABLE);
-
-	/* enable PG1 and Misc I/O */
-	intel_display_power_get(dev_priv, POWER_DOMAIN_PLLS);
-
 	/* DPLL0 not enabled (happens on early BIOS versions) */
 	if (!(I915_READ(LCPLL1_CTL) & LCPLL_PLL_ENABLE)) {
 		/* enable DPLL0 */
@@ -5788,6 +5847,45 @@
 		DRM_ERROR("DBuf power enable timeout\n");
 }
 
+int skl_sanitize_cdclk(struct drm_i915_private *dev_priv)
+{
+	uint32_t lcpll1 = I915_READ(LCPLL1_CTL);
+	uint32_t cdctl = I915_READ(CDCLK_CTL);
+	int freq = dev_priv->skl_boot_cdclk;
+
+	/*
+	 * check if the pre-os intialized the display
+	 * There is SWF18 scratchpad register defined which is set by the
+	 * pre-os which can be used by the OS drivers to check the status
+	 */
+	if ((I915_READ(SWF_ILK(0x18)) & 0x00FFFFFF) == 0)
+		goto sanitize;
+
+	/* Is PLL enabled and locked ? */
+	if (!((lcpll1 & LCPLL_PLL_ENABLE) && (lcpll1 & LCPLL_PLL_LOCK)))
+		goto sanitize;
+
+	/* DPLL okay; verify the cdclock
+	 *
+	 * Noticed in some instances that the freq selection is correct but
+	 * decimal part is programmed wrong from BIOS where pre-os does not
+	 * enable display. Verify the same as well.
+	 */
+	if (cdctl == ((cdctl & CDCLK_FREQ_SEL_MASK) | skl_cdclk_decimal(freq)))
+		/* All well; nothing to sanitize */
+		return false;
+sanitize:
+	/*
+	 * As of now initialize with max cdclk till
+	 * we get dynamic cdclk support
+	 * */
+	dev_priv->skl_boot_cdclk = dev_priv->max_cdclk_freq;
+	skl_init_cdclk(dev_priv);
+
+	/* we did have to sanitize */
+	return true;
+}
+
 /* Adjust CDclk dividers to allow high res or save power if possible */
 static void valleyview_set_cdclk(struct drm_device *dev, int cdclk)
 {
@@ -6069,13 +6167,10 @@
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct intel_encoder *encoder;
 	int pipe = intel_crtc->pipe;
-	bool is_dsi;
 
 	if (WARN_ON(intel_crtc->active))
 		return;
 
-	is_dsi = intel_pipe_has_type(intel_crtc, INTEL_OUTPUT_DSI);
-
 	if (intel_crtc->config->has_dp_encoder)
 		intel_dp_set_m_n(intel_crtc, M1_N1);
 
@@ -6098,7 +6193,7 @@
 		if (encoder->pre_pll_enable)
 			encoder->pre_pll_enable(encoder);
 
-	if (!is_dsi) {
+	if (!intel_crtc->config->has_dsi_encoder) {
 		if (IS_CHERRYVIEW(dev)) {
 			chv_prepare_pll(intel_crtc, intel_crtc->config);
 			chv_enable_pll(intel_crtc, intel_crtc->config);
@@ -6177,6 +6272,8 @@
 
 	for_each_encoder_on_crtc(dev, crtc, encoder)
 		encoder->enable(encoder);
+
+	intel_fbc_enable(intel_crtc);
 }
 
 static void i9xx_pfit_disable(struct intel_crtc *crtc)
@@ -6224,7 +6321,7 @@
 		if (encoder->post_disable)
 			encoder->post_disable(encoder);
 
-	if (!intel_pipe_has_type(intel_crtc, INTEL_OUTPUT_DSI)) {
+	if (!intel_crtc->config->has_dsi_encoder) {
 		if (IS_CHERRYVIEW(dev))
 			chv_disable_pll(dev_priv, pipe);
 		else if (IS_VALLEYVIEW(dev))
@@ -6239,6 +6336,8 @@
 
 	if (!IS_GEN2(dev))
 		intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false);
+
+	intel_fbc_disable_crtc(intel_crtc);
 }
 
 static void intel_crtc_disable_noatomic(struct drm_crtc *crtc)
@@ -6252,7 +6351,8 @@
 		return;
 
 	if (to_intel_plane_state(crtc->primary->state)->visible) {
-		intel_crtc_wait_for_pending_flips(crtc);
+		WARN_ON(intel_crtc->unpin_work);
+
 		intel_pre_disable_primary(crtc);
 	}
 
@@ -6570,6 +6670,15 @@
 		pipe_config_supports_ips(dev_priv, pipe_config);
 }
 
+static bool intel_crtc_supports_double_wide(const struct intel_crtc *crtc)
+{
+	const struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+
+	/* GDG double wide on either pipe, otherwise pipe A only */
+	return INTEL_INFO(dev_priv)->gen < 4 &&
+		(crtc->pipe == PIPE_A || IS_I915G(dev_priv));
+}
+
 static int intel_crtc_compute_config(struct intel_crtc *crtc,
 				     struct intel_crtc_state *pipe_config)
 {
@@ -6579,23 +6688,24 @@
 
 	/* FIXME should check pixel clock limits on all platforms */
 	if (INTEL_INFO(dev)->gen < 4) {
-		int clock_limit = dev_priv->max_cdclk_freq;
+		int clock_limit = dev_priv->max_cdclk_freq * 9 / 10;
 
 		/*
-		 * Enable pixel doubling when the dot clock
+		 * Enable double wide mode when the dot clock
 		 * is > 90% of the (display) core speed.
-		 *
-		 * GDG double wide on either pipe,
-		 * otherwise pipe A only.
 		 */
-		if ((crtc->pipe == PIPE_A || IS_I915G(dev)) &&
-		    adjusted_mode->crtc_clock > clock_limit * 9 / 10) {
+		if (intel_crtc_supports_double_wide(crtc) &&
+		    adjusted_mode->crtc_clock > clock_limit) {
 			clock_limit *= 2;
 			pipe_config->double_wide = true;
 		}
 
-		if (adjusted_mode->crtc_clock > clock_limit * 9 / 10)
+		if (adjusted_mode->crtc_clock > clock_limit) {
+			DRM_DEBUG_KMS("requested pixel clock (%d kHz) too high (max: %d kHz, double wide: %s)\n",
+				      adjusted_mode->crtc_clock, clock_limit,
+				      yesno(pipe_config->double_wide));
 			return -EINVAL;
+		}
 	}
 
 	/*
@@ -7360,7 +7470,7 @@
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int pipe = crtc->pipe;
-	int dpll_reg = DPLL(crtc->pipe);
+	i915_reg_t dpll_reg = DPLL(crtc->pipe);
 	enum dpio_channel port = vlv_pipe_to_channel(pipe);
 	u32 loopfilter, tribuf_calcntr;
 	u32 bestn, bestm1, bestm2, bestp1, bestp2, bestm2_frac;
@@ -7826,8 +7936,6 @@
 	int refclk, num_connectors = 0;
 	intel_clock_t clock;
 	bool ok;
-	bool is_dsi = false;
-	struct intel_encoder *encoder;
 	const intel_limit_t *limit;
 	struct drm_atomic_state *state = crtc_state->base.state;
 	struct drm_connector *connector;
@@ -7837,26 +7945,14 @@
 	memset(&crtc_state->dpll_hw_state, 0,
 	       sizeof(crtc_state->dpll_hw_state));
 
-	for_each_connector_in_state(state, connector, connector_state, i) {
-		if (connector_state->crtc != &crtc->base)
-			continue;
-
-		encoder = to_intel_encoder(connector_state->best_encoder);
-
-		switch (encoder->type) {
-		case INTEL_OUTPUT_DSI:
-			is_dsi = true;
-			break;
-		default:
-			break;
-		}
-
-		num_connectors++;
-	}
-
-	if (is_dsi)
+	if (crtc_state->has_dsi_encoder)
 		return 0;
 
+	for_each_connector_in_state(state, connector, connector_state, i) {
+		if (connector_state->crtc == &crtc->base)
+			num_connectors++;
+	}
+
 	if (!crtc_state->clock_set) {
 		refclk = i9xx_get_refclk(crtc_state, num_connectors);
 
@@ -8849,7 +8945,7 @@
 	memset(&crtc_state->dpll_hw_state, 0,
 	       sizeof(crtc_state->dpll_hw_state));
 
-	is_lvds = intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS);
+	is_lvds = intel_pipe_will_have_type(crtc_state, INTEL_OUTPUT_LVDS);
 
 	WARN(!(HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev)),
 	     "Unexpected PCH type %d\n", INTEL_PCH_TYPE(dev));
@@ -9278,8 +9374,8 @@
 
 	I915_STATE_WARN(I915_READ(HSW_PWR_WELL_DRIVER), "Power well on\n");
 	I915_STATE_WARN(I915_READ(SPLL_CTL) & SPLL_PLL_ENABLE, "SPLL enabled\n");
-	I915_STATE_WARN(I915_READ(WRPLL_CTL1) & WRPLL_PLL_ENABLE, "WRPLL1 enabled\n");
-	I915_STATE_WARN(I915_READ(WRPLL_CTL2) & WRPLL_PLL_ENABLE, "WRPLL2 enabled\n");
+	I915_STATE_WARN(I915_READ(WRPLL_CTL(0)) & WRPLL_PLL_ENABLE, "WRPLL1 enabled\n");
+	I915_STATE_WARN(I915_READ(WRPLL_CTL(1)) & WRPLL_PLL_ENABLE, "WRPLL2 enabled\n");
 	I915_STATE_WARN(I915_READ(PCH_PP_STATUS) & PP_ON, "Panel power on\n");
 	I915_STATE_WARN(I915_READ(BLC_PWM_CPU_CTL2) & BLM_PWM_ENABLE,
 	     "CPU PWM1 enabled\n");
@@ -9623,14 +9719,10 @@
 	else
 		cdclk = 337500;
 
-	/*
-	 * FIXME move the cdclk caclulation to
-	 * compute_config() so we can fail gracegully.
-	 */
 	if (cdclk > dev_priv->max_cdclk_freq) {
-		DRM_ERROR("requested cdclk (%d kHz) exceeds max (%d kHz)\n",
-			  cdclk, dev_priv->max_cdclk_freq);
-		cdclk = dev_priv->max_cdclk_freq;
+		DRM_DEBUG_KMS("requested cdclk (%d kHz) exceeds max (%d kHz)\n",
+			      cdclk, dev_priv->max_cdclk_freq);
+		return -EINVAL;
 	}
 
 	to_intel_atomic_state(state)->cdclk = cdclk;
@@ -9723,6 +9815,9 @@
 	case PORT_CLK_SEL_WRPLL2:
 		pipe_config->shared_dpll = DPLL_ID_WRPLL2;
 		break;
+	case PORT_CLK_SEL_SPLL:
+		pipe_config->shared_dpll = DPLL_ID_SPLL;
+		break;
 	}
 }
 
@@ -9739,7 +9834,7 @@
 
 	port = (tmp & TRANS_DDI_PORT_MASK) >> TRANS_DDI_PORT_SHIFT;
 
-	if (IS_SKYLAKE(dev))
+	if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev))
 		skylake_get_ddi_pll(dev_priv, port, pipe_config);
 	else if (IS_BROXTON(dev))
 		bxt_get_ddi_pll(dev_priv, port, pipe_config);
@@ -10085,20 +10180,17 @@
 	int ret;
 
 	intel_fb = kzalloc(sizeof(*intel_fb), GFP_KERNEL);
-	if (!intel_fb) {
-		drm_gem_object_unreference(&obj->base);
+	if (!intel_fb)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	ret = intel_framebuffer_init(dev, intel_fb, mode_cmd, obj);
 	if (ret)
 		goto err;
 
 	return &intel_fb->base;
-err:
-	drm_gem_object_unreference(&obj->base);
-	kfree(intel_fb);
 
+err:
+	kfree(intel_fb);
 	return ERR_PTR(ret);
 }
 
@@ -10138,6 +10230,7 @@
 				  struct drm_display_mode *mode,
 				  int depth, int bpp)
 {
+	struct drm_framebuffer *fb;
 	struct drm_i915_gem_object *obj;
 	struct drm_mode_fb_cmd2 mode_cmd = { 0 };
 
@@ -10152,7 +10245,11 @@
 								bpp);
 	mode_cmd.pixel_format = drm_mode_legacy_fb_format(bpp, depth);
 
-	return intel_framebuffer_create(dev, &mode_cmd, obj);
+	fb = intel_framebuffer_create(dev, &mode_cmd, obj);
+	if (IS_ERR(fb))
+		drm_gem_object_unreference_unlocked(&obj->base);
+
+	return fb;
 }
 
 static struct drm_framebuffer *
@@ -11055,7 +11152,7 @@
 	 */
 	if (ring->id == RCS) {
 		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
-		intel_ring_emit(ring, DERRMR);
+		intel_ring_emit_reg(ring, DERRMR);
 		intel_ring_emit(ring, ~(DERRMR_PIPEA_PRI_FLIP_DONE |
 					DERRMR_PIPEB_PRI_FLIP_DONE |
 					DERRMR_PIPEC_PRI_FLIP_DONE));
@@ -11065,7 +11162,7 @@
 		else
 			intel_ring_emit(ring, MI_STORE_REGISTER_MEM |
 					      MI_SRM_LRM_GLOBAL_GTT);
-		intel_ring_emit(ring, DERRMR);
+		intel_ring_emit_reg(ring, DERRMR);
 		intel_ring_emit(ring, ring->scratch.gtt_offset + 256);
 		if (IS_GEN8(dev)) {
 			intel_ring_emit(ring, 0);
@@ -11105,18 +11202,23 @@
 		return true;
 	else if (i915.enable_execlists)
 		return true;
+	else if (obj->base.dma_buf &&
+		 !reservation_object_test_signaled_rcu(obj->base.dma_buf->resv,
+						       false))
+		return true;
 	else
 		return ring != i915_gem_request_get_ring(obj->last_write_req);
 }
 
 static void skl_do_mmio_flip(struct intel_crtc *intel_crtc,
+			     unsigned int rotation,
 			     struct intel_unpin_work *work)
 {
 	struct drm_device *dev = intel_crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_framebuffer *fb = intel_crtc->base.primary->fb;
 	const enum pipe pipe = intel_crtc->pipe;
-	u32 ctl, stride;
+	u32 ctl, stride, tile_height;
 
 	ctl = I915_READ(PLANE_CTL(pipe, 0));
 	ctl &= ~PLANE_CTL_TILED_MASK;
@@ -11140,9 +11242,16 @@
 	 * The stride is either expressed as a multiple of 64 bytes chunks for
 	 * linear buffers or in number of tiles for tiled buffers.
 	 */
-	stride = fb->pitches[0] /
-		 intel_fb_stride_alignment(dev, fb->modifier[0],
-					   fb->pixel_format);
+	if (intel_rotation_90_or_270(rotation)) {
+		/* stride = Surface height in tiles */
+		tile_height = intel_tile_height(dev, fb->pixel_format,
+						fb->modifier[0], 0);
+		stride = DIV_ROUND_UP(fb->height, tile_height);
+	} else {
+		stride = fb->pitches[0] /
+				intel_fb_stride_alignment(dev, fb->modifier[0],
+							  fb->pixel_format);
+	}
 
 	/*
 	 * Both PLANE_CTL and PLANE_STRIDE are not updated on vblank but on
@@ -11163,10 +11272,9 @@
 	struct intel_framebuffer *intel_fb =
 		to_intel_framebuffer(intel_crtc->base.primary->fb);
 	struct drm_i915_gem_object *obj = intel_fb->obj;
+	i915_reg_t reg = DSPCNTR(intel_crtc->plane);
 	u32 dspcntr;
-	u32 reg;
 
-	reg = DSPCNTR(intel_crtc->plane);
 	dspcntr = I915_READ(reg);
 
 	if (obj->tiling_mode != I915_TILING_NONE)
@@ -11200,7 +11308,7 @@
 	intel_pipe_update_start(crtc);
 
 	if (INTEL_INFO(mmio_flip->i915)->gen >= 9)
-		skl_do_mmio_flip(crtc, work);
+		skl_do_mmio_flip(crtc, mmio_flip->rotation, work);
 	else
 		/* use_mmio_flip() retricts MMIO flips to ilk+ */
 		ilk_do_mmio_flip(crtc, work);
@@ -11212,6 +11320,9 @@
 {
 	struct intel_mmio_flip *mmio_flip =
 		container_of(work, struct intel_mmio_flip, work);
+	struct intel_framebuffer *intel_fb =
+		to_intel_framebuffer(mmio_flip->crtc->base.primary->fb);
+	struct drm_i915_gem_object *obj = intel_fb->obj;
 
 	if (mmio_flip->req) {
 		WARN_ON(__i915_wait_request(mmio_flip->req,
@@ -11221,16 +11332,19 @@
 		i915_gem_request_unreference__unlocked(mmio_flip->req);
 	}
 
+	/* For framebuffer backed by dmabuf, wait for fence */
+	if (obj->base.dma_buf)
+		WARN_ON(reservation_object_wait_timeout_rcu(obj->base.dma_buf->resv,
+							    false, false,
+							    MAX_SCHEDULE_TIMEOUT) < 0);
+
 	intel_do_mmio_flip(mmio_flip);
 	kfree(mmio_flip);
 }
 
 static int intel_queue_mmio_flip(struct drm_device *dev,
 				 struct drm_crtc *crtc,
-				 struct drm_framebuffer *fb,
-				 struct drm_i915_gem_object *obj,
-				 struct intel_engine_cs *ring,
-				 uint32_t flags)
+				 struct drm_i915_gem_object *obj)
 {
 	struct intel_mmio_flip *mmio_flip;
 
@@ -11241,6 +11355,7 @@
 	mmio_flip->i915 = to_i915(dev);
 	mmio_flip->req = i915_gem_request_reference(obj->last_write_req);
 	mmio_flip->crtc = to_intel_crtc(crtc);
+	mmio_flip->rotation = crtc->primary->state->rotation;
 
 	INIT_WORK(&mmio_flip->work, intel_mmio_flip_work_func);
 	schedule_work(&mmio_flip->work);
@@ -11446,9 +11561,14 @@
 	 * synchronisation, so all we want here is to pin the framebuffer
 	 * into the display plane and skip any waits.
 	 */
+	if (!mmio_flip) {
+		ret = i915_gem_object_sync(obj, ring, &request);
+		if (ret)
+			goto cleanup_pending;
+	}
+
 	ret = intel_pin_and_fence_fb_obj(crtc->primary, fb,
-					 crtc->primary->state,
-					 mmio_flip ? i915_gem_request_get_ring(obj->last_write_req) : ring, &request);
+					 crtc->primary->state);
 	if (ret)
 		goto cleanup_pending;
 
@@ -11457,8 +11577,7 @@
 	work->gtt_offset += intel_crtc->dspaddr_offset;
 
 	if (mmio_flip) {
-		ret = intel_queue_mmio_flip(dev, crtc, fb, obj, ring,
-					    page_flip_flags);
+		ret = intel_queue_mmio_flip(dev, crtc, obj);
 		if (ret)
 			goto cleanup_unpin;
 
@@ -11489,7 +11608,7 @@
 			  to_intel_plane(primary)->frontbuffer_bit);
 	mutex_unlock(&dev->struct_mutex);
 
-	intel_fbc_disable_crtc(intel_crtc);
+	intel_fbc_deactivate(intel_crtc);
 	intel_frontbuffer_flip_prepare(dev,
 				       to_intel_plane(primary)->frontbuffer_bit);
 
@@ -11572,18 +11691,32 @@
 static bool intel_wm_need_update(struct drm_plane *plane,
 				 struct drm_plane_state *state)
 {
-	/* Update watermarks on tiling changes. */
+	struct intel_plane_state *new = to_intel_plane_state(state);
+	struct intel_plane_state *cur = to_intel_plane_state(plane->state);
+
+	/* Update watermarks on tiling or size changes. */
 	if (!plane->state->fb || !state->fb ||
 	    plane->state->fb->modifier[0] != state->fb->modifier[0] ||
-	    plane->state->rotation != state->rotation)
-		return true;
-
-	if (plane->state->crtc_w != state->crtc_w)
+	    plane->state->rotation != state->rotation ||
+	    drm_rect_width(&new->src) != drm_rect_width(&cur->src) ||
+	    drm_rect_height(&new->src) != drm_rect_height(&cur->src) ||
+	    drm_rect_width(&new->dst) != drm_rect_width(&cur->dst) ||
+	    drm_rect_height(&new->dst) != drm_rect_height(&cur->dst))
 		return true;
 
 	return false;
 }
 
+static bool needs_scaling(struct intel_plane_state *state)
+{
+	int src_w = drm_rect_width(&state->src) >> 16;
+	int src_h = drm_rect_height(&state->src) >> 16;
+	int dst_w = drm_rect_width(&state->dst);
+	int dst_h = drm_rect_height(&state->dst);
+
+	return (src_w != dst_w || src_h != dst_h);
+}
+
 int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state,
 				    struct drm_plane_state *plane_state)
 {
@@ -11599,7 +11732,6 @@
 	bool mode_changed = needs_modeset(crtc_state);
 	bool was_crtc_enabled = crtc->state->active;
 	bool is_crtc_enabled = crtc_state->active;
-
 	bool turn_off, turn_on, visible, was_visible;
 	struct drm_framebuffer *fb = plane_state->fb;
 
@@ -11612,14 +11744,6 @@
 			return ret;
 	}
 
-	/*
-	 * Disabling a plane is always okay; we just need to update
-	 * fb tracking in a special way since cleanup_fb() won't
-	 * get called by the plane helpers.
-	 */
-	if (old_plane_state->base.fb && !fb)
-		intel_crtc->atomic.disabled_planes |= 1 << i;
-
 	was_visible = old_plane_state->visible;
 	visible = to_intel_plane_state(plane_state)->visible;
 
@@ -11669,7 +11793,6 @@
 
 	switch (plane->type) {
 	case DRM_PLANE_TYPE_PRIMARY:
-		intel_crtc->atomic.wait_for_flips = true;
 		intel_crtc->atomic.pre_disable_primary = turn_off;
 		intel_crtc->atomic.post_enable_primary = turn_on;
 
@@ -11717,11 +11840,23 @@
 	case DRM_PLANE_TYPE_CURSOR:
 		break;
 	case DRM_PLANE_TYPE_OVERLAY:
-		if (turn_off && !mode_changed) {
+		/*
+		 * WaCxSRDisabledForSpriteScaling:ivb
+		 *
+		 * cstate->update_wm was already set above, so this flag will
+		 * take effect when we commit and program watermarks.
+		 */
+		if (IS_IVYBRIDGE(dev) &&
+		    needs_scaling(to_intel_plane_state(plane_state)) &&
+		    !needs_scaling(old_plane_state)) {
+			to_intel_crtc_state(crtc_state)->disable_lp_wm = true;
+		} else if (turn_off && !mode_changed) {
 			intel_crtc->atomic.wait_vblank = true;
 			intel_crtc->atomic.update_sprite_watermarks |=
 				1 << i;
 		}
+
+		break;
 	}
 	return 0;
 }
@@ -11806,6 +11941,12 @@
 	}
 
 	ret = 0;
+	if (dev_priv->display.compute_pipe_wm) {
+		ret = dev_priv->display.compute_pipe_wm(intel_crtc, state);
+		if (ret)
+			return ret;
+	}
+
 	if (INTEL_INFO(dev)->gen >= 9) {
 		if (mode_changed)
 			ret = skl_update_scaler_crtc(pipe_config);
@@ -11995,7 +12136,7 @@
 			      pipe_config->dpll_hw_state.pll9,
 			      pipe_config->dpll_hw_state.pll10,
 			      pipe_config->dpll_hw_state.pcsdw12);
-	} else if (IS_SKYLAKE(dev)) {
+	} else if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
 		DRM_DEBUG_KMS("ddi_pll_sel: %u; dpll_hw_state: "
 			      "ctrl1: 0x%x, cfgcr1: 0x%x, cfgcr2: 0x%x\n",
 			      pipe_config->ddi_pll_sel,
@@ -12003,9 +12144,10 @@
 			      pipe_config->dpll_hw_state.cfgcr1,
 			      pipe_config->dpll_hw_state.cfgcr2);
 	} else if (HAS_DDI(dev)) {
-		DRM_DEBUG_KMS("ddi_pll_sel: %u; dpll_hw_state: wrpll: 0x%x\n",
+		DRM_DEBUG_KMS("ddi_pll_sel: %u; dpll_hw_state: wrpll: 0x%x spll: 0x%x\n",
 			      pipe_config->ddi_pll_sel,
-			      pipe_config->dpll_hw_state.wrpll);
+			      pipe_config->dpll_hw_state.wrpll,
+			      pipe_config->dpll_hw_state.spll);
 	} else {
 		DRM_DEBUG_KMS("dpll_hw_state: dpll: 0x%x, dpll_md: 0x%x, "
 			      "fp0: 0x%x, fp1: 0x%x\n",
@@ -12248,6 +12390,18 @@
 			crtc->hwmode = crtc->state->adjusted_mode;
 		else
 			crtc->hwmode.crtc_clock = 0;
+
+		/*
+		 * Update legacy state to satisfy fbc code. This can
+		 * be removed when fbc uses the atomic state.
+		 */
+		if (drm_atomic_get_existing_plane_state(state, crtc->primary)) {
+			struct drm_plane_state *plane_state = crtc->primary->state;
+
+			crtc->primary->fb = plane_state->fb;
+			crtc->x = plane_state->src_x >> 16;
+			crtc->y = plane_state->src_y >> 16;
+		}
 	}
 }
 
@@ -12273,7 +12427,7 @@
 	list_for_each_entry((intel_crtc), \
 			    &(dev)->mode_config.crtc_list, \
 			    base.head) \
-		if (mask & (1 <<(intel_crtc)->pipe))
+		for_each_if (mask & (1 <<(intel_crtc)->pipe))
 
 static bool
 intel_compare_m_n(unsigned int m, unsigned int n,
@@ -12452,12 +12606,13 @@
 	if (INTEL_INFO(dev)->gen < 8) {
 		PIPE_CONF_CHECK_M_N(dp_m_n);
 
-		PIPE_CONF_CHECK_I(has_drrs);
 		if (current_config->has_drrs)
 			PIPE_CONF_CHECK_M_N(dp_m2_n2);
 	} else
 		PIPE_CONF_CHECK_M_N_ALT(dp_m_n, dp_m2_n2);
 
+	PIPE_CONF_CHECK_I(has_dsi_encoder);
+
 	PIPE_CONF_CHECK_I(base.adjusted_mode.crtc_hdisplay);
 	PIPE_CONF_CHECK_I(base.adjusted_mode.crtc_htotal);
 	PIPE_CONF_CHECK_I(base.adjusted_mode.crtc_hblank_start);
@@ -12528,6 +12683,7 @@
 	PIPE_CONF_CHECK_X(dpll_hw_state.fp0);
 	PIPE_CONF_CHECK_X(dpll_hw_state.fp1);
 	PIPE_CONF_CHECK_X(dpll_hw_state.wrpll);
+	PIPE_CONF_CHECK_X(dpll_hw_state.spll);
 	PIPE_CONF_CHECK_X(dpll_hw_state.ctrl1);
 	PIPE_CONF_CHECK_X(dpll_hw_state.cfgcr1);
 	PIPE_CONF_CHECK_X(dpll_hw_state.cfgcr2);
@@ -13011,6 +13167,45 @@
 	return 0;
 }
 
+/*
+ * Handle calculation of various watermark data at the end of the atomic check
+ * phase.  The code here should be run after the per-crtc and per-plane 'check'
+ * handlers to ensure that all derived state has been updated.
+ */
+static void calc_watermark_data(struct drm_atomic_state *state)
+{
+	struct drm_device *dev = state->dev;
+	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
+	struct drm_crtc *crtc;
+	struct drm_crtc_state *cstate;
+	struct drm_plane *plane;
+	struct drm_plane_state *pstate;
+
+	/*
+	 * Calculate watermark configuration details now that derived
+	 * plane/crtc state is all properly updated.
+	 */
+	drm_for_each_crtc(crtc, dev) {
+		cstate = drm_atomic_get_existing_crtc_state(state, crtc) ?:
+			crtc->state;
+
+		if (cstate->active)
+			intel_state->wm_config.num_pipes_active++;
+	}
+	drm_for_each_legacy_plane(plane, dev) {
+		pstate = drm_atomic_get_existing_plane_state(state, plane) ?:
+			plane->state;
+
+		if (!to_intel_plane_state(pstate)->visible)
+			continue;
+
+		intel_state->wm_config.sprites_enabled = true;
+		if (pstate->crtc_w != pstate->src_w >> 16 ||
+		    pstate->crtc_h != pstate->src_h >> 16)
+			intel_state->wm_config.sprites_scaled = true;
+	}
+}
+
 /**
  * intel_atomic_check - validate state object
  * @dev: drm device
@@ -13019,6 +13214,7 @@
 static int intel_atomic_check(struct drm_device *dev,
 			      struct drm_atomic_state *state)
 {
+	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
 	struct drm_crtc *crtc;
 	struct drm_crtc_state *crtc_state;
 	int ret, i;
@@ -13032,6 +13228,9 @@
 		struct intel_crtc_state *pipe_config =
 			to_intel_crtc_state(crtc_state);
 
+		memset(&to_intel_crtc(crtc)->atomic, 0,
+		       sizeof(struct intel_crtc_atomic_commit));
+
 		/* Catch I915_MODE_FLAG_INHERITED */
 		if (crtc_state->mode.private_flags != crtc->state->mode.private_flags)
 			crtc_state->mode_changed = true;
@@ -13056,7 +13255,8 @@
 		if (ret)
 			return ret;
 
-		if (intel_pipe_config_compare(state->dev,
+		if (i915.fastboot &&
+		    intel_pipe_config_compare(state->dev,
 					to_intel_crtc_state(crtc->state),
 					pipe_config, true)) {
 			crtc_state->mode_changed = false;
@@ -13082,10 +13282,81 @@
 		if (ret)
 			return ret;
 	} else
-		to_intel_atomic_state(state)->cdclk =
-			to_i915(state->dev)->cdclk_freq;
+		intel_state->cdclk = to_i915(state->dev)->cdclk_freq;
 
-	return drm_atomic_helper_check_planes(state->dev, state);
+	ret = drm_atomic_helper_check_planes(state->dev, state);
+	if (ret)
+		return ret;
+
+	calc_watermark_data(state);
+
+	return 0;
+}
+
+static int intel_atomic_prepare_commit(struct drm_device *dev,
+				       struct drm_atomic_state *state,
+				       bool async)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_plane_state *plane_state;
+	struct drm_crtc_state *crtc_state;
+	struct drm_plane *plane;
+	struct drm_crtc *crtc;
+	int i, ret;
+
+	if (async) {
+		DRM_DEBUG_KMS("i915 does not yet support async commit\n");
+		return -EINVAL;
+	}
+
+	for_each_crtc_in_state(state, crtc, crtc_state, i) {
+		ret = intel_crtc_wait_for_pending_flips(crtc);
+		if (ret)
+			return ret;
+
+		if (atomic_read(&to_intel_crtc(crtc)->unpin_work_count) >= 2)
+			flush_workqueue(dev_priv->wq);
+	}
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	ret = drm_atomic_helper_prepare_planes(dev, state);
+	if (!ret && !async && !i915_reset_in_progress(&dev_priv->gpu_error)) {
+		u32 reset_counter;
+
+		reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
+		mutex_unlock(&dev->struct_mutex);
+
+		for_each_plane_in_state(state, plane, plane_state, i) {
+			struct intel_plane_state *intel_plane_state =
+				to_intel_plane_state(plane_state);
+
+			if (!intel_plane_state->wait_req)
+				continue;
+
+			ret = __i915_wait_request(intel_plane_state->wait_req,
+						  reset_counter, true,
+						  NULL, NULL);
+
+			/* Swallow -EIO errors to allow updates during hw lockup. */
+			if (ret == -EIO)
+				ret = 0;
+
+			if (ret)
+				break;
+		}
+
+		if (!ret)
+			return 0;
+
+		mutex_lock(&dev->struct_mutex);
+		drm_atomic_helper_cleanup_planes(dev, state);
+	}
+
+	mutex_unlock(&dev->struct_mutex);
+	return ret;
 }
 
 /**
@@ -13109,22 +13380,20 @@
 			       bool async)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_crtc *crtc;
 	struct drm_crtc_state *crtc_state;
+	struct drm_crtc *crtc;
 	int ret = 0;
 	int i;
 	bool any_ms = false;
 
-	if (async) {
-		DRM_DEBUG_KMS("i915 does not yet support async commit\n");
-		return -EINVAL;
+	ret = intel_atomic_prepare_commit(dev, state, async);
+	if (ret) {
+		DRM_DEBUG_ATOMIC("Preparing state failed with %i\n", ret);
+		return ret;
 	}
 
-	ret = drm_atomic_helper_prepare_planes(dev, state);
-	if (ret)
-		return ret;
-
 	drm_atomic_helper_swap_state(dev, state);
+	dev_priv->wm.config = to_intel_atomic_state(state)->wm_config;
 
 	for_each_crtc_in_state(state, crtc, crtc_state, i) {
 		struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
@@ -13140,6 +13409,13 @@
 			dev_priv->display.crtc_disable(crtc);
 			intel_crtc->active = false;
 			intel_disable_shared_dpll(intel_crtc);
+
+			/*
+			 * Underruns don't always raise
+			 * interrupts, so check manually.
+			 */
+			intel_check_cpu_fifo_underruns(dev_priv);
+			intel_check_pch_fifo_underruns(dev_priv);
 		}
 	}
 
@@ -13162,6 +13438,9 @@
 			to_intel_crtc_state(crtc->state)->update_pipe;
 		unsigned long put_domains = 0;
 
+		if (modeset)
+			intel_display_power_get(dev_priv, POWER_DOMAIN_MODESET);
+
 		if (modeset && crtc->state->active) {
 			update_scanline_offset(to_intel_crtc(crtc));
 			dev_priv->display.crtc_enable(crtc);
@@ -13177,18 +13456,26 @@
 		if (!modeset)
 			intel_pre_plane_update(intel_crtc);
 
-		drm_atomic_helper_commit_planes_on_crtc(crtc_state);
+		if (crtc->state->active &&
+		    (crtc->state->planes_changed || update_pipe))
+			drm_atomic_helper_commit_planes_on_crtc(crtc_state);
 
 		if (put_domains)
 			modeset_put_power_domains(dev_priv, put_domains);
 
 		intel_post_plane_update(intel_crtc);
+
+		if (modeset)
+			intel_display_power_put(dev_priv, POWER_DOMAIN_MODESET);
 	}
 
 	/* FIXME: add subpixel order */
 
 	drm_atomic_helper_wait_for_vblanks(dev, state);
+
+	mutex_lock(&dev->struct_mutex);
 	drm_atomic_helper_cleanup_planes(dev, state);
+	mutex_unlock(&dev->struct_mutex);
 
 	if (any_ms)
 		intel_modeset_check_state(dev, state);
@@ -13357,6 +13644,8 @@
  * bits.  Some older platforms need special physical address handling for
  * cursor planes.
  *
+ * Must be called with struct_mutex held.
+ *
  * Returns 0 on success, negative error code on failure.
  */
 int
@@ -13367,28 +13656,69 @@
 	struct drm_framebuffer *fb = new_state->fb;
 	struct intel_plane *intel_plane = to_intel_plane(plane);
 	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
-	struct drm_i915_gem_object *old_obj = intel_fb_obj(plane->fb);
+	struct drm_i915_gem_object *old_obj = intel_fb_obj(plane->state->fb);
 	int ret = 0;
 
-	if (!obj)
+	if (!obj && !old_obj)
 		return 0;
 
-	mutex_lock(&dev->struct_mutex);
+	if (old_obj) {
+		struct drm_crtc_state *crtc_state =
+			drm_atomic_get_existing_crtc_state(new_state->state, plane->state->crtc);
 
-	if (plane->type == DRM_PLANE_TYPE_CURSOR &&
+		/* Big Hammer, we also need to ensure that any pending
+		 * MI_WAIT_FOR_EVENT inside a user batch buffer on the
+		 * current scanout is retired before unpinning the old
+		 * framebuffer. Note that we rely on userspace rendering
+		 * into the buffer attached to the pipe they are waiting
+		 * on. If not, userspace generates a GPU hang with IPEHR
+		 * point to the MI_WAIT_FOR_EVENT.
+		 *
+		 * This should only fail upon a hung GPU, in which case we
+		 * can safely continue.
+		 */
+		if (needs_modeset(crtc_state))
+			ret = i915_gem_object_wait_rendering(old_obj, true);
+
+		/* Swallow -EIO errors to allow updates during hw lockup. */
+		if (ret && ret != -EIO)
+			return ret;
+	}
+
+	/* For framebuffer backed by dmabuf, wait for fence */
+	if (obj && obj->base.dma_buf) {
+		ret = reservation_object_wait_timeout_rcu(obj->base.dma_buf->resv,
+							  false, true,
+							  MAX_SCHEDULE_TIMEOUT);
+		if (ret == -ERESTARTSYS)
+			return ret;
+
+		WARN_ON(ret < 0);
+	}
+
+	if (!obj) {
+		ret = 0;
+	} else if (plane->type == DRM_PLANE_TYPE_CURSOR &&
 	    INTEL_INFO(dev)->cursor_needs_physical) {
 		int align = IS_I830(dev) ? 16 * 1024 : 256;
 		ret = i915_gem_object_attach_phys(obj, align);
 		if (ret)
 			DRM_DEBUG_KMS("failed to attach phys object\n");
 	} else {
-		ret = intel_pin_and_fence_fb_obj(plane, fb, new_state, NULL, NULL);
+		ret = intel_pin_and_fence_fb_obj(plane, fb, new_state);
 	}
 
-	if (ret == 0)
-		i915_gem_track_fb(old_obj, obj, intel_plane->frontbuffer_bit);
+	if (ret == 0) {
+		if (obj) {
+			struct intel_plane_state *plane_state =
+				to_intel_plane_state(new_state);
 
-	mutex_unlock(&dev->struct_mutex);
+			i915_gem_request_assign(&plane_state->wait_req,
+						obj->last_write_req);
+		}
+
+		i915_gem_track_fb(old_obj, obj, intel_plane->frontbuffer_bit);
+	}
 
 	return ret;
 }
@@ -13399,23 +13729,35 @@
  * @fb: old framebuffer that was on plane
  *
  * Cleans up a framebuffer that has just been removed from a plane.
+ *
+ * Must be called with struct_mutex held.
  */
 void
 intel_cleanup_plane_fb(struct drm_plane *plane,
 		       const struct drm_plane_state *old_state)
 {
 	struct drm_device *dev = plane->dev;
-	struct drm_i915_gem_object *obj = intel_fb_obj(old_state->fb);
+	struct intel_plane *intel_plane = to_intel_plane(plane);
+	struct intel_plane_state *old_intel_state;
+	struct drm_i915_gem_object *old_obj = intel_fb_obj(old_state->fb);
+	struct drm_i915_gem_object *obj = intel_fb_obj(plane->state->fb);
 
-	if (!obj)
+	old_intel_state = to_intel_plane_state(old_state);
+
+	if (!obj && !old_obj)
 		return;
 
-	if (plane->type != DRM_PLANE_TYPE_CURSOR ||
-	    !INTEL_INFO(dev)->cursor_needs_physical) {
-		mutex_lock(&dev->struct_mutex);
+	if (old_obj && (plane->type != DRM_PLANE_TYPE_CURSOR ||
+	    !INTEL_INFO(dev)->cursor_needs_physical))
 		intel_unpin_fb_obj(old_state->fb, old_state);
-		mutex_unlock(&dev->struct_mutex);
-	}
+
+	/* prepare_fb aborted? */
+	if ((old_obj && (old_obj->frontbuffer_bits & intel_plane->frontbuffer_bit)) ||
+	    (obj && !(obj->frontbuffer_bits & intel_plane->frontbuffer_bit)))
+		i915_gem_track_fb(old_obj, obj, intel_plane->frontbuffer_bit);
+
+	i915_gem_request_assign(&old_intel_state->wait_req, NULL);
+
 }
 
 int
@@ -13434,7 +13776,7 @@
 	crtc_clock = crtc_state->base.adjusted_mode.crtc_clock;
 	cdclk = to_intel_atomic_state(crtc_state->base.state)->cdclk;
 
-	if (!crtc_clock || !cdclk)
+	if (WARN_ON_ONCE(!crtc_clock || cdclk < crtc_clock))
 		return DRM_PLANE_HELPER_NO_SCALING;
 
 	/*
@@ -13482,18 +13824,8 @@
 	struct drm_framebuffer *fb = state->base.fb;
 	struct drm_device *dev = plane->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_crtc *intel_crtc;
-	struct drm_rect *src = &state->src;
 
 	crtc = crtc ? crtc : plane->crtc;
-	intel_crtc = to_intel_crtc(crtc);
-
-	plane->fb = fb;
-	crtc->x = src->x1 >> 16;
-	crtc->y = src->y1 >> 16;
-
-	if (!crtc->state->active)
-		return;
 
 	dev_priv->display.update_primary_plane(crtc, fb,
 					       state->src.x1 >> 16,
@@ -13523,8 +13855,7 @@
 		intel_update_watermarks(crtc);
 
 	/* Perform vblank evasion around commit operation */
-	if (crtc->state->active)
-		intel_pipe_update_start(intel_crtc);
+	intel_pipe_update_start(intel_crtc);
 
 	if (modeset)
 		return;
@@ -13540,8 +13871,7 @@
 {
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 
-	if (crtc->state->active)
-		intel_pipe_update_end(intel_crtc);
+	intel_pipe_update_end(intel_crtc);
 }
 
 /**
@@ -13618,7 +13948,7 @@
 	drm_universal_plane_init(dev, &primary->base, 0,
 				 &intel_plane_funcs,
 				 intel_primary_formats, num_formats,
-				 DRM_PLANE_TYPE_PRIMARY);
+				 DRM_PLANE_TYPE_PRIMARY, NULL);
 
 	if (INTEL_INFO(dev)->gen >= 4)
 		intel_create_rotation_property(dev, primary);
@@ -13724,8 +14054,7 @@
 	intel_crtc->cursor_bo = obj;
 
 update:
-	if (crtc->state->active)
-		intel_crtc_update_cursor(crtc, state->visible);
+	intel_crtc_update_cursor(crtc, state->visible);
 }
 
 static struct drm_plane *intel_cursor_plane_create(struct drm_device *dev,
@@ -13758,7 +14087,7 @@
 				 &intel_plane_funcs,
 				 intel_cursor_formats,
 				 ARRAY_SIZE(intel_cursor_formats),
-				 DRM_PLANE_TYPE_CURSOR);
+				 DRM_PLANE_TYPE_CURSOR, NULL);
 
 	if (INTEL_INFO(dev)->gen >= 4) {
 		if (!dev->mode_config.rotation_property)
@@ -13835,7 +14164,7 @@
 		goto fail;
 
 	ret = drm_crtc_init_with_planes(dev, &intel_crtc->base, primary,
-					cursor, &intel_crtc_funcs);
+					cursor, &intel_crtc_funcs, NULL);
 	if (ret)
 		goto fail;
 
@@ -13961,7 +14290,14 @@
 	if (IS_CHERRYVIEW(dev))
 		return false;
 
-	if (IS_VALLEYVIEW(dev) && !dev_priv->vbt.int_crt_support)
+	if (HAS_PCH_LPT_H(dev) && I915_READ(SFUSE_STRAP) & SFUSE_STRAP_CRT_DISABLED)
+		return false;
+
+	/* DDI E can't be used if DDI A requires 4 lanes */
+	if (HAS_DDI(dev) && I915_READ(DDI_BUF_CTL(PORT_A)) & DDI_A_4_LANES)
+		return false;
+
+	if (!dev_priv->vbt.int_crt_support)
 		return false;
 
 	return true;
@@ -13997,7 +14333,7 @@
 		 */
 		found = I915_READ(DDI_BUF_CTL(PORT_A)) & DDI_INIT_DISPLAY_DETECTED;
 		/* WaIgnoreDDIAStrap: skl */
-		if (found || IS_SKYLAKE(dev))
+		if (found || IS_SKYLAKE(dev) || IS_KABYLAKE(dev))
 			intel_ddi_init(dev, PORT_A);
 
 		/* DDI B, C and D detection is indicated by the SFUSE_STRAP
@@ -14013,7 +14349,7 @@
 		/*
 		 * On SKL we don't have a way to detect DDI-E so we rely on VBT.
 		 */
-		if (IS_SKYLAKE(dev) &&
+		if ((IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) &&
 		    (dev_priv->vbt.ddi_port_info[PORT_E].supports_dp ||
 		     dev_priv->vbt.ddi_port_info[PORT_E].supports_dvi ||
 		     dev_priv->vbt.ddi_port_info[PORT_E].supports_hdmi))
@@ -14028,7 +14364,7 @@
 
 		if (I915_READ(PCH_HDMIB) & SDVO_DETECTED) {
 			/* PCH SDVOB multiplex with HDMIB */
-			found = intel_sdvo_init(dev, PCH_SDVOB, true);
+			found = intel_sdvo_init(dev, PCH_SDVOB, PORT_B);
 			if (!found)
 				intel_hdmi_init(dev, PCH_HDMIB, PORT_B);
 			if (!found && (I915_READ(PCH_DP_B) & DP_DETECTED))
@@ -14084,7 +14420,7 @@
 
 		if (I915_READ(GEN3_SDVOB) & SDVO_DETECTED) {
 			DRM_DEBUG_KMS("probing SDVOB\n");
-			found = intel_sdvo_init(dev, GEN3_SDVOB, true);
+			found = intel_sdvo_init(dev, GEN3_SDVOB, PORT_B);
 			if (!found && IS_G4X(dev)) {
 				DRM_DEBUG_KMS("probing HDMI on SDVOB\n");
 				intel_hdmi_init(dev, GEN4_HDMIB, PORT_B);
@@ -14098,7 +14434,7 @@
 
 		if (I915_READ(GEN3_SDVOB) & SDVO_DETECTED) {
 			DRM_DEBUG_KMS("probing SDVOC\n");
-			found = intel_sdvo_init(dev, GEN3_SDVOC, false);
+			found = intel_sdvo_init(dev, GEN3_SDVOC, PORT_C);
 		}
 
 		if (!found && (I915_READ(GEN3_SDVOC) & SDVO_DETECTED)) {
@@ -14364,16 +14700,22 @@
 static struct drm_framebuffer *
 intel_user_framebuffer_create(struct drm_device *dev,
 			      struct drm_file *filp,
-			      struct drm_mode_fb_cmd2 *mode_cmd)
+			      const struct drm_mode_fb_cmd2 *user_mode_cmd)
 {
+	struct drm_framebuffer *fb;
 	struct drm_i915_gem_object *obj;
+	struct drm_mode_fb_cmd2 mode_cmd = *user_mode_cmd;
 
 	obj = to_intel_bo(drm_gem_object_lookup(dev, filp,
-						mode_cmd->handles[0]));
+						mode_cmd.handles[0]));
 	if (&obj->base == NULL)
 		return ERR_PTR(-ENOENT);
 
-	return intel_framebuffer_create(dev, mode_cmd, obj);
+	fb = intel_framebuffer_create(dev, &mode_cmd, obj);
+	if (IS_ERR(fb))
+		drm_gem_object_unreference_unlocked(&obj->base);
+
+	return fb;
 }
 
 #ifndef CONFIG_DRM_FBDEV_EMULATION
@@ -14458,7 +14800,7 @@
 	}
 
 	/* Returns the core display clock speed */
-	if (IS_SKYLAKE(dev))
+	if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev))
 		dev_priv->display.get_display_clock_speed =
 			skylake_get_display_clock_speed;
 	else if (IS_BROXTON(dev))
@@ -14498,9 +14840,6 @@
 	else if (IS_I945GM(dev) || IS_845G(dev))
 		dev_priv->display.get_display_clock_speed =
 			i9xx_misc_get_display_clock_speed;
-	else if (IS_PINEVIEW(dev))
-		dev_priv->display.get_display_clock_speed =
-			pnv_get_display_clock_speed;
 	else if (IS_I915GM(dev))
 		dev_priv->display.get_display_clock_speed =
 			i915gm_get_display_clock_speed;
@@ -14705,6 +15044,9 @@
 	/* Apple Macbook 2,1 (Core 2 T7400) */
 	{ 0x27a2, 0x8086, 0x7270, quirk_backlight_present },
 
+	/* Apple Macbook 4,1 */
+	{ 0x2a02, 0x106b, 0x00a1, quirk_backlight_present },
+
 	/* Toshiba CB35 Chromebook (Celeron 2955U) */
 	{ 0x0a06, 0x1179, 0x0a88, quirk_backlight_present },
 
@@ -14744,7 +15086,7 @@
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u8 sr1;
-	u32 vga_reg = i915_vgacntrl_reg(dev);
+	i915_reg_t vga_reg = i915_vgacntrl_reg(dev);
 
 	/* WaEnableVGAAccessThroughIOPort:ctg,elk,ilk,snb,ivb,vlv,hsw */
 	vga_get_uninterruptible(dev->pdev, VGA_RSRC_LEGACY_IO);
@@ -14860,9 +15202,6 @@
 	i915_disable_vga(dev);
 	intel_setup_outputs(dev);
 
-	/* Just in case the BIOS is doing something questionable. */
-	intel_fbc_disable(dev_priv);
-
 	drm_modeset_lock_all(dev);
 	intel_modeset_setup_hw_state(dev);
 	drm_modeset_unlock_all(dev);
@@ -14949,10 +15288,9 @@
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 reg;
+	i915_reg_t reg = PIPECONF(crtc->config->cpu_transcoder);
 
 	/* Clear any frame start delays used for debugging left by the BIOS */
-	reg = PIPECONF(crtc->config->cpu_transcoder);
 	I915_WRITE(reg, I915_READ(reg) & ~PIPECONF_FRAME_START_DELAY_MASK);
 
 	/* restore vblank interrupts to correct state */
@@ -15106,7 +15444,7 @@
 void i915_redisable_vga_power_on(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 vga_reg = i915_vgacntrl_reg(dev);
+	i915_reg_t vga_reg = i915_vgacntrl_reg(dev);
 
 	if (!(I915_READ(vga_reg) & VGA_DISP_DISABLE)) {
 		DRM_DEBUG_KMS("Something enabled VGA plane, disabling it\n");
@@ -15145,7 +15483,7 @@
 	struct intel_plane_state *plane_state =
 		to_intel_plane_state(primary->state);
 
-	plane_state->visible =
+	plane_state->visible = crtc->active &&
 		primary_get_hw_state(to_intel_plane(primary));
 
 	if (plane_state->visible)
@@ -15402,8 +15740,7 @@
 		mutex_lock(&dev->struct_mutex);
 		ret = intel_pin_and_fence_fb_obj(c->primary,
 						 c->primary->fb,
-						 c->primary->state,
-						 NULL, NULL);
+						 c->primary->state);
 		mutex_unlock(&dev->struct_mutex);
 		if (ret) {
 			DRM_ERROR("failed to pin boot fb on pipe %d\n",
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 09bdd94..0f0573a 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -277,7 +277,7 @@
 	 * See vlv_power_sequencer_reset() why we need
 	 * a power domain reference here.
 	 */
-	power_domain = intel_display_port_power_domain(encoder);
+	power_domain = intel_display_port_aux_power_domain(encoder);
 	intel_display_power_get(dev_priv, power_domain);
 
 	mutex_lock(&dev_priv->pps_mutex);
@@ -293,7 +293,7 @@
 
 	mutex_unlock(&dev_priv->pps_mutex);
 
-	power_domain = intel_display_port_power_domain(encoder);
+	power_domain = intel_display_port_aux_power_domain(encoder);
 	intel_display_power_put(dev_priv, power_domain);
 }
 
@@ -541,7 +541,8 @@
 	}
 }
 
-static u32 _pp_ctrl_reg(struct intel_dp *intel_dp)
+static i915_reg_t
+_pp_ctrl_reg(struct intel_dp *intel_dp)
 {
 	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 
@@ -553,7 +554,8 @@
 		return VLV_PIPE_PP_CONTROL(vlv_power_sequencer_pipe(intel_dp));
 }
 
-static u32 _pp_stat_reg(struct intel_dp *intel_dp)
+static i915_reg_t
+_pp_stat_reg(struct intel_dp *intel_dp)
 {
 	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 
@@ -582,7 +584,7 @@
 
 	if (IS_VALLEYVIEW(dev)) {
 		enum pipe pipe = vlv_power_sequencer_pipe(intel_dp);
-		u32 pp_ctrl_reg, pp_div_reg;
+		i915_reg_t pp_ctrl_reg, pp_div_reg;
 		u32 pp_div;
 
 		pp_ctrl_reg = VLV_PIPE_PP_CONTROL(pipe);
@@ -652,7 +654,7 @@
 	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
 	struct drm_device *dev = intel_dig_port->base.base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	uint32_t ch_ctl = intel_dp->aux_ch_ctl_reg;
+	i915_reg_t ch_ctl = intel_dp->aux_ch_ctl_reg;
 	uint32_t status;
 	bool done;
 
@@ -679,7 +681,7 @@
 	 * The clock divider is based off the hrawclk, and would like to run at
 	 * 2MHz.  So, take the hrawclk value and divide by 2 and use that
 	 */
-	return index ? 0 : intel_hrawclk(dev) / 2;
+	return index ? 0 : DIV_ROUND_CLOSEST(intel_hrawclk(dev), 2);
 }
 
 static uint32_t ilk_get_aux_clock_divider(struct intel_dp *intel_dp, int index)
@@ -692,10 +694,10 @@
 		return 0;
 
 	if (intel_dig_port->port == PORT_A) {
-		return DIV_ROUND_UP(dev_priv->cdclk_freq, 2000);
+		return DIV_ROUND_CLOSEST(dev_priv->cdclk_freq, 2000);
 
 	} else {
-		return DIV_ROUND_UP(intel_pch_rawclk(dev), 2);
+		return DIV_ROUND_CLOSEST(intel_pch_rawclk(dev), 2);
 	}
 }
 
@@ -709,7 +711,7 @@
 		if (index)
 			return 0;
 		return DIV_ROUND_CLOSEST(dev_priv->cdclk_freq, 2000);
-	} else if (dev_priv->pch_id == INTEL_PCH_LPT_DEVICE_ID_TYPE) {
+	} else if (HAS_PCH_LPT_H(dev_priv)) {
 		/* Workaround for non-ULT HSW */
 		switch (index) {
 		case 0: return 63;
@@ -717,7 +719,7 @@
 		default: return 0;
 		}
 	} else  {
-		return index ? 0 : DIV_ROUND_UP(intel_pch_rawclk(dev), 2);
+		return index ? 0 : DIV_ROUND_CLOSEST(intel_pch_rawclk(dev), 2);
 	}
 }
 
@@ -750,7 +752,7 @@
 	else
 		precharge = 5;
 
-	if (IS_BROADWELL(dev) && intel_dp->aux_ch_ctl_reg == DPA_AUX_CH_CTL)
+	if (IS_BROADWELL(dev) && intel_dig_port->port == PORT_A)
 		timeout = DP_AUX_CH_CTL_TIME_OUT_600us;
 	else
 		timeout = DP_AUX_CH_CTL_TIME_OUT_400us;
@@ -789,8 +791,7 @@
 	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
 	struct drm_device *dev = intel_dig_port->base.base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	uint32_t ch_ctl = intel_dp->aux_ch_ctl_reg;
-	uint32_t ch_data = ch_ctl + 4;
+	i915_reg_t ch_ctl = intel_dp->aux_ch_ctl_reg;
 	uint32_t aux_clock_divider;
 	int i, ret, recv_bytes;
 	uint32_t status;
@@ -816,8 +817,6 @@
 
 	intel_dp_check_edp(intel_dp);
 
-	intel_aux_display_runtime_get(dev_priv);
-
 	/* Try to wait for any previous AUX channel activity */
 	for (try = 0; try < 3; try++) {
 		status = I915_READ_NOTRACE(ch_ctl);
@@ -856,7 +855,7 @@
 		for (try = 0; try < 5; try++) {
 			/* Load the send data into the aux channel data registers */
 			for (i = 0; i < send_bytes; i += 4)
-				I915_WRITE(ch_data + i,
+				I915_WRITE(intel_dp->aux_ch_data_reg[i >> 2],
 					   intel_dp_pack_aux(send + i,
 							     send_bytes - i));
 
@@ -920,13 +919,12 @@
 		recv_bytes = recv_size;
 
 	for (i = 0; i < recv_bytes; i += 4)
-		intel_dp_unpack_aux(I915_READ(ch_data + i),
+		intel_dp_unpack_aux(I915_READ(intel_dp->aux_ch_data_reg[i >> 2]),
 				    recv + i, recv_bytes - i);
 
 	ret = recv_bytes;
 out:
 	pm_qos_update_request(&dev_priv->pm_qos, PM_QOS_DEFAULT_VALUE);
-	intel_aux_display_runtime_put(dev_priv);
 
 	if (vdd)
 		edp_panel_vdd_off(intel_dp, false);
@@ -1008,96 +1006,206 @@
 	return ret;
 }
 
-static void
-intel_dp_aux_init(struct intel_dp *intel_dp, struct intel_connector *connector)
+static i915_reg_t g4x_aux_ctl_reg(struct drm_i915_private *dev_priv,
+				       enum port port)
 {
-	struct drm_device *dev = intel_dp_to_dev(intel_dp);
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
-	enum port port = intel_dig_port->port;
-	struct ddi_vbt_port_info *info = &dev_priv->vbt.ddi_port_info[port];
-	const char *name = NULL;
-	uint32_t porte_aux_ctl_reg = DPA_AUX_CH_CTL;
-	int ret;
-
-	/* On SKL we don't have Aux for port E so we rely on VBT to set
-	 * a proper alternate aux channel.
-	 */
-	if (IS_SKYLAKE(dev) && port == PORT_E) {
-		switch (info->alternate_aux_channel) {
-		case DP_AUX_B:
-			porte_aux_ctl_reg = DPB_AUX_CH_CTL;
-			break;
-		case DP_AUX_C:
-			porte_aux_ctl_reg = DPC_AUX_CH_CTL;
-			break;
-		case DP_AUX_D:
-			porte_aux_ctl_reg = DPD_AUX_CH_CTL;
-			break;
-		case DP_AUX_A:
-		default:
-			porte_aux_ctl_reg = DPA_AUX_CH_CTL;
-		}
+	switch (port) {
+	case PORT_B:
+	case PORT_C:
+	case PORT_D:
+		return DP_AUX_CH_CTL(port);
+	default:
+		MISSING_CASE(port);
+		return DP_AUX_CH_CTL(PORT_B);
 	}
+}
+
+static i915_reg_t g4x_aux_data_reg(struct drm_i915_private *dev_priv,
+					enum port port, int index)
+{
+	switch (port) {
+	case PORT_B:
+	case PORT_C:
+	case PORT_D:
+		return DP_AUX_CH_DATA(port, index);
+	default:
+		MISSING_CASE(port);
+		return DP_AUX_CH_DATA(PORT_B, index);
+	}
+}
+
+static i915_reg_t ilk_aux_ctl_reg(struct drm_i915_private *dev_priv,
+				       enum port port)
+{
+	switch (port) {
+	case PORT_A:
+		return DP_AUX_CH_CTL(port);
+	case PORT_B:
+	case PORT_C:
+	case PORT_D:
+		return PCH_DP_AUX_CH_CTL(port);
+	default:
+		MISSING_CASE(port);
+		return DP_AUX_CH_CTL(PORT_A);
+	}
+}
+
+static i915_reg_t ilk_aux_data_reg(struct drm_i915_private *dev_priv,
+					enum port port, int index)
+{
+	switch (port) {
+	case PORT_A:
+		return DP_AUX_CH_DATA(port, index);
+	case PORT_B:
+	case PORT_C:
+	case PORT_D:
+		return PCH_DP_AUX_CH_DATA(port, index);
+	default:
+		MISSING_CASE(port);
+		return DP_AUX_CH_DATA(PORT_A, index);
+	}
+}
+
+/*
+ * On SKL we don't have Aux for port E so we rely
+ * on VBT to set a proper alternate aux channel.
+ */
+static enum port skl_porte_aux_port(struct drm_i915_private *dev_priv)
+{
+	const struct ddi_vbt_port_info *info =
+		&dev_priv->vbt.ddi_port_info[PORT_E];
+
+	switch (info->alternate_aux_channel) {
+	case DP_AUX_A:
+		return PORT_A;
+	case DP_AUX_B:
+		return PORT_B;
+	case DP_AUX_C:
+		return PORT_C;
+	case DP_AUX_D:
+		return PORT_D;
+	default:
+		MISSING_CASE(info->alternate_aux_channel);
+		return PORT_A;
+	}
+}
+
+static i915_reg_t skl_aux_ctl_reg(struct drm_i915_private *dev_priv,
+				       enum port port)
+{
+	if (port == PORT_E)
+		port = skl_porte_aux_port(dev_priv);
 
 	switch (port) {
 	case PORT_A:
-		intel_dp->aux_ch_ctl_reg = DPA_AUX_CH_CTL;
-		name = "DPDDC-A";
-		break;
 	case PORT_B:
-		intel_dp->aux_ch_ctl_reg = PCH_DPB_AUX_CH_CTL;
-		name = "DPDDC-B";
-		break;
 	case PORT_C:
-		intel_dp->aux_ch_ctl_reg = PCH_DPC_AUX_CH_CTL;
-		name = "DPDDC-C";
-		break;
 	case PORT_D:
-		intel_dp->aux_ch_ctl_reg = PCH_DPD_AUX_CH_CTL;
-		name = "DPDDC-D";
-		break;
-	case PORT_E:
-		intel_dp->aux_ch_ctl_reg = porte_aux_ctl_reg;
-		name = "DPDDC-E";
-		break;
+		return DP_AUX_CH_CTL(port);
 	default:
-		BUG();
+		MISSING_CASE(port);
+		return DP_AUX_CH_CTL(PORT_A);
 	}
+}
 
-	/*
-	 * The AUX_CTL register is usually DP_CTL + 0x10.
-	 *
-	 * On Haswell and Broadwell though:
-	 *   - Both port A DDI_BUF_CTL and DDI_AUX_CTL are on the CPU
-	 *   - Port B/C/D AUX channels are on the PCH, DDI_BUF_CTL on the CPU
-	 *
-	 * Skylake moves AUX_CTL back next to DDI_BUF_CTL, on the CPU.
-	 */
-	if (!IS_HASWELL(dev) && !IS_BROADWELL(dev) && port != PORT_E)
-		intel_dp->aux_ch_ctl_reg = intel_dp->output_reg + 0x10;
+static i915_reg_t skl_aux_data_reg(struct drm_i915_private *dev_priv,
+					enum port port, int index)
+{
+	if (port == PORT_E)
+		port = skl_porte_aux_port(dev_priv);
 
-	intel_dp->aux.name = name;
+	switch (port) {
+	case PORT_A:
+	case PORT_B:
+	case PORT_C:
+	case PORT_D:
+		return DP_AUX_CH_DATA(port, index);
+	default:
+		MISSING_CASE(port);
+		return DP_AUX_CH_DATA(PORT_A, index);
+	}
+}
+
+static i915_reg_t intel_aux_ctl_reg(struct drm_i915_private *dev_priv,
+					 enum port port)
+{
+	if (INTEL_INFO(dev_priv)->gen >= 9)
+		return skl_aux_ctl_reg(dev_priv, port);
+	else if (HAS_PCH_SPLIT(dev_priv))
+		return ilk_aux_ctl_reg(dev_priv, port);
+	else
+		return g4x_aux_ctl_reg(dev_priv, port);
+}
+
+static i915_reg_t intel_aux_data_reg(struct drm_i915_private *dev_priv,
+					  enum port port, int index)
+{
+	if (INTEL_INFO(dev_priv)->gen >= 9)
+		return skl_aux_data_reg(dev_priv, port, index);
+	else if (HAS_PCH_SPLIT(dev_priv))
+		return ilk_aux_data_reg(dev_priv, port, index);
+	else
+		return g4x_aux_data_reg(dev_priv, port, index);
+}
+
+static void intel_aux_reg_init(struct intel_dp *intel_dp)
+{
+	struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp));
+	enum port port = dp_to_dig_port(intel_dp)->port;
+	int i;
+
+	intel_dp->aux_ch_ctl_reg = intel_aux_ctl_reg(dev_priv, port);
+	for (i = 0; i < ARRAY_SIZE(intel_dp->aux_ch_data_reg); i++)
+		intel_dp->aux_ch_data_reg[i] = intel_aux_data_reg(dev_priv, port, i);
+}
+
+static void
+intel_dp_aux_fini(struct intel_dp *intel_dp)
+{
+	drm_dp_aux_unregister(&intel_dp->aux);
+	kfree(intel_dp->aux.name);
+}
+
+static int
+intel_dp_aux_init(struct intel_dp *intel_dp, struct intel_connector *connector)
+{
+	struct drm_device *dev = intel_dp_to_dev(intel_dp);
+	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
+	enum port port = intel_dig_port->port;
+	int ret;
+
+	intel_aux_reg_init(intel_dp);
+
+	intel_dp->aux.name = kasprintf(GFP_KERNEL, "DPDDC-%c", port_name(port));
+	if (!intel_dp->aux.name)
+		return -ENOMEM;
+
 	intel_dp->aux.dev = dev->dev;
 	intel_dp->aux.transfer = intel_dp_aux_transfer;
 
-	DRM_DEBUG_KMS("registering %s bus for %s\n", name,
+	DRM_DEBUG_KMS("registering %s bus for %s\n",
+		      intel_dp->aux.name,
 		      connector->base.kdev->kobj.name);
 
 	ret = drm_dp_aux_register(&intel_dp->aux);
 	if (ret < 0) {
 		DRM_ERROR("drm_dp_aux_register() for %s failed (%d)\n",
-			  name, ret);
-		return;
+			  intel_dp->aux.name, ret);
+		kfree(intel_dp->aux.name);
+		return ret;
 	}
 
 	ret = sysfs_create_link(&connector->base.kdev->kobj,
 				&intel_dp->aux.ddc.dev.kobj,
 				intel_dp->aux.ddc.dev.kobj.name);
 	if (ret < 0) {
-		DRM_ERROR("sysfs_create_link() for %s failed (%d)\n", name, ret);
-		drm_dp_aux_unregister(&intel_dp->aux);
+		DRM_ERROR("sysfs_create_link() for %s failed (%d)\n",
+			  intel_dp->aux.name, ret);
+		intel_dp_aux_fini(intel_dp);
+		return ret;
 	}
+
+	return 0;
 }
 
 static void
@@ -1189,10 +1297,13 @@
 	return (intel_dp_max_link_bw(intel_dp) >> 3) + 1;
 }
 
-static bool intel_dp_source_supports_hbr2(struct drm_device *dev)
+bool intel_dp_source_supports_hbr2(struct intel_dp *intel_dp)
 {
+	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
+	struct drm_device *dev = dig_port->base.base.dev;
+
 	/* WaDisableHBR2:skl */
-	if (IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_B0)
+	if (IS_SKL_REVID(dev, 0, SKL_REVID_B0))
 		return false;
 
 	if ((IS_HASWELL(dev) && !IS_HSW_ULX(dev)) || IS_BROADWELL(dev) ||
@@ -1203,14 +1314,16 @@
 }
 
 static int
-intel_dp_source_rates(struct drm_device *dev, const int **source_rates)
+intel_dp_source_rates(struct intel_dp *intel_dp, const int **source_rates)
 {
+	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
+	struct drm_device *dev = dig_port->base.base.dev;
 	int size;
 
 	if (IS_BROXTON(dev)) {
 		*source_rates = bxt_rates;
 		size = ARRAY_SIZE(bxt_rates);
-	} else if (IS_SKYLAKE(dev)) {
+	} else if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
 		*source_rates = skl_rates;
 		size = ARRAY_SIZE(skl_rates);
 	} else {
@@ -1219,7 +1332,7 @@
 	}
 
 	/* This depends on the fact that 5.4 is last value in the array */
-	if (!intel_dp_source_supports_hbr2(dev))
+	if (!intel_dp_source_supports_hbr2(intel_dp))
 		size--;
 
 	return size;
@@ -1284,12 +1397,11 @@
 static int intel_dp_common_rates(struct intel_dp *intel_dp,
 				 int *common_rates)
 {
-	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 	const int *source_rates, *sink_rates;
 	int source_len, sink_len;
 
 	sink_len = intel_dp_sink_rates(intel_dp, &sink_rates);
-	source_len = intel_dp_source_rates(dev, &source_rates);
+	source_len = intel_dp_source_rates(intel_dp, &source_rates);
 
 	return intersect_rates(source_rates, source_len,
 			       sink_rates, sink_len,
@@ -1314,7 +1426,6 @@
 
 static void intel_dp_print_rates(struct intel_dp *intel_dp)
 {
-	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 	const int *source_rates, *sink_rates;
 	int source_len, sink_len, common_len;
 	int common_rates[DP_MAX_SUPPORTED_RATES];
@@ -1323,7 +1434,7 @@
 	if ((drm_debug & DRM_UT_KMS) == 0)
 		return;
 
-	source_len = intel_dp_source_rates(dev, &source_rates);
+	source_len = intel_dp_source_rates(intel_dp, &source_rates);
 	snprintf_int_array(str, sizeof(str), source_rates, source_len);
 	DRM_DEBUG_KMS("source rates: %s\n", str);
 
@@ -1365,8 +1476,8 @@
 	return rate_to_index(rate, intel_dp->sink_rates);
 }
 
-static void intel_dp_compute_rate(struct intel_dp *intel_dp, int port_clock,
-				  uint8_t *link_bw, uint8_t *rate_select)
+void intel_dp_compute_rate(struct intel_dp *intel_dp, int port_clock,
+			   uint8_t *link_bw, uint8_t *rate_select)
 {
 	if (intel_dp->num_sink_rates) {
 		*link_bw = 0;
@@ -1426,7 +1537,7 @@
 				return ret;
 		}
 
-		if (!HAS_PCH_SPLIT(dev))
+		if (HAS_GMCH_DISPLAY(dev))
 			intel_gmch_panel_fitting(intel_crtc, pipe_config,
 						 intel_connector->panel.fitting_mode);
 		else
@@ -1530,7 +1641,7 @@
 				&pipe_config->dp_m2_n2);
 	}
 
-	if (IS_SKYLAKE(dev) && is_edp(intel_dp))
+	if ((IS_SKYLAKE(dev)  || IS_KABYLAKE(dev)) && is_edp(intel_dp))
 		skl_edp_set_pll_config(pipe_config);
 	else if (IS_BROXTON(dev))
 		/* handled in ddi */;
@@ -1542,37 +1653,6 @@
 	return true;
 }
 
-static void ironlake_set_pll_cpu_edp(struct intel_dp *intel_dp)
-{
-	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
-	struct intel_crtc *crtc = to_intel_crtc(dig_port->base.base.crtc);
-	struct drm_device *dev = crtc->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 dpa_ctl;
-
-	DRM_DEBUG_KMS("eDP PLL enable for clock %d\n",
-		      crtc->config->port_clock);
-	dpa_ctl = I915_READ(DP_A);
-	dpa_ctl &= ~DP_PLL_FREQ_MASK;
-
-	if (crtc->config->port_clock == 162000) {
-		/* For a long time we've carried around a ILK-DevA w/a for the
-		 * 160MHz clock. If we're really unlucky, it's still required.
-		 */
-		DRM_DEBUG_KMS("160MHz cpu eDP clock, might need ilk devA w/a\n");
-		dpa_ctl |= DP_PLL_FREQ_160MHZ;
-		intel_dp->DP |= DP_PLL_FREQ_160MHZ;
-	} else {
-		dpa_ctl |= DP_PLL_FREQ_270MHZ;
-		intel_dp->DP |= DP_PLL_FREQ_270MHZ;
-	}
-
-	I915_WRITE(DP_A, dpa_ctl);
-
-	POSTING_READ(DP_A);
-	udelay(500);
-}
-
 void intel_dp_set_link_params(struct intel_dp *intel_dp,
 			      const struct intel_crtc_state *pipe_config)
 {
@@ -1617,9 +1697,6 @@
 	intel_dp->DP |= DP_VOLTAGE_0_4 | DP_PRE_EMPHASIS_0;
 	intel_dp->DP |= DP_PORT_WIDTH(crtc->config->lane_count);
 
-	if (crtc->config->has_audio)
-		intel_dp->DP |= DP_AUDIO_OUTPUT_ENABLE;
-
 	/* Split out the IBX/CPU vs CPT settings */
 
 	if (IS_GEN7(dev) && port == PORT_A) {
@@ -1680,7 +1757,7 @@
 {
 	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 pp_stat_reg, pp_ctrl_reg;
+	i915_reg_t pp_stat_reg, pp_ctrl_reg;
 
 	lockdep_assert_held(&dev_priv->pps_mutex);
 
@@ -1770,7 +1847,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	enum intel_display_power_domain power_domain;
 	u32 pp;
-	u32 pp_stat_reg, pp_ctrl_reg;
+	i915_reg_t pp_stat_reg, pp_ctrl_reg;
 	bool need_to_disable = !intel_dp->want_panel_vdd;
 
 	lockdep_assert_held(&dev_priv->pps_mutex);
@@ -1784,7 +1861,7 @@
 	if (edp_have_panel_vdd(intel_dp))
 		return need_to_disable;
 
-	power_domain = intel_display_port_power_domain(intel_encoder);
+	power_domain = intel_display_port_aux_power_domain(intel_encoder);
 	intel_display_power_get(dev_priv, power_domain);
 
 	DRM_DEBUG_KMS("Turning eDP port %c VDD on\n",
@@ -1846,7 +1923,7 @@
 	struct intel_encoder *intel_encoder = &intel_dig_port->base;
 	enum intel_display_power_domain power_domain;
 	u32 pp;
-	u32 pp_stat_reg, pp_ctrl_reg;
+	i915_reg_t pp_stat_reg, pp_ctrl_reg;
 
 	lockdep_assert_held(&dev_priv->pps_mutex);
 
@@ -1874,7 +1951,7 @@
 	if ((pp & POWER_TARGET_ON) == 0)
 		intel_dp->last_power_cycle = jiffies;
 
-	power_domain = intel_display_port_power_domain(intel_encoder);
+	power_domain = intel_display_port_aux_power_domain(intel_encoder);
 	intel_display_power_put(dev_priv, power_domain);
 }
 
@@ -1933,7 +2010,7 @@
 	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 pp;
-	u32 pp_ctrl_reg;
+	i915_reg_t pp_ctrl_reg;
 
 	lockdep_assert_held(&dev_priv->pps_mutex);
 
@@ -1995,7 +2072,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	enum intel_display_power_domain power_domain;
 	u32 pp;
-	u32 pp_ctrl_reg;
+	i915_reg_t pp_ctrl_reg;
 
 	lockdep_assert_held(&dev_priv->pps_mutex);
 
@@ -2025,7 +2102,7 @@
 	wait_panel_off(intel_dp);
 
 	/* We got a reference when we enabled the VDD. */
-	power_domain = intel_display_port_power_domain(intel_encoder);
+	power_domain = intel_display_port_aux_power_domain(intel_encoder);
 	intel_display_power_put(dev_priv, power_domain);
 }
 
@@ -2046,7 +2123,7 @@
 	struct drm_device *dev = intel_dig_port->base.base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 pp;
-	u32 pp_ctrl_reg;
+	i915_reg_t pp_ctrl_reg;
 
 	/*
 	 * If we enable the backlight right away following a panel power
@@ -2087,7 +2164,7 @@
 	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 pp;
-	u32 pp_ctrl_reg;
+	i915_reg_t pp_ctrl_reg;
 
 	if (!is_edp(intel_dp))
 		return;
@@ -2146,27 +2223,61 @@
 		_intel_edp_backlight_off(intel_dp);
 }
 
+static const char *state_string(bool enabled)
+{
+	return enabled ? "on" : "off";
+}
+
+static void assert_dp_port(struct intel_dp *intel_dp, bool state)
+{
+	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
+	struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev);
+	bool cur_state = I915_READ(intel_dp->output_reg) & DP_PORT_EN;
+
+	I915_STATE_WARN(cur_state != state,
+			"DP port %c state assertion failure (expected %s, current %s)\n",
+			port_name(dig_port->port),
+			state_string(state), state_string(cur_state));
+}
+#define assert_dp_port_disabled(d) assert_dp_port((d), false)
+
+static void assert_edp_pll(struct drm_i915_private *dev_priv, bool state)
+{
+	bool cur_state = I915_READ(DP_A) & DP_PLL_ENABLE;
+
+	I915_STATE_WARN(cur_state != state,
+			"eDP PLL state assertion failure (expected %s, current %s)\n",
+			state_string(state), state_string(cur_state));
+}
+#define assert_edp_pll_enabled(d) assert_edp_pll((d), true)
+#define assert_edp_pll_disabled(d) assert_edp_pll((d), false)
+
 static void ironlake_edp_pll_on(struct intel_dp *intel_dp)
 {
 	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
-	struct drm_crtc *crtc = intel_dig_port->base.base.crtc;
-	struct drm_device *dev = crtc->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 dpa_ctl;
+	struct intel_crtc *crtc = to_intel_crtc(intel_dig_port->base.base.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 
-	assert_pipe_disabled(dev_priv,
-			     to_intel_crtc(crtc)->pipe);
+	assert_pipe_disabled(dev_priv, crtc->pipe);
+	assert_dp_port_disabled(intel_dp);
+	assert_edp_pll_disabled(dev_priv);
 
-	DRM_DEBUG_KMS("\n");
-	dpa_ctl = I915_READ(DP_A);
-	WARN(dpa_ctl & DP_PLL_ENABLE, "dp pll on, should be off\n");
-	WARN(dpa_ctl & DP_PORT_EN, "dp port still on, should be off\n");
+	DRM_DEBUG_KMS("enabling eDP PLL for clock %d\n",
+		      crtc->config->port_clock);
 
-	/* We don't adjust intel_dp->DP while tearing down the link, to
-	 * facilitate link retraining (e.g. after hotplug). Hence clear all
-	 * enable bits here to ensure that we don't enable too much. */
-	intel_dp->DP &= ~(DP_PORT_EN | DP_AUDIO_OUTPUT_ENABLE);
+	intel_dp->DP &= ~DP_PLL_FREQ_MASK;
+
+	if (crtc->config->port_clock == 162000)
+		intel_dp->DP |= DP_PLL_FREQ_162MHZ;
+	else
+		intel_dp->DP |= DP_PLL_FREQ_270MHZ;
+
+	I915_WRITE(DP_A, intel_dp->DP);
+	POSTING_READ(DP_A);
+	udelay(500);
+
 	intel_dp->DP |= DP_PLL_ENABLE;
+
 	I915_WRITE(DP_A, intel_dp->DP);
 	POSTING_READ(DP_A);
 	udelay(200);
@@ -2175,24 +2286,18 @@
 static void ironlake_edp_pll_off(struct intel_dp *intel_dp)
 {
 	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
-	struct drm_crtc *crtc = intel_dig_port->base.base.crtc;
-	struct drm_device *dev = crtc->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 dpa_ctl;
+	struct intel_crtc *crtc = to_intel_crtc(intel_dig_port->base.base.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 
-	assert_pipe_disabled(dev_priv,
-			     to_intel_crtc(crtc)->pipe);
+	assert_pipe_disabled(dev_priv, crtc->pipe);
+	assert_dp_port_disabled(intel_dp);
+	assert_edp_pll_enabled(dev_priv);
 
-	dpa_ctl = I915_READ(DP_A);
-	WARN((dpa_ctl & DP_PLL_ENABLE) == 0,
-	     "dp pll off, should be on\n");
-	WARN(dpa_ctl & DP_PORT_EN, "dp port still on, should be off\n");
+	DRM_DEBUG_KMS("disabling eDP PLL\n");
 
-	/* We can't rely on the value tracked for the DP register in
-	 * intel_dp->DP because link_down must not change that (otherwise link
-	 * re-training will fail. */
-	dpa_ctl &= ~DP_PLL_ENABLE;
-	I915_WRITE(DP_A, dpa_ctl);
+	intel_dp->DP &= ~DP_PLL_ENABLE;
+
+	I915_WRITE(DP_A, intel_dp->DP);
 	POSTING_READ(DP_A);
 	udelay(200);
 }
@@ -2261,7 +2366,7 @@
 		}
 
 		DRM_DEBUG_KMS("No pipe for dp port 0x%x found\n",
-			      intel_dp->output_reg);
+			      i915_mmio_reg_offset(intel_dp->output_reg));
 	} else if (IS_CHERRYVIEW(dev)) {
 		*pipe = DP_PORT_TO_PIPE_CHV(tmp);
 	} else {
@@ -2324,7 +2429,7 @@
 	intel_dp_get_m_n(crtc, pipe_config);
 
 	if (port == PORT_A) {
-		if ((I915_READ(DP_A) & DP_PLL_FREQ_MASK) == DP_PLL_FREQ_160MHZ)
+		if ((I915_READ(DP_A) & DP_PLL_FREQ_MASK) == DP_PLL_FREQ_162MHZ)
 			pipe_config->port_clock = 162000;
 		else
 			pipe_config->port_clock = 270000;
@@ -2389,6 +2494,8 @@
 	enum port port = dp_to_dig_port(intel_dp)->port;
 
 	intel_dp_link_down(intel_dp);
+
+	/* Only ilk+ has port A */
 	if (port == PORT_A)
 		ironlake_edp_pll_off(intel_dp);
 }
@@ -2548,6 +2655,8 @@
 {
 	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *crtc =
+		to_intel_crtc(dp_to_dig_port(intel_dp)->base.base.crtc);
 
 	/* enable with pattern 1 (as per spec) */
 	_intel_dp_set_link_train(intel_dp, &intel_dp->DP,
@@ -2563,6 +2672,8 @@
 	 * fail when the power sequencer is freshly used for this port.
 	 */
 	intel_dp->DP |= DP_PORT_EN;
+	if (crtc->config->has_audio)
+		intel_dp->DP |= DP_AUDIO_OUTPUT_ENABLE;
 
 	I915_WRITE(intel_dp->output_reg, intel_dp->DP);
 	POSTING_READ(intel_dp->output_reg);
@@ -2575,6 +2686,8 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
 	uint32_t dp_reg = I915_READ(intel_dp->output_reg);
+	enum port port = dp_to_dig_port(intel_dp)->port;
+	enum pipe pipe = crtc->pipe;
 
 	if (WARN_ON(dp_reg & DP_PORT_EN))
 		return;
@@ -2584,12 +2697,35 @@
 	if (IS_VALLEYVIEW(dev))
 		vlv_init_panel_power_sequencer(intel_dp);
 
+	/*
+	 * We get an occasional spurious underrun between the port
+	 * enable and vdd enable, when enabling port A eDP.
+	 *
+	 * FIXME: Not sure if this applies to (PCH) port D eDP as well
+	 */
+	if (port == PORT_A)
+		intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false);
+
 	intel_dp_enable_port(intel_dp);
 
+	if (port == PORT_A && IS_GEN5(dev_priv)) {
+		/*
+		 * Underrun reporting for the other pipe was disabled in
+		 * g4x_pre_enable_dp(). The eDP PLL and port have now been
+		 * enabled, so it's now safe to re-enable underrun reporting.
+		 */
+		intel_wait_for_vblank_if_active(dev_priv->dev, !pipe);
+		intel_set_cpu_fifo_underrun_reporting(dev_priv, !pipe, true);
+		intel_set_pch_fifo_underrun_reporting(dev_priv, !pipe, true);
+	}
+
 	edp_panel_vdd_on(intel_dp);
 	edp_panel_on(intel_dp);
 	edp_panel_vdd_off(intel_dp, true);
 
+	if (port == PORT_A)
+		intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true);
+
 	pps_unlock(intel_dp);
 
 	if (IS_VALLEYVIEW(dev)) {
@@ -2608,7 +2744,7 @@
 
 	if (crtc->config->has_audio) {
 		DRM_DEBUG_DRIVER("Enabling DP audio on pipe %c\n",
-				 pipe_name(crtc->pipe));
+				 pipe_name(pipe));
 		intel_audio_codec_enable(encoder);
 	}
 }
@@ -2631,16 +2767,29 @@
 
 static void g4x_pre_enable_dp(struct intel_encoder *encoder)
 {
+	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
-	struct intel_digital_port *dport = dp_to_dig_port(intel_dp);
+	enum port port = dp_to_dig_port(intel_dp)->port;
+	enum pipe pipe = to_intel_crtc(encoder->base.crtc)->pipe;
 
 	intel_dp_prepare(encoder);
 
-	/* Only ilk+ has port A */
-	if (dport->port == PORT_A) {
-		ironlake_set_pll_cpu_edp(intel_dp);
-		ironlake_edp_pll_on(intel_dp);
+	if (port == PORT_A && IS_GEN5(dev_priv)) {
+		/*
+		 * We get FIFO underruns on the other pipe when
+		 * enabling the CPU eDP PLL, and when enabling CPU
+		 * eDP port. We could potentially avoid the PLL
+		 * underrun with a vblank wait just prior to enabling
+		 * the PLL, but that doesn't appear to help the port
+		 * enable case. Just sweep it all under the rug.
+		 */
+		intel_set_cpu_fifo_underrun_reporting(dev_priv, !pipe, false);
+		intel_set_pch_fifo_underrun_reporting(dev_priv, !pipe, false);
 	}
+
+	/* Only ilk+ has port A */
+	if (port == PORT_A)
+		ironlake_edp_pll_on(intel_dp);
 }
 
 static void vlv_detach_power_sequencer(struct intel_dp *intel_dp)
@@ -2648,7 +2797,7 @@
 	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
 	struct drm_i915_private *dev_priv = intel_dig_port->base.base.dev->dev_private;
 	enum pipe pipe = intel_dp->pps_pipe;
-	int pp_on_reg = VLV_PIPE_PP_ON_DELAYS(pipe);
+	i915_reg_t pp_on_reg = VLV_PIPE_PP_ON_DELAYS(pipe);
 
 	edp_panel_vdd_off_sync(intel_dp);
 
@@ -3046,7 +3195,7 @@
  * Fetch AUX CH registers 0x202 - 0x207 which contain
  * link status information
  */
-static bool
+bool
 intel_dp_get_link_status(struct intel_dp *intel_dp, uint8_t link_status[DP_LINK_STATUS_SIZE])
 {
 	return intel_dp_dpcd_read_wake(&intel_dp->aux,
@@ -3056,7 +3205,7 @@
 }
 
 /* These are source-specific values. */
-static uint8_t
+uint8_t
 intel_dp_voltage_max(struct intel_dp *intel_dp)
 {
 	struct drm_device *dev = intel_dp_to_dev(intel_dp);
@@ -3079,7 +3228,7 @@
 		return DP_TRAIN_VOLTAGE_SWING_LEVEL_2;
 }
 
-static uint8_t
+uint8_t
 intel_dp_pre_emphasis_max(struct intel_dp *intel_dp, uint8_t voltage_swing)
 {
 	struct drm_device *dev = intel_dp_to_dev(intel_dp);
@@ -3421,38 +3570,6 @@
 	return 0;
 }
 
-static void
-intel_get_adjust_train(struct intel_dp *intel_dp,
-		       const uint8_t link_status[DP_LINK_STATUS_SIZE])
-{
-	uint8_t v = 0;
-	uint8_t p = 0;
-	int lane;
-	uint8_t voltage_max;
-	uint8_t preemph_max;
-
-	for (lane = 0; lane < intel_dp->lane_count; lane++) {
-		uint8_t this_v = drm_dp_get_adjust_request_voltage(link_status, lane);
-		uint8_t this_p = drm_dp_get_adjust_request_pre_emphasis(link_status, lane);
-
-		if (this_v > v)
-			v = this_v;
-		if (this_p > p)
-			p = this_p;
-	}
-
-	voltage_max = intel_dp_voltage_max(intel_dp);
-	if (v >= voltage_max)
-		v = voltage_max | DP_TRAIN_MAX_SWING_REACHED;
-
-	preemph_max = intel_dp_pre_emphasis_max(intel_dp, v);
-	if (p >= preemph_max)
-		p = preemph_max | DP_TRAIN_MAX_PRE_EMPHASIS_REACHED;
-
-	for (lane = 0; lane < 4; lane++)
-		intel_dp->train_set[lane] = v | p;
-}
-
 static uint32_t
 gen4_signal_levels(uint8_t train_set)
 {
@@ -3550,13 +3667,13 @@
 	}
 }
 
-/* Properly updates "DP" with the correct signal levels. */
-static void
-intel_dp_set_signal_levels(struct intel_dp *intel_dp, uint32_t *DP)
+void
+intel_dp_set_signal_levels(struct intel_dp *intel_dp)
 {
 	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
 	enum port port = intel_dig_port->port;
 	struct drm_device *dev = intel_dig_port->base.base.dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
 	uint32_t signal_levels, mask = 0;
 	uint8_t train_set = intel_dp->train_set[0];
 
@@ -3591,74 +3708,27 @@
 		(train_set & DP_TRAIN_PRE_EMPHASIS_MASK) >>
 			DP_TRAIN_PRE_EMPHASIS_SHIFT);
 
-	*DP = (*DP & ~mask) | signal_levels;
+	intel_dp->DP = (intel_dp->DP & ~mask) | signal_levels;
+
+	I915_WRITE(intel_dp->output_reg, intel_dp->DP);
+	POSTING_READ(intel_dp->output_reg);
 }
 
-static bool
-intel_dp_set_link_train(struct intel_dp *intel_dp,
-			uint32_t *DP,
-			uint8_t dp_train_pat)
+void
+intel_dp_program_link_training_pattern(struct intel_dp *intel_dp,
+				       uint8_t dp_train_pat)
 {
 	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
 	struct drm_i915_private *dev_priv =
 		to_i915(intel_dig_port->base.base.dev);
-	uint8_t buf[sizeof(intel_dp->train_set) + 1];
-	int ret, len;
 
-	_intel_dp_set_link_train(intel_dp, DP, dp_train_pat);
+	_intel_dp_set_link_train(intel_dp, &intel_dp->DP, dp_train_pat);
 
-	I915_WRITE(intel_dp->output_reg, *DP);
+	I915_WRITE(intel_dp->output_reg, intel_dp->DP);
 	POSTING_READ(intel_dp->output_reg);
-
-	buf[0] = dp_train_pat;
-	if ((dp_train_pat & DP_TRAINING_PATTERN_MASK) ==
-	    DP_TRAINING_PATTERN_DISABLE) {
-		/* don't write DP_TRAINING_LANEx_SET on disable */
-		len = 1;
-	} else {
-		/* DP_TRAINING_LANEx_SET follow DP_TRAINING_PATTERN_SET */
-		memcpy(buf + 1, intel_dp->train_set, intel_dp->lane_count);
-		len = intel_dp->lane_count + 1;
-	}
-
-	ret = drm_dp_dpcd_write(&intel_dp->aux, DP_TRAINING_PATTERN_SET,
-				buf, len);
-
-	return ret == len;
 }
 
-static bool
-intel_dp_reset_link_train(struct intel_dp *intel_dp, uint32_t *DP,
-			uint8_t dp_train_pat)
-{
-	if (!intel_dp->train_set_valid)
-		memset(intel_dp->train_set, 0, sizeof(intel_dp->train_set));
-	intel_dp_set_signal_levels(intel_dp, DP);
-	return intel_dp_set_link_train(intel_dp, DP, dp_train_pat);
-}
-
-static bool
-intel_dp_update_link_train(struct intel_dp *intel_dp, uint32_t *DP,
-			   const uint8_t link_status[DP_LINK_STATUS_SIZE])
-{
-	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
-	struct drm_i915_private *dev_priv =
-		to_i915(intel_dig_port->base.base.dev);
-	int ret;
-
-	intel_get_adjust_train(intel_dp, link_status);
-	intel_dp_set_signal_levels(intel_dp, DP);
-
-	I915_WRITE(intel_dp->output_reg, *DP);
-	POSTING_READ(intel_dp->output_reg);
-
-	ret = drm_dp_dpcd_write(&intel_dp->aux, DP_TRAINING_LANE0_SET,
-				intel_dp->train_set, intel_dp->lane_count);
-
-	return ret == intel_dp->lane_count;
-}
-
-static void intel_dp_set_idle_link_train(struct intel_dp *intel_dp)
+void intel_dp_set_idle_link_train(struct intel_dp *intel_dp)
 {
 	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
 	struct drm_device *dev = intel_dig_port->base.base.dev;
@@ -3689,232 +3759,6 @@
 		DRM_ERROR("Timed out waiting for DP idle patterns\n");
 }
 
-/* Enable corresponding port and start training pattern 1 */
-static void
-intel_dp_link_training_clock_recovery(struct intel_dp *intel_dp)
-{
-	struct drm_encoder *encoder = &dp_to_dig_port(intel_dp)->base.base;
-	struct drm_device *dev = encoder->dev;
-	int i;
-	uint8_t voltage;
-	int voltage_tries, loop_tries;
-	uint32_t DP = intel_dp->DP;
-	uint8_t link_config[2];
-	uint8_t link_bw, rate_select;
-
-	if (HAS_DDI(dev))
-		intel_ddi_prepare_link_retrain(encoder);
-
-	intel_dp_compute_rate(intel_dp, intel_dp->link_rate,
-			      &link_bw, &rate_select);
-
-	/* Write the link configuration data */
-	link_config[0] = link_bw;
-	link_config[1] = intel_dp->lane_count;
-	if (drm_dp_enhanced_frame_cap(intel_dp->dpcd))
-		link_config[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN;
-	drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_BW_SET, link_config, 2);
-	if (intel_dp->num_sink_rates)
-		drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_RATE_SET,
-				  &rate_select, 1);
-
-	link_config[0] = 0;
-	link_config[1] = DP_SET_ANSI_8B10B;
-	drm_dp_dpcd_write(&intel_dp->aux, DP_DOWNSPREAD_CTRL, link_config, 2);
-
-	DP |= DP_PORT_EN;
-
-	/* clock recovery */
-	if (!intel_dp_reset_link_train(intel_dp, &DP,
-				       DP_TRAINING_PATTERN_1 |
-				       DP_LINK_SCRAMBLING_DISABLE)) {
-		DRM_ERROR("failed to enable link training\n");
-		return;
-	}
-
-	voltage = 0xff;
-	voltage_tries = 0;
-	loop_tries = 0;
-	for (;;) {
-		uint8_t link_status[DP_LINK_STATUS_SIZE];
-
-		drm_dp_link_train_clock_recovery_delay(intel_dp->dpcd);
-		if (!intel_dp_get_link_status(intel_dp, link_status)) {
-			DRM_ERROR("failed to get link status\n");
-			break;
-		}
-
-		if (drm_dp_clock_recovery_ok(link_status, intel_dp->lane_count)) {
-			DRM_DEBUG_KMS("clock recovery OK\n");
-			break;
-		}
-
-		/*
-		 * if we used previously trained voltage and pre-emphasis values
-		 * and we don't get clock recovery, reset link training values
-		 */
-		if (intel_dp->train_set_valid) {
-			DRM_DEBUG_KMS("clock recovery not ok, reset");
-			/* clear the flag as we are not reusing train set */
-			intel_dp->train_set_valid = false;
-			if (!intel_dp_reset_link_train(intel_dp, &DP,
-						       DP_TRAINING_PATTERN_1 |
-						       DP_LINK_SCRAMBLING_DISABLE)) {
-				DRM_ERROR("failed to enable link training\n");
-				return;
-			}
-			continue;
-		}
-
-		/* Check to see if we've tried the max voltage */
-		for (i = 0; i < intel_dp->lane_count; i++)
-			if ((intel_dp->train_set[i] & DP_TRAIN_MAX_SWING_REACHED) == 0)
-				break;
-		if (i == intel_dp->lane_count) {
-			++loop_tries;
-			if (loop_tries == 5) {
-				DRM_ERROR("too many full retries, give up\n");
-				break;
-			}
-			intel_dp_reset_link_train(intel_dp, &DP,
-						  DP_TRAINING_PATTERN_1 |
-						  DP_LINK_SCRAMBLING_DISABLE);
-			voltage_tries = 0;
-			continue;
-		}
-
-		/* Check to see if we've tried the same voltage 5 times */
-		if ((intel_dp->train_set[0] & DP_TRAIN_VOLTAGE_SWING_MASK) == voltage) {
-			++voltage_tries;
-			if (voltage_tries == 5) {
-				DRM_ERROR("too many voltage retries, give up\n");
-				break;
-			}
-		} else
-			voltage_tries = 0;
-		voltage = intel_dp->train_set[0] & DP_TRAIN_VOLTAGE_SWING_MASK;
-
-		/* Update training set as requested by target */
-		if (!intel_dp_update_link_train(intel_dp, &DP, link_status)) {
-			DRM_ERROR("failed to update link training\n");
-			break;
-		}
-	}
-
-	intel_dp->DP = DP;
-}
-
-static void
-intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp)
-{
-	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
-	struct drm_device *dev = dig_port->base.base.dev;
-	bool channel_eq = false;
-	int tries, cr_tries;
-	uint32_t DP = intel_dp->DP;
-	uint32_t training_pattern = DP_TRAINING_PATTERN_2;
-
-	/*
-	 * Training Pattern 3 for HBR2 or 1.2 devices that support it.
-	 *
-	 * Intel platforms that support HBR2 also support TPS3. TPS3 support is
-	 * also mandatory for downstream devices that support HBR2.
-	 *
-	 * Due to WaDisableHBR2 SKL < B0 is the only exception where TPS3 is
-	 * supported but still not enabled.
-	 */
-	if (intel_dp_source_supports_hbr2(dev) &&
-	    drm_dp_tps3_supported(intel_dp->dpcd))
-		training_pattern = DP_TRAINING_PATTERN_3;
-	else if (intel_dp->link_rate == 540000)
-		DRM_ERROR("5.4 Gbps link rate without HBR2/TPS3 support\n");
-
-	/* channel equalization */
-	if (!intel_dp_set_link_train(intel_dp, &DP,
-				     training_pattern |
-				     DP_LINK_SCRAMBLING_DISABLE)) {
-		DRM_ERROR("failed to start channel equalization\n");
-		return;
-	}
-
-	tries = 0;
-	cr_tries = 0;
-	channel_eq = false;
-	for (;;) {
-		uint8_t link_status[DP_LINK_STATUS_SIZE];
-
-		if (cr_tries > 5) {
-			DRM_ERROR("failed to train DP, aborting\n");
-			break;
-		}
-
-		drm_dp_link_train_channel_eq_delay(intel_dp->dpcd);
-		if (!intel_dp_get_link_status(intel_dp, link_status)) {
-			DRM_ERROR("failed to get link status\n");
-			break;
-		}
-
-		/* Make sure clock is still ok */
-		if (!drm_dp_clock_recovery_ok(link_status,
-					      intel_dp->lane_count)) {
-			intel_dp->train_set_valid = false;
-			intel_dp_link_training_clock_recovery(intel_dp);
-			intel_dp_set_link_train(intel_dp, &DP,
-						training_pattern |
-						DP_LINK_SCRAMBLING_DISABLE);
-			cr_tries++;
-			continue;
-		}
-
-		if (drm_dp_channel_eq_ok(link_status,
-					 intel_dp->lane_count)) {
-			channel_eq = true;
-			break;
-		}
-
-		/* Try 5 times, then try clock recovery if that fails */
-		if (tries > 5) {
-			intel_dp->train_set_valid = false;
-			intel_dp_link_training_clock_recovery(intel_dp);
-			intel_dp_set_link_train(intel_dp, &DP,
-						training_pattern |
-						DP_LINK_SCRAMBLING_DISABLE);
-			tries = 0;
-			cr_tries++;
-			continue;
-		}
-
-		/* Update training set as requested by target */
-		if (!intel_dp_update_link_train(intel_dp, &DP, link_status)) {
-			DRM_ERROR("failed to update link training\n");
-			break;
-		}
-		++tries;
-	}
-
-	intel_dp_set_idle_link_train(intel_dp);
-
-	intel_dp->DP = DP;
-
-	if (channel_eq) {
-		intel_dp->train_set_valid = true;
-		DRM_DEBUG_KMS("Channel EQ done. DP Training successful\n");
-	}
-}
-
-void intel_dp_stop_link_train(struct intel_dp *intel_dp)
-{
-	intel_dp_set_link_train(intel_dp, &intel_dp->DP,
-				DP_TRAINING_PATTERN_DISABLE);
-}
-
-void
-intel_dp_start_link_train(struct intel_dp *intel_dp)
-{
-	intel_dp_link_training_clock_recovery(intel_dp);
-	intel_dp_link_training_channel_equalization(intel_dp);
-}
-
 static void
 intel_dp_link_down(struct intel_dp *intel_dp)
 {
@@ -3957,6 +3801,13 @@
 	 * matching HDMI port to be enabled on transcoder A.
 	 */
 	if (HAS_PCH_IBX(dev) && crtc->pipe == PIPE_B && port != PORT_A) {
+		/*
+		 * We get CPU/PCH FIFO underruns on the other pipe when
+		 * doing the workaround. Sweep them under the rug.
+		 */
+		intel_set_cpu_fifo_underrun_reporting(dev_priv, PIPE_A, false);
+		intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, false);
+
 		/* always enable with pattern 1 (as per spec) */
 		DP &= ~(DP_PIPEB_SELECT | DP_LINK_TRAIN_MASK);
 		DP |= DP_PORT_EN | DP_LINK_TRAIN_PAT_1;
@@ -3966,9 +3817,15 @@
 		DP &= ~DP_PORT_EN;
 		I915_WRITE(intel_dp->output_reg, DP);
 		POSTING_READ(intel_dp->output_reg);
+
+		intel_wait_for_vblank_if_active(dev_priv->dev, PIPE_A);
+		intel_set_cpu_fifo_underrun_reporting(dev_priv, PIPE_A, true);
+		intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, true);
 	}
 
 	msleep(intel_dp->panel_power_down_delay);
+
+	intel_dp->DP = DP;
 }
 
 static bool
@@ -4016,7 +3873,7 @@
 	}
 
 	DRM_DEBUG_KMS("Display Port TPS3 support: source %s, sink %s\n",
-		      yesno(intel_dp_source_supports_hbr2(dev)),
+		      yesno(intel_dp_source_supports_hbr2(intel_dp)),
 		      yesno(drm_dp_tps3_supported(intel_dp->dpcd)));
 
 	/* Intermediate frequency support */
@@ -4106,9 +3963,12 @@
 static int intel_dp_sink_crc_stop(struct intel_dp *intel_dp)
 {
 	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
+	struct drm_device *dev = dig_port->base.base.dev;
 	struct intel_crtc *intel_crtc = to_intel_crtc(dig_port->base.base.crtc);
 	u8 buf;
 	int ret = 0;
+	int count = 0;
+	int attempts = 10;
 
 	if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK, &buf) < 0) {
 		DRM_DEBUG_KMS("Sink CRC couldn't be stopped properly\n");
@@ -4123,7 +3983,22 @@
 		goto out;
 	}
 
-	intel_dp->sink_crc.started = false;
+	do {
+		intel_wait_for_vblank(dev, intel_crtc->pipe);
+
+		if (drm_dp_dpcd_readb(&intel_dp->aux,
+				      DP_TEST_SINK_MISC, &buf) < 0) {
+			ret = -EIO;
+			goto out;
+		}
+		count = buf & DP_TEST_COUNT_MASK;
+	} while (--attempts && count);
+
+	if (attempts == 0) {
+		DRM_ERROR("TIMEOUT: Sink CRC counter is not zeroed\n");
+		ret = -ETIMEDOUT;
+	}
+
  out:
 	hsw_enable_ips(intel_crtc);
 	return ret;
@@ -4132,27 +4007,26 @@
 static int intel_dp_sink_crc_start(struct intel_dp *intel_dp)
 {
 	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
+	struct drm_device *dev = dig_port->base.base.dev;
 	struct intel_crtc *intel_crtc = to_intel_crtc(dig_port->base.base.crtc);
 	u8 buf;
 	int ret;
 
-	if (intel_dp->sink_crc.started) {
-		ret = intel_dp_sink_crc_stop(intel_dp);
-		if (ret)
-			return ret;
-	}
-
 	if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK_MISC, &buf) < 0)
 		return -EIO;
 
 	if (!(buf & DP_TEST_CRC_SUPPORTED))
 		return -ENOTTY;
 
-	intel_dp->sink_crc.last_count = buf & DP_TEST_COUNT_MASK;
-
 	if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK, &buf) < 0)
 		return -EIO;
 
+	if (buf & DP_TEST_SINK_START) {
+		ret = intel_dp_sink_crc_stop(intel_dp);
+		if (ret)
+			return ret;
+	}
+
 	hsw_disable_ips(intel_crtc);
 
 	if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_TEST_SINK,
@@ -4161,7 +4035,7 @@
 		return -EIO;
 	}
 
-	intel_dp->sink_crc.started = true;
+	intel_wait_for_vblank(dev, intel_crtc->pipe);
 	return 0;
 }
 
@@ -4173,7 +4047,6 @@
 	u8 buf;
 	int count, ret;
 	int attempts = 6;
-	bool old_equal_new;
 
 	ret = intel_dp_sink_crc_start(intel_dp);
 	if (ret)
@@ -4189,35 +4062,17 @@
 		}
 		count = buf & DP_TEST_COUNT_MASK;
 
-		/*
-		 * Count might be reset during the loop. In this case
-		 * last known count needs to be reset as well.
-		 */
-		if (count == 0)
-			intel_dp->sink_crc.last_count = 0;
-
-		if (drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_CRC_R_CR, crc, 6) < 0) {
-			ret = -EIO;
-			goto stop;
-		}
-
-		old_equal_new = (count == intel_dp->sink_crc.last_count &&
-				 !memcmp(intel_dp->sink_crc.last_crc, crc,
-					 6 * sizeof(u8)));
-
-	} while (--attempts && (count == 0 || old_equal_new));
-
-	intel_dp->sink_crc.last_count = buf & DP_TEST_COUNT_MASK;
-	memcpy(intel_dp->sink_crc.last_crc, crc, 6 * sizeof(u8));
+	} while (--attempts && count == 0);
 
 	if (attempts == 0) {
-		if (old_equal_new) {
-			DRM_DEBUG_KMS("Unreliable Sink CRC counter: Current returned CRC is identical to the previous one\n");
-		} else {
-			DRM_ERROR("Panel is unable to calculate any CRC after 6 vblanks\n");
-			ret = -ETIMEDOUT;
-			goto stop;
-		}
+		DRM_ERROR("Panel is unable to calculate any CRC after 6 vblanks\n");
+		ret = -ETIMEDOUT;
+		goto stop;
+	}
+
+	if (drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_CRC_R_CR, crc, 6) < 0) {
+		ret = -EIO;
+		goto stop;
 	}
 
 stop:
@@ -4317,13 +4172,6 @@
 	uint8_t rxdata = 0;
 	int status = 0;
 
-	intel_dp->compliance_test_active = 0;
-	intel_dp->compliance_test_type = 0;
-	intel_dp->compliance_test_data = 0;
-
-	intel_dp->aux.i2c_nack_count = 0;
-	intel_dp->aux.i2c_defer_count = 0;
-
 	status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_REQUEST, &rxdata, 1);
 	if (status <= 0) {
 		DRM_DEBUG_KMS("Could not read test request from sink\n");
@@ -4439,6 +4287,14 @@
 
 	WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex));
 
+	/*
+	 * Clearing compliance test variables to allow capturing
+	 * of values for next automated test request.
+	 */
+	intel_dp->compliance_test_active = 0;
+	intel_dp->compliance_test_type = 0;
+	intel_dp->compliance_test_data = 0;
+
 	if (!intel_encoder->base.crtc)
 		return;
 
@@ -4469,7 +4325,9 @@
 			DRM_DEBUG_DRIVER("CP or sink specific irq unhandled\n");
 	}
 
-	if (!drm_dp_channel_eq_ok(link_status, intel_dp->lane_count)) {
+	/* if link training is requested we should perform it always */
+	if ((intel_dp->compliance_test_type == DP_TEST_LINK_TRAINING) ||
+		(!drm_dp_channel_eq_ok(link_status, intel_dp->lane_count))) {
 		DRM_DEBUG_KMS("%s: channel EQ not ok, retraining\n",
 			      intel_encoder->base.name);
 		intel_dp_start_link_train(intel_dp);
@@ -4687,41 +4545,6 @@
 		return g4x_digital_port_connected(dev_priv, port);
 }
 
-static enum drm_connector_status
-ironlake_dp_detect(struct intel_dp *intel_dp)
-{
-	struct drm_device *dev = intel_dp_to_dev(intel_dp);
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
-
-	if (!intel_digital_port_connected(dev_priv, intel_dig_port))
-		return connector_status_disconnected;
-
-	return intel_dp_detect_dpcd(intel_dp);
-}
-
-static enum drm_connector_status
-g4x_dp_detect(struct intel_dp *intel_dp)
-{
-	struct drm_device *dev = intel_dp_to_dev(intel_dp);
-	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
-
-	/* Can't disconnect eDP, but you can close the lid... */
-	if (is_edp(intel_dp)) {
-		enum drm_connector_status status;
-
-		status = intel_panel_detect(dev);
-		if (status == connector_status_unknown)
-			status = connector_status_connected;
-		return status;
-	}
-
-	if (!intel_digital_port_connected(dev->dev_private, intel_dig_port))
-		return connector_status_disconnected;
-
-	return intel_dp_detect_dpcd(intel_dp);
-}
-
 static struct edid *
 intel_dp_get_edid(struct intel_dp *intel_dp)
 {
@@ -4765,26 +4588,6 @@
 	intel_dp->has_audio = false;
 }
 
-static enum intel_display_power_domain
-intel_dp_power_get(struct intel_dp *dp)
-{
-	struct intel_encoder *encoder = &dp_to_dig_port(dp)->base;
-	enum intel_display_power_domain power_domain;
-
-	power_domain = intel_display_port_power_domain(encoder);
-	intel_display_power_get(to_i915(encoder->base.dev), power_domain);
-
-	return power_domain;
-}
-
-static void
-intel_dp_power_put(struct intel_dp *dp,
-		   enum intel_display_power_domain power_domain)
-{
-	struct intel_encoder *encoder = &dp_to_dig_port(dp)->base;
-	intel_display_power_put(to_i915(encoder->base.dev), power_domain);
-}
-
 static enum drm_connector_status
 intel_dp_detect(struct drm_connector *connector, bool force)
 {
@@ -4808,17 +4611,25 @@
 		return connector_status_disconnected;
 	}
 
-	power_domain = intel_dp_power_get(intel_dp);
+	power_domain = intel_display_port_aux_power_domain(intel_encoder);
+	intel_display_power_get(to_i915(dev), power_domain);
 
 	/* Can't disconnect eDP, but you can close the lid... */
 	if (is_edp(intel_dp))
 		status = edp_detect(intel_dp);
-	else if (HAS_PCH_SPLIT(dev))
-		status = ironlake_dp_detect(intel_dp);
+	else if (intel_digital_port_connected(to_i915(dev),
+					      dp_to_dig_port(intel_dp)))
+		status = intel_dp_detect_dpcd(intel_dp);
 	else
-		status = g4x_dp_detect(intel_dp);
-	if (status != connector_status_connected)
+		status = connector_status_disconnected;
+
+	if (status != connector_status_connected) {
+		intel_dp->compliance_test_active = 0;
+		intel_dp->compliance_test_type = 0;
+		intel_dp->compliance_test_data = 0;
+
 		goto out;
+	}
 
 	intel_dp_probe_oui(intel_dp);
 
@@ -4832,6 +4643,14 @@
 		goto out;
 	}
 
+	/*
+	 * Clearing NACK and defer counts to get their exact values
+	 * while reading EDID which are required by Compliance tests
+	 * 4.2.2.4 and 4.2.2.5
+	 */
+	intel_dp->aux.i2c_nack_count = 0;
+	intel_dp->aux.i2c_defer_count = 0;
+
 	intel_dp_set_edid(intel_dp);
 
 	if (intel_encoder->type != INTEL_OUTPUT_EDP)
@@ -4853,7 +4672,7 @@
 	}
 
 out:
-	intel_dp_power_put(intel_dp, power_domain);
+	intel_display_power_put(to_i915(dev), power_domain);
 	return status;
 }
 
@@ -4862,6 +4681,7 @@
 {
 	struct intel_dp *intel_dp = intel_attached_dp(connector);
 	struct intel_encoder *intel_encoder = &dp_to_dig_port(intel_dp)->base;
+	struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev);
 	enum intel_display_power_domain power_domain;
 
 	DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
@@ -4871,11 +4691,12 @@
 	if (connector->status != connector_status_connected)
 		return;
 
-	power_domain = intel_dp_power_get(intel_dp);
+	power_domain = intel_display_port_aux_power_domain(intel_encoder);
+	intel_display_power_get(dev_priv, power_domain);
 
 	intel_dp_set_edid(intel_dp);
 
-	intel_dp_power_put(intel_dp, power_domain);
+	intel_display_power_put(dev_priv, power_domain);
 
 	if (intel_encoder->type != INTEL_OUTPUT_EDP)
 		intel_encoder->type = INTEL_OUTPUT_DISPLAYPORT;
@@ -5034,7 +4855,7 @@
 	struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
 	struct intel_dp *intel_dp = &intel_dig_port->dp;
 
-	drm_dp_aux_unregister(&intel_dp->aux);
+	intel_dp_aux_fini(intel_dp);
 	intel_dp_mst_encoder_cleanup(intel_dig_port);
 	if (is_edp(intel_dp)) {
 		cancel_delayed_work_sync(&intel_dp->panel_vdd_work);
@@ -5091,7 +4912,7 @@
 	 * indefinitely.
 	 */
 	DRM_DEBUG_KMS("VDD left on by BIOS, adjusting state tracking\n");
-	power_domain = intel_display_port_power_domain(&intel_dig_port->base);
+	power_domain = intel_display_port_aux_power_domain(&intel_dig_port->base);
 	intel_display_power_get(dev_priv, power_domain);
 
 	edp_panel_vdd_schedule_off(intel_dp);
@@ -5153,7 +4974,8 @@
 	enum intel_display_power_domain power_domain;
 	enum irqreturn ret = IRQ_NONE;
 
-	if (intel_dig_port->base.type != INTEL_OUTPUT_EDP)
+	if (intel_dig_port->base.type != INTEL_OUTPUT_EDP &&
+	    intel_dig_port->base.type != INTEL_OUTPUT_HDMI)
 		intel_dig_port->base.type = INTEL_OUTPUT_DISPLAYPORT;
 
 	if (long_hpd && intel_dig_port->base.type == INTEL_OUTPUT_EDP) {
@@ -5172,7 +4994,7 @@
 		      port_name(intel_dig_port->port),
 		      long_hpd ? "long" : "short");
 
-	power_domain = intel_display_port_power_domain(intel_encoder);
+	power_domain = intel_display_port_aux_power_domain(intel_encoder);
 	intel_display_power_get(dev_priv, power_domain);
 
 	if (long_hpd) {
@@ -5223,25 +5045,6 @@
 	return ret;
 }
 
-/* Return which DP Port should be selected for Transcoder DP control */
-int
-intel_trans_dp_port_sel(struct drm_crtc *crtc)
-{
-	struct drm_device *dev = crtc->dev;
-	struct intel_encoder *intel_encoder;
-	struct intel_dp *intel_dp;
-
-	for_each_encoder_on_crtc(dev, crtc, intel_encoder) {
-		intel_dp = enc_to_intel_dp(&intel_encoder->base);
-
-		if (intel_encoder->type == INTEL_OUTPUT_DISPLAYPORT ||
-		    intel_encoder->type == INTEL_OUTPUT_EDP)
-			return intel_dp->output_reg;
-	}
-
-	return -1;
-}
-
 /* check the VBT to see whether the eDP is on another port */
 bool intel_dp_is_edp(struct drm_device *dev, enum port port)
 {
@@ -5313,7 +5116,7 @@
 	struct edp_power_seq cur, vbt, spec,
 		*final = &intel_dp->pps_delays;
 	u32 pp_on, pp_off, pp_div = 0, pp_ctl = 0;
-	int pp_ctrl_reg, pp_on_reg, pp_off_reg, pp_div_reg = 0;
+	i915_reg_t pp_ctrl_reg, pp_on_reg, pp_off_reg, pp_div_reg;
 
 	lockdep_assert_held(&dev_priv->pps_mutex);
 
@@ -5435,7 +5238,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 pp_on, pp_off, pp_div, port_sel = 0;
 	int div = HAS_PCH_SPLIT(dev) ? intel_pch_rawclk(dev) : intel_hrawclk(dev);
-	int pp_on_reg, pp_off_reg, pp_div_reg = 0, pp_ctrl_reg;
+	i915_reg_t pp_on_reg, pp_off_reg, pp_div_reg, pp_ctrl_reg;
 	enum port port = dp_to_dig_port(intel_dp)->port;
 	const struct edp_power_seq *seq = &intel_dp->pps_delays;
 
@@ -5597,7 +5400,7 @@
 			DRM_ERROR("Unsupported refreshrate type\n");
 		}
 	} else if (INTEL_INFO(dev)->gen > 6) {
-		u32 reg = PIPECONF(intel_crtc->config->cpu_transcoder);
+		i915_reg_t reg = PIPECONF(intel_crtc->config->cpu_transcoder);
 		u32 val;
 
 		val = I915_READ(reg);
@@ -6015,7 +5818,7 @@
 	struct drm_device *dev = intel_encoder->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	enum port port = intel_dig_port->port;
-	int type;
+	int type, ret;
 
 	intel_dp->pps_pipe = INVALID_PIPE;
 
@@ -6036,6 +5839,9 @@
 	else
 		intel_dp->get_aux_send_ctl = i9xx_get_aux_send_ctl;
 
+	if (HAS_DDI(dev))
+		intel_dp->prepare_link_retrain = intel_ddi_prepare_link_retrain;
+
 	/* Preserve the current hw state. */
 	intel_dp->DP = I915_READ(intel_dp->output_reg);
 	intel_dp->attached_connector = intel_connector;
@@ -6087,7 +5893,7 @@
 		break;
 	case PORT_B:
 		intel_encoder->hpd_pin = HPD_PORT_B;
-		if (IS_BROXTON(dev_priv) && (INTEL_REVID(dev) < BXT_REVID_B0))
+		if (IS_BXT_REVID(dev, 0, BXT_REVID_A1))
 			intel_encoder->hpd_pin = HPD_PORT_A;
 		break;
 	case PORT_C:
@@ -6113,7 +5919,9 @@
 		pps_unlock(intel_dp);
 	}
 
-	intel_dp_aux_init(intel_dp, intel_connector);
+	ret = intel_dp_aux_init(intel_dp, intel_connector);
+	if (ret)
+		goto fail;
 
 	/* init MST on ports that can support it */
 	if (HAS_DP_MST(dev) &&
@@ -6122,20 +5930,9 @@
 					  intel_connector->base.base.id);
 
 	if (!intel_edp_init_connector(intel_dp, intel_connector)) {
-		drm_dp_aux_unregister(&intel_dp->aux);
-		if (is_edp(intel_dp)) {
-			cancel_delayed_work_sync(&intel_dp->panel_vdd_work);
-			/*
-			 * vdd might still be enabled do to the delayed vdd off.
-			 * Make sure vdd is actually turned off here.
-			 */
-			pps_lock(intel_dp);
-			edp_panel_vdd_off_sync(intel_dp);
-			pps_unlock(intel_dp);
-		}
-		drm_connector_unregister(connector);
-		drm_connector_cleanup(connector);
-		return false;
+		intel_dp_aux_fini(intel_dp);
+		intel_dp_mst_encoder_cleanup(intel_dig_port);
+		goto fail;
 	}
 
 	intel_dp_add_properties(intel_dp, connector);
@@ -6152,10 +5949,27 @@
 	i915_debugfs_connector_add(connector);
 
 	return true;
+
+fail:
+	if (is_edp(intel_dp)) {
+		cancel_delayed_work_sync(&intel_dp->panel_vdd_work);
+		/*
+		 * vdd might still be enabled do to the delayed vdd off.
+		 * Make sure vdd is actually turned off here.
+		 */
+		pps_lock(intel_dp);
+		edp_panel_vdd_off_sync(intel_dp);
+		pps_unlock(intel_dp);
+	}
+	drm_connector_unregister(connector);
+	drm_connector_cleanup(connector);
+
+	return false;
 }
 
 void
-intel_dp_init(struct drm_device *dev, int output_reg, enum port port)
+intel_dp_init(struct drm_device *dev,
+	      i915_reg_t output_reg, enum port port)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_digital_port *intel_dig_port;
@@ -6175,7 +5989,7 @@
 	encoder = &intel_encoder->base;
 
 	drm_encoder_init(dev, &intel_encoder->base, &intel_dp_enc_funcs,
-			 DRM_MODE_ENCODER_TMDS);
+			 DRM_MODE_ENCODER_TMDS, NULL);
 
 	intel_encoder->compute_config = intel_dp_compute_config;
 	intel_encoder->disable = intel_disable_dp;
diff --git a/drivers/gpu/drm/i915/intel_dp_link_training.c b/drivers/gpu/drm/i915/intel_dp_link_training.c
new file mode 100644
index 0000000..8888793
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_dp_link_training.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright © 2008-2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "intel_drv.h"
+
+static void
+intel_get_adjust_train(struct intel_dp *intel_dp,
+		       const uint8_t link_status[DP_LINK_STATUS_SIZE])
+{
+	uint8_t v = 0;
+	uint8_t p = 0;
+	int lane;
+	uint8_t voltage_max;
+	uint8_t preemph_max;
+
+	for (lane = 0; lane < intel_dp->lane_count; lane++) {
+		uint8_t this_v = drm_dp_get_adjust_request_voltage(link_status, lane);
+		uint8_t this_p = drm_dp_get_adjust_request_pre_emphasis(link_status, lane);
+
+		if (this_v > v)
+			v = this_v;
+		if (this_p > p)
+			p = this_p;
+	}
+
+	voltage_max = intel_dp_voltage_max(intel_dp);
+	if (v >= voltage_max)
+		v = voltage_max | DP_TRAIN_MAX_SWING_REACHED;
+
+	preemph_max = intel_dp_pre_emphasis_max(intel_dp, v);
+	if (p >= preemph_max)
+		p = preemph_max | DP_TRAIN_MAX_PRE_EMPHASIS_REACHED;
+
+	for (lane = 0; lane < 4; lane++)
+		intel_dp->train_set[lane] = v | p;
+}
+
+static bool
+intel_dp_set_link_train(struct intel_dp *intel_dp,
+			uint8_t dp_train_pat)
+{
+	uint8_t buf[sizeof(intel_dp->train_set) + 1];
+	int ret, len;
+
+	intel_dp_program_link_training_pattern(intel_dp, dp_train_pat);
+
+	buf[0] = dp_train_pat;
+	if ((dp_train_pat & DP_TRAINING_PATTERN_MASK) ==
+	    DP_TRAINING_PATTERN_DISABLE) {
+		/* don't write DP_TRAINING_LANEx_SET on disable */
+		len = 1;
+	} else {
+		/* DP_TRAINING_LANEx_SET follow DP_TRAINING_PATTERN_SET */
+		memcpy(buf + 1, intel_dp->train_set, intel_dp->lane_count);
+		len = intel_dp->lane_count + 1;
+	}
+
+	ret = drm_dp_dpcd_write(&intel_dp->aux, DP_TRAINING_PATTERN_SET,
+				buf, len);
+
+	return ret == len;
+}
+
+static bool
+intel_dp_reset_link_train(struct intel_dp *intel_dp,
+			uint8_t dp_train_pat)
+{
+	if (!intel_dp->train_set_valid)
+		memset(intel_dp->train_set, 0, sizeof(intel_dp->train_set));
+	intel_dp_set_signal_levels(intel_dp);
+	return intel_dp_set_link_train(intel_dp, dp_train_pat);
+}
+
+static bool
+intel_dp_update_link_train(struct intel_dp *intel_dp)
+{
+	int ret;
+
+	intel_dp_set_signal_levels(intel_dp);
+
+	ret = drm_dp_dpcd_write(&intel_dp->aux, DP_TRAINING_LANE0_SET,
+				intel_dp->train_set, intel_dp->lane_count);
+
+	return ret == intel_dp->lane_count;
+}
+
+/* Enable corresponding port and start training pattern 1 */
+static void
+intel_dp_link_training_clock_recovery(struct intel_dp *intel_dp)
+{
+	int i;
+	uint8_t voltage;
+	int voltage_tries, loop_tries;
+	uint8_t link_config[2];
+	uint8_t link_bw, rate_select;
+
+	if (intel_dp->prepare_link_retrain)
+		intel_dp->prepare_link_retrain(intel_dp);
+
+	intel_dp_compute_rate(intel_dp, intel_dp->link_rate,
+			      &link_bw, &rate_select);
+
+	/* Write the link configuration data */
+	link_config[0] = link_bw;
+	link_config[1] = intel_dp->lane_count;
+	if (drm_dp_enhanced_frame_cap(intel_dp->dpcd))
+		link_config[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN;
+	drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_BW_SET, link_config, 2);
+	if (intel_dp->num_sink_rates)
+		drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_RATE_SET,
+				  &rate_select, 1);
+
+	link_config[0] = 0;
+	link_config[1] = DP_SET_ANSI_8B10B;
+	drm_dp_dpcd_write(&intel_dp->aux, DP_DOWNSPREAD_CTRL, link_config, 2);
+
+	intel_dp->DP |= DP_PORT_EN;
+
+	/* clock recovery */
+	if (!intel_dp_reset_link_train(intel_dp,
+				       DP_TRAINING_PATTERN_1 |
+				       DP_LINK_SCRAMBLING_DISABLE)) {
+		DRM_ERROR("failed to enable link training\n");
+		return;
+	}
+
+	voltage = 0xff;
+	voltage_tries = 0;
+	loop_tries = 0;
+	for (;;) {
+		uint8_t link_status[DP_LINK_STATUS_SIZE];
+
+		drm_dp_link_train_clock_recovery_delay(intel_dp->dpcd);
+		if (!intel_dp_get_link_status(intel_dp, link_status)) {
+			DRM_ERROR("failed to get link status\n");
+			break;
+		}
+
+		if (drm_dp_clock_recovery_ok(link_status, intel_dp->lane_count)) {
+			DRM_DEBUG_KMS("clock recovery OK\n");
+			break;
+		}
+
+		/*
+		 * if we used previously trained voltage and pre-emphasis values
+		 * and we don't get clock recovery, reset link training values
+		 */
+		if (intel_dp->train_set_valid) {
+			DRM_DEBUG_KMS("clock recovery not ok, reset");
+			/* clear the flag as we are not reusing train set */
+			intel_dp->train_set_valid = false;
+			if (!intel_dp_reset_link_train(intel_dp,
+						       DP_TRAINING_PATTERN_1 |
+						       DP_LINK_SCRAMBLING_DISABLE)) {
+				DRM_ERROR("failed to enable link training\n");
+				return;
+			}
+			continue;
+		}
+
+		/* Check to see if we've tried the max voltage */
+		for (i = 0; i < intel_dp->lane_count; i++)
+			if ((intel_dp->train_set[i] & DP_TRAIN_MAX_SWING_REACHED) == 0)
+				break;
+		if (i == intel_dp->lane_count) {
+			++loop_tries;
+			if (loop_tries == 5) {
+				DRM_ERROR("too many full retries, give up\n");
+				break;
+			}
+			intel_dp_reset_link_train(intel_dp,
+						  DP_TRAINING_PATTERN_1 |
+						  DP_LINK_SCRAMBLING_DISABLE);
+			voltage_tries = 0;
+			continue;
+		}
+
+		/* Check to see if we've tried the same voltage 5 times */
+		if ((intel_dp->train_set[0] & DP_TRAIN_VOLTAGE_SWING_MASK) == voltage) {
+			++voltage_tries;
+			if (voltage_tries == 5) {
+				DRM_ERROR("too many voltage retries, give up\n");
+				break;
+			}
+		} else
+			voltage_tries = 0;
+		voltage = intel_dp->train_set[0] & DP_TRAIN_VOLTAGE_SWING_MASK;
+
+		/* Update training set as requested by target */
+		intel_get_adjust_train(intel_dp, link_status);
+		if (!intel_dp_update_link_train(intel_dp)) {
+			DRM_ERROR("failed to update link training\n");
+			break;
+		}
+	}
+}
+
+static void
+intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp)
+{
+	bool channel_eq = false;
+	int tries, cr_tries;
+	uint32_t training_pattern = DP_TRAINING_PATTERN_2;
+
+	/*
+	 * Training Pattern 3 for HBR2 or 1.2 devices that support it.
+	 *
+	 * Intel platforms that support HBR2 also support TPS3. TPS3 support is
+	 * also mandatory for downstream devices that support HBR2.
+	 *
+	 * Due to WaDisableHBR2 SKL < B0 is the only exception where TPS3 is
+	 * supported but still not enabled.
+	 */
+	if (intel_dp_source_supports_hbr2(intel_dp) &&
+	    drm_dp_tps3_supported(intel_dp->dpcd))
+		training_pattern = DP_TRAINING_PATTERN_3;
+	else if (intel_dp->link_rate == 540000)
+		DRM_ERROR("5.4 Gbps link rate without HBR2/TPS3 support\n");
+
+	/* channel equalization */
+	if (!intel_dp_set_link_train(intel_dp,
+				     training_pattern |
+				     DP_LINK_SCRAMBLING_DISABLE)) {
+		DRM_ERROR("failed to start channel equalization\n");
+		return;
+	}
+
+	tries = 0;
+	cr_tries = 0;
+	channel_eq = false;
+	for (;;) {
+		uint8_t link_status[DP_LINK_STATUS_SIZE];
+
+		if (cr_tries > 5) {
+			DRM_ERROR("failed to train DP, aborting\n");
+			break;
+		}
+
+		drm_dp_link_train_channel_eq_delay(intel_dp->dpcd);
+		if (!intel_dp_get_link_status(intel_dp, link_status)) {
+			DRM_ERROR("failed to get link status\n");
+			break;
+		}
+
+		/* Make sure clock is still ok */
+		if (!drm_dp_clock_recovery_ok(link_status,
+					      intel_dp->lane_count)) {
+			intel_dp->train_set_valid = false;
+			intel_dp_link_training_clock_recovery(intel_dp);
+			intel_dp_set_link_train(intel_dp,
+						training_pattern |
+						DP_LINK_SCRAMBLING_DISABLE);
+			cr_tries++;
+			continue;
+		}
+
+		if (drm_dp_channel_eq_ok(link_status,
+					 intel_dp->lane_count)) {
+			channel_eq = true;
+			break;
+		}
+
+		/* Try 5 times, then try clock recovery if that fails */
+		if (tries > 5) {
+			intel_dp->train_set_valid = false;
+			intel_dp_link_training_clock_recovery(intel_dp);
+			intel_dp_set_link_train(intel_dp,
+						training_pattern |
+						DP_LINK_SCRAMBLING_DISABLE);
+			tries = 0;
+			cr_tries++;
+			continue;
+		}
+
+		/* Update training set as requested by target */
+		intel_get_adjust_train(intel_dp, link_status);
+		if (!intel_dp_update_link_train(intel_dp)) {
+			DRM_ERROR("failed to update link training\n");
+			break;
+		}
+		++tries;
+	}
+
+	intel_dp_set_idle_link_train(intel_dp);
+
+	if (channel_eq) {
+		intel_dp->train_set_valid = true;
+		DRM_DEBUG_KMS("Channel EQ done. DP Training successful\n");
+	}
+}
+
+void intel_dp_stop_link_train(struct intel_dp *intel_dp)
+{
+	intel_dp_set_link_train(intel_dp,
+				DP_TRAINING_PATTERN_DISABLE);
+}
+
+void
+intel_dp_start_link_train(struct intel_dp *intel_dp)
+{
+	intel_dp_link_training_clock_recovery(intel_dp);
+	intel_dp_link_training_channel_equalization(intel_dp);
+}
diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c
index 0639275..e8d369d 100644
--- a/drivers/gpu/drm/i915/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/intel_dp_mst.c
@@ -173,20 +173,14 @@
 	intel_mst->port = found->port;
 
 	if (intel_dp->active_mst_links == 0) {
-		enum port port = intel_ddi_get_encoder_port(encoder);
+		intel_ddi_clk_select(encoder, intel_crtc->config);
 
 		intel_dp_set_link_params(intel_dp, intel_crtc->config);
 
-		/* FIXME: add support for SKL */
-		if (INTEL_INFO(dev)->gen < 9)
-			I915_WRITE(PORT_CLK_SEL(port),
-				   intel_crtc->config->ddi_pll_sel);
-
 		intel_ddi_init_dp_buf_reg(&intel_dig_port->base);
 
 		intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON);
 
-
 		intel_dp_start_link_train(intel_dp);
 		intel_dp_stop_link_train(intel_dp);
 	}
@@ -414,7 +408,10 @@
 {
 #ifdef CONFIG_DRM_FBDEV_EMULATION
 	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
-	drm_fb_helper_add_one_connector(&dev_priv->fbdev->helper, &connector->base);
+
+	if (dev_priv->fbdev)
+		drm_fb_helper_add_one_connector(&dev_priv->fbdev->helper,
+						&connector->base);
 #endif
 }
 
@@ -422,7 +419,10 @@
 {
 #ifdef CONFIG_DRM_FBDEV_EMULATION
 	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
-	drm_fb_helper_remove_one_connector(&dev_priv->fbdev->helper, &connector->base);
+
+	if (dev_priv->fbdev)
+		drm_fb_helper_remove_one_connector(&dev_priv->fbdev->helper,
+						   &connector->base);
 #endif
 }
 
@@ -536,7 +536,7 @@
 	intel_mst->primary = intel_dig_port;
 
 	drm_encoder_init(dev, &intel_encoder->base, &intel_dp_mst_enc_funcs,
-			 DRM_MODE_ENCODER_DPMST);
+			 DRM_MODE_ENCODER_DPMST, NULL);
 
 	intel_encoder->type = INTEL_OUTPUT_DP_MST;
 	intel_encoder->crtc_mask = 0x7;
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 0598932..50f83d2 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -123,8 +123,6 @@
 struct intel_fbdev {
 	struct drm_fb_helper helper;
 	struct intel_framebuffer *fb;
-	struct list_head fbdev_list;
-	struct drm_display_mode *our_mode;
 	int preferred_bpp;
 };
 
@@ -250,6 +248,7 @@
 	unsigned int cdclk;
 	bool dpll_set;
 	struct intel_shared_dpll_config shared_dpll[I915_NUM_PLLS];
+	struct intel_wm_config wm_config;
 };
 
 struct intel_plane_state {
@@ -280,6 +279,9 @@
 	int scaler_id;
 
 	struct drm_intel_sprite_colorkey ckey;
+
+	/* async flip related structures */
+	struct drm_i915_gem_request *wait_req;
 };
 
 struct intel_initial_plane_config {
@@ -334,6 +336,21 @@
 /* drm_mode->private_flags */
 #define I915_MODE_FLAG_INHERITED 1
 
+struct intel_pipe_wm {
+	struct intel_wm_level wm[5];
+	uint32_t linetime;
+	bool fbc_wm_enabled;
+	bool pipe_enabled;
+	bool sprites_enabled;
+	bool sprites_scaled;
+};
+
+struct skl_pipe_wm {
+	struct skl_wm_level wm[8];
+	struct skl_wm_level trans_wm;
+	uint32_t linetime;
+};
+
 struct intel_crtc_state {
 	struct drm_crtc_state base;
 
@@ -376,6 +393,9 @@
 	 * accordingly. */
 	bool has_dp_encoder;
 
+	/* DSI has special cases */
+	bool has_dsi_encoder;
+
 	/* Whether we should send NULL infoframes. Required for audio. */
 	bool has_hdmi_sink;
 
@@ -468,6 +488,20 @@
 
 	/* w/a for waiting 2 vblanks during crtc enable */
 	enum pipe hsw_workaround_pipe;
+
+	/* IVB sprite scaling w/a (WaCxSRDisabledForSpriteScaling:ivb) */
+	bool disable_lp_wm;
+
+	struct {
+		/*
+		 * optimal watermarks, programmed post-vblank when this state
+		 * is committed
+		 */
+		union {
+			struct intel_pipe_wm ilk;
+			struct skl_pipe_wm skl;
+		} optimal;
+	} wm;
 };
 
 struct vlv_wm_state {
@@ -479,26 +513,12 @@
 	bool cxsr;
 };
 
-struct intel_pipe_wm {
-	struct intel_wm_level wm[5];
-	uint32_t linetime;
-	bool fbc_wm_enabled;
-	bool pipe_enabled;
-	bool sprites_enabled;
-	bool sprites_scaled;
-};
-
 struct intel_mmio_flip {
 	struct work_struct work;
 	struct drm_i915_private *i915;
 	struct drm_i915_gem_request *req;
 	struct intel_crtc *crtc;
-};
-
-struct skl_pipe_wm {
-	struct skl_wm_level wm[8];
-	struct skl_wm_level trans_wm;
-	uint32_t linetime;
+	unsigned int rotation;
 };
 
 /*
@@ -509,13 +529,11 @@
  */
 struct intel_crtc_atomic_commit {
 	/* Sleepable operations to perform before commit */
-	bool wait_for_flips;
 	bool disable_fbc;
 	bool disable_ips;
 	bool disable_cxsr;
 	bool pre_disable_primary;
 	bool update_wm_pre, update_wm_post;
-	unsigned disabled_planes;
 
 	/* Sleepable operations to perform after commit */
 	unsigned fb_bits;
@@ -568,9 +586,10 @@
 	/* per-pipe watermark state */
 	struct {
 		/* watermarks currently being used  */
-		struct intel_pipe_wm active;
-		/* SKL wm values currently in use */
-		struct skl_pipe_wm skl_active;
+		union {
+			struct intel_pipe_wm ilk;
+			struct skl_pipe_wm skl;
+		} active;
 		/* allow CxSR on this pipe */
 		bool cxsr_allowed;
 	} wm;
@@ -678,7 +697,7 @@
 #define intel_fb_obj(x) (x ? to_intel_framebuffer(x)->obj : NULL)
 
 struct intel_hdmi {
-	u32 hdmi_reg;
+	i915_reg_t hdmi_reg;
 	int ddc_bus;
 	bool limited_color_range;
 	bool color_range_auto;
@@ -694,7 +713,8 @@
 	void (*set_infoframes)(struct drm_encoder *encoder,
 			       bool enable,
 			       const struct drm_display_mode *adjusted_mode);
-	bool (*infoframe_enabled)(struct drm_encoder *encoder);
+	bool (*infoframe_enabled)(struct drm_encoder *encoder,
+				  const struct intel_crtc_state *pipe_config);
 };
 
 struct intel_dp_mst_encoder;
@@ -720,15 +740,10 @@
 	M2_N2
 };
 
-struct sink_crc {
-	bool started;
-	u8 last_crc[6];
-	int last_count;
-};
-
 struct intel_dp {
-	uint32_t output_reg;
-	uint32_t aux_ch_ctl_reg;
+	i915_reg_t output_reg;
+	i915_reg_t aux_ch_ctl_reg;
+	i915_reg_t aux_ch_data_reg[5];
 	uint32_t DP;
 	int link_rate;
 	uint8_t lane_count;
@@ -742,7 +757,6 @@
 	/* sink rates as reported by DP_SUPPORTED_LINK_RATES */
 	uint8_t num_sink_rates;
 	int sink_rates[DP_MAX_SUPPORTED_RATES];
-	struct sink_crc sink_crc;
 	struct drm_dp_aux aux;
 	uint8_t train_set[4];
 	int panel_power_up_delay;
@@ -784,6 +798,10 @@
 				     bool has_aux_irq,
 				     int send_bytes,
 				     uint32_t aux_clock_divider);
+
+	/* This is called before a link training is starterd */
+	void (*prepare_link_retrain)(struct intel_dp *intel_dp);
+
 	bool train_set_valid;
 
 	/* Displayport compliance testing */
@@ -943,7 +961,8 @@
 					 enum pipe pipe);
 void intel_pch_fifo_underrun_irq_handler(struct drm_i915_private *dev_priv,
 					 enum transcoder pch_transcoder);
-void i9xx_check_fifo_underruns(struct drm_i915_private *dev_priv);
+void intel_check_cpu_fifo_underruns(struct drm_i915_private *dev_priv);
+void intel_check_pch_fifo_underruns(struct drm_i915_private *dev_priv);
 
 /* i915_irq.c */
 void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask);
@@ -974,6 +993,8 @@
 
 
 /* intel_ddi.c */
+void intel_ddi_clk_select(struct intel_encoder *encoder,
+			  const struct intel_crtc_state *pipe_config);
 void intel_prepare_ddi(struct drm_device *dev);
 void hsw_fdi_link_train(struct drm_crtc *crtc);
 void intel_ddi_init(struct drm_device *dev, enum port port);
@@ -988,7 +1009,7 @@
 bool intel_ddi_pll_select(struct intel_crtc *crtc,
 			  struct intel_crtc_state *crtc_state);
 void intel_ddi_set_pipe_settings(struct drm_crtc *crtc);
-void intel_ddi_prepare_link_retrain(struct drm_encoder *encoder);
+void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp);
 bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector);
 void intel_ddi_fdi_disable(struct drm_crtc *crtc);
 void intel_ddi_get_config(struct intel_encoder *encoder,
@@ -1056,6 +1077,15 @@
 {
 	drm_wait_one_vblank(dev, pipe);
 }
+static inline void
+intel_wait_for_vblank_if_active(struct drm_device *dev, int pipe)
+{
+	const struct intel_crtc *crtc =
+		to_intel_crtc(intel_get_crtc_for_pipe(dev, pipe));
+
+	if (crtc->active)
+		intel_wait_for_vblank(dev, pipe);
+}
 int ironlake_get_lanes_required(int target_clock, int link_bw, int bpp);
 void vlv_wait_port_ready(struct drm_i915_private *dev_priv,
 			 struct intel_digital_port *dport,
@@ -1069,9 +1099,7 @@
 				    struct drm_modeset_acquire_ctx *ctx);
 int intel_pin_and_fence_fb_obj(struct drm_plane *plane,
 			       struct drm_framebuffer *fb,
-			       const struct drm_plane_state *plane_state,
-			       struct intel_engine_cs *pipelined,
-			       struct drm_i915_gem_request **pipelined_request);
+			       const struct drm_plane_state *plane_state);
 struct drm_framebuffer *
 __intel_framebuffer_create(struct drm_device *dev,
 			   struct drm_mode_fb_cmd2 *mode_cmd,
@@ -1152,7 +1180,10 @@
 void bxt_enable_dc9(struct drm_i915_private *dev_priv);
 void bxt_disable_dc9(struct drm_i915_private *dev_priv);
 void skl_init_cdclk(struct drm_i915_private *dev_priv);
+int skl_sanitize_cdclk(struct drm_i915_private *dev_priv);
 void skl_uninit_cdclk(struct drm_i915_private *dev_priv);
+void skl_enable_dc6(struct drm_i915_private *dev_priv);
+void skl_disable_dc6(struct drm_i915_private *dev_priv);
 void intel_dp_get_m_n(struct intel_crtc *crtc,
 		      struct intel_crtc_state *pipe_config);
 void intel_dp_set_m_n(struct intel_crtc *crtc, enum link_m_n_set m_n);
@@ -1169,33 +1200,30 @@
 void hsw_disable_ips(struct intel_crtc *crtc);
 enum intel_display_power_domain
 intel_display_port_power_domain(struct intel_encoder *intel_encoder);
+enum intel_display_power_domain
+intel_display_port_aux_power_domain(struct intel_encoder *intel_encoder);
 void intel_mode_from_pipe_config(struct drm_display_mode *mode,
 				 struct intel_crtc_state *pipe_config);
-void intel_crtc_wait_for_pending_flips(struct drm_crtc *crtc);
 void intel_modeset_preclose(struct drm_device *dev, struct drm_file *file);
 
 int skl_update_scaler_crtc(struct intel_crtc_state *crtc_state);
 int skl_max_scale(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state);
 
-unsigned long intel_plane_obj_offset(struct intel_plane *intel_plane,
-				     struct drm_i915_gem_object *obj,
-				     unsigned int plane);
+u32 intel_plane_obj_offset(struct intel_plane *intel_plane,
+			   struct drm_i915_gem_object *obj,
+			   unsigned int plane);
 
 u32 skl_plane_ctl_format(uint32_t pixel_format);
 u32 skl_plane_ctl_tiling(uint64_t fb_modifier);
 u32 skl_plane_ctl_rotation(unsigned int rotation);
 
 /* intel_csr.c */
-void intel_csr_ucode_init(struct drm_device *dev);
-enum csr_state intel_csr_load_status_get(struct drm_i915_private *dev_priv);
-void intel_csr_load_status_set(struct drm_i915_private *dev_priv,
-					enum csr_state state);
-void intel_csr_load_program(struct drm_device *dev);
-void intel_csr_ucode_fini(struct drm_device *dev);
-void assert_csr_loaded(struct drm_i915_private *dev_priv);
+void intel_csr_ucode_init(struct drm_i915_private *);
+void intel_csr_load_program(struct drm_i915_private *);
+void intel_csr_ucode_fini(struct drm_i915_private *);
 
 /* intel_dp.c */
-void intel_dp_init(struct drm_device *dev, int output_reg, enum port port);
+void intel_dp_init(struct drm_device *dev, i915_reg_t output_reg, enum port port);
 bool intel_dp_init_connector(struct intel_digital_port *intel_dig_port,
 			     struct intel_connector *intel_connector);
 void intel_dp_set_link_params(struct intel_dp *intel_dp,
@@ -1233,6 +1261,22 @@
 					 struct intel_digital_port *port);
 void hsw_dp_set_ddi_pll_sel(struct intel_crtc_state *pipe_config);
 
+void
+intel_dp_program_link_training_pattern(struct intel_dp *intel_dp,
+				       uint8_t dp_train_pat);
+void
+intel_dp_set_signal_levels(struct intel_dp *intel_dp);
+void intel_dp_set_idle_link_train(struct intel_dp *intel_dp);
+uint8_t
+intel_dp_voltage_max(struct intel_dp *intel_dp);
+uint8_t
+intel_dp_pre_emphasis_max(struct intel_dp *intel_dp, uint8_t voltage_swing);
+void intel_dp_compute_rate(struct intel_dp *intel_dp, int port_clock,
+			   uint8_t *link_bw, uint8_t *rate_select);
+bool intel_dp_source_supports_hbr2(struct intel_dp *intel_dp);
+bool
+intel_dp_get_link_status(struct intel_dp *intel_dp, uint8_t link_status[DP_LINK_STATUS_SIZE]);
+
 /* intel_dp_mst.c */
 int intel_dp_mst_encoder_init(struct intel_digital_port *intel_dig_port, int conn_id);
 void intel_dp_mst_encoder_cleanup(struct intel_digital_port *intel_dig_port);
@@ -1247,7 +1291,7 @@
 /* legacy fbdev emulation in intel_fbdev.c */
 #ifdef CONFIG_DRM_FBDEV_EMULATION
 extern int intel_fbdev_init(struct drm_device *dev);
-extern void intel_fbdev_initial_config(void *data, async_cookie_t cookie);
+extern void intel_fbdev_initial_config_async(struct drm_device *dev);
 extern void intel_fbdev_fini(struct drm_device *dev);
 extern void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous);
 extern void intel_fbdev_output_poll_changed(struct drm_device *dev);
@@ -1258,7 +1302,7 @@
 	return 0;
 }
 
-static inline void intel_fbdev_initial_config(void *data, async_cookie_t cookie)
+static inline void intel_fbdev_initial_config_async(struct drm_device *dev)
 {
 }
 
@@ -1276,9 +1320,11 @@
 #endif
 
 /* intel_fbc.c */
-bool intel_fbc_enabled(struct drm_i915_private *dev_priv);
-void intel_fbc_update(struct drm_i915_private *dev_priv);
+bool intel_fbc_is_active(struct drm_i915_private *dev_priv);
+void intel_fbc_deactivate(struct intel_crtc *crtc);
+void intel_fbc_update(struct intel_crtc *crtc);
 void intel_fbc_init(struct drm_i915_private *dev_priv);
+void intel_fbc_enable(struct intel_crtc *crtc);
 void intel_fbc_disable(struct drm_i915_private *dev_priv);
 void intel_fbc_disable_crtc(struct intel_crtc *crtc);
 void intel_fbc_invalidate(struct drm_i915_private *dev_priv,
@@ -1286,11 +1332,10 @@
 			  enum fb_op_origin origin);
 void intel_fbc_flush(struct drm_i915_private *dev_priv,
 		     unsigned int frontbuffer_bits, enum fb_op_origin origin);
-const char *intel_no_fbc_reason_str(enum no_fbc_reason reason);
 void intel_fbc_cleanup_cfb(struct drm_i915_private *dev_priv);
 
 /* intel_hdmi.c */
-void intel_hdmi_init(struct drm_device *dev, int hdmi_reg, enum port port);
+void intel_hdmi_init(struct drm_device *dev, i915_reg_t hdmi_reg, enum port port);
 void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port,
 			       struct intel_connector *intel_connector);
 struct intel_hdmi *enc_to_intel_hdmi(struct drm_encoder *encoder);
@@ -1366,8 +1411,13 @@
 /* intel_runtime_pm.c */
 int intel_power_domains_init(struct drm_i915_private *);
 void intel_power_domains_fini(struct drm_i915_private *);
-void intel_power_domains_init_hw(struct drm_i915_private *dev_priv);
+void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume);
+void intel_power_domains_suspend(struct drm_i915_private *dev_priv);
+void skl_pw1_misc_io_init(struct drm_i915_private *dev_priv);
+void skl_pw1_misc_io_fini(struct drm_i915_private *dev_priv);
 void intel_runtime_pm_enable(struct drm_i915_private *dev_priv);
+const char *
+intel_display_power_domain_str(enum intel_display_power_domain domain);
 
 bool intel_display_power_is_enabled(struct drm_i915_private *dev_priv,
 				    enum intel_display_power_domain domain);
@@ -1377,8 +1427,6 @@
 			     enum intel_display_power_domain domain);
 void intel_display_power_put(struct drm_i915_private *dev_priv,
 			     enum intel_display_power_domain domain);
-void intel_aux_display_runtime_get(struct drm_i915_private *dev_priv);
-void intel_aux_display_runtime_put(struct drm_i915_private *dev_priv);
 void intel_runtime_pm_get(struct drm_i915_private *dev_priv);
 void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv);
 void intel_runtime_pm_put(struct drm_i915_private *dev_priv);
@@ -1396,12 +1444,6 @@
 void intel_suspend_hw(struct drm_device *dev);
 int ilk_wm_max_level(const struct drm_device *dev);
 void intel_update_watermarks(struct drm_crtc *crtc);
-void intel_update_sprite_watermarks(struct drm_plane *plane,
-				    struct drm_crtc *crtc,
-				    uint32_t sprite_width,
-				    uint32_t sprite_height,
-				    int pixel_size,
-				    bool enabled, bool scaled);
 void intel_init_pm(struct drm_device *dev);
 void intel_pm_setup(struct drm_device *dev);
 void intel_gpu_ips_init(struct drm_i915_private *dev_priv);
@@ -1429,7 +1471,8 @@
 uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config);
 
 /* intel_sdvo.c */
-bool intel_sdvo_init(struct drm_device *dev, uint32_t sdvo_reg, bool is_sdvob);
+bool intel_sdvo_init(struct drm_device *dev,
+		     i915_reg_t reg, enum port port);
 
 
 /* intel_sprite.c */
diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c
index 170ae6f..fff9a66 100644
--- a/drivers/gpu/drm/i915/intel_dsi.c
+++ b/drivers/gpu/drm/i915/intel_dsi.c
@@ -60,7 +60,8 @@
 		DRM_ERROR("DPI FIFOs are not empty\n");
 }
 
-static void write_data(struct drm_i915_private *dev_priv, u32 reg,
+static void write_data(struct drm_i915_private *dev_priv,
+		       i915_reg_t reg,
 		       const u8 *data, u32 len)
 {
 	u32 i, j;
@@ -75,7 +76,8 @@
 	}
 }
 
-static void read_data(struct drm_i915_private *dev_priv, u32 reg,
+static void read_data(struct drm_i915_private *dev_priv,
+		      i915_reg_t reg,
 		      u8 *data, u32 len)
 {
 	u32 i, j;
@@ -98,7 +100,8 @@
 	struct mipi_dsi_packet packet;
 	ssize_t ret;
 	const u8 *header, *data;
-	u32 data_reg, data_mask, ctrl_reg, ctrl_mask;
+	i915_reg_t data_reg, ctrl_reg;
+	u32 data_mask, ctrl_mask;
 
 	ret = mipi_dsi_create_packet(&packet, msg);
 	if (ret < 0)
@@ -263,16 +266,18 @@
 }
 
 static bool intel_dsi_compute_config(struct intel_encoder *encoder,
-				     struct intel_crtc_state *config)
+				     struct intel_crtc_state *pipe_config)
 {
 	struct intel_dsi *intel_dsi = container_of(encoder, struct intel_dsi,
 						   base);
 	struct intel_connector *intel_connector = intel_dsi->attached_connector;
 	struct drm_display_mode *fixed_mode = intel_connector->panel.fixed_mode;
-	struct drm_display_mode *adjusted_mode = &config->base.adjusted_mode;
+	struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode;
 
 	DRM_DEBUG_KMS("\n");
 
+	pipe_config->has_dsi_encoder = true;
+
 	if (fixed_mode)
 		intel_fixed_panel_mode(fixed_mode, adjusted_mode);
 
@@ -377,10 +382,10 @@
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc);
 	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
 	enum port port;
-	u32 temp;
-	u32 port_ctrl;
 
 	if (intel_dsi->dual_link == DSI_DUAL_LINK_FRONT_BACK) {
+		u32 temp;
+
 		temp = I915_READ(VLV_CHICKEN_3);
 		temp &= ~PIXEL_OVERLAP_CNT_MASK |
 					intel_dsi->pixel_overlap <<
@@ -389,8 +394,9 @@
 	}
 
 	for_each_dsi_port(port, intel_dsi->ports) {
-		port_ctrl = IS_BROXTON(dev) ? BXT_MIPI_PORT_CTRL(port) :
-						MIPI_PORT_CTRL(port);
+		i915_reg_t port_ctrl = IS_BROXTON(dev) ?
+			BXT_MIPI_PORT_CTRL(port) : MIPI_PORT_CTRL(port);
+		u32 temp;
 
 		temp = I915_READ(port_ctrl);
 
@@ -416,13 +422,13 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
 	enum port port;
-	u32 temp;
-	u32 port_ctrl;
 
 	for_each_dsi_port(port, intel_dsi->ports) {
+		i915_reg_t port_ctrl = IS_BROXTON(dev) ?
+			BXT_MIPI_PORT_CTRL(port) : MIPI_PORT_CTRL(port);
+		u32 temp;
+
 		/* de-assert ip_tg_enable signal */
-		port_ctrl = IS_BROXTON(dev) ? BXT_MIPI_PORT_CTRL(port) :
-						MIPI_PORT_CTRL(port);
 		temp = I915_READ(port_ctrl);
 		I915_WRITE(port_ctrl, temp & ~DPI_ENABLE);
 		POSTING_READ(port_ctrl);
@@ -458,6 +464,8 @@
 	intel_panel_enable_backlight(intel_dsi->attached_connector);
 }
 
+static void intel_dsi_prepare(struct intel_encoder *intel_encoder);
+
 static void intel_dsi_pre_enable(struct intel_encoder *encoder)
 {
 	struct drm_device *dev = encoder->base.dev;
@@ -470,6 +478,9 @@
 
 	DRM_DEBUG_KMS("\n");
 
+	intel_dsi_prepare(encoder);
+	intel_enable_dsi_pll(encoder);
+
 	/* Panel Enable over CRC PMIC */
 	if (intel_dsi->gpio_panel)
 		gpiod_set_value_cansleep(intel_dsi->gpio_panel, 1);
@@ -580,11 +591,13 @@
 	struct drm_i915_private *dev_priv = encoder->base.dev->dev_private;
 	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
 	enum port port;
-	u32 val;
-	u32 port_ctrl = 0;
 
 	DRM_DEBUG_KMS("\n");
 	for_each_dsi_port(port, intel_dsi->ports) {
+		/* Common bit for both MIPI Port A & MIPI Port C on VLV/CHV */
+		i915_reg_t port_ctrl = IS_BROXTON(dev) ?
+			BXT_MIPI_PORT_CTRL(port) : MIPI_PORT_CTRL(PORT_A);
+		u32 val;
 
 		I915_WRITE(MIPI_DEVICE_READY(port), DEVICE_READY |
 							ULPS_STATE_ENTER);
@@ -598,12 +611,6 @@
 							ULPS_STATE_ENTER);
 		usleep_range(2000, 2500);
 
-		if (IS_BROXTON(dev))
-			port_ctrl = BXT_MIPI_PORT_CTRL(port);
-		else if (IS_VALLEYVIEW(dev))
-			/* Common bit for both MIPI Port A & MIPI Port C */
-			port_ctrl = MIPI_PORT_CTRL(PORT_A);
-
 		/* Wait till Clock lanes are in LP-00 state for MIPI Port A
 		 * only. MIPI Port C has no similar bit for checking
 		 */
@@ -656,7 +663,6 @@
 	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
 	struct drm_device *dev = encoder->base.dev;
 	enum intel_display_power_domain power_domain;
-	u32 dpi_enabled, func, ctrl_reg;
 	enum port port;
 
 	DRM_DEBUG_KMS("\n");
@@ -667,9 +673,11 @@
 
 	/* XXX: this only works for one DSI output */
 	for_each_dsi_port(port, intel_dsi->ports) {
+		i915_reg_t ctrl_reg = IS_BROXTON(dev) ?
+			BXT_MIPI_PORT_CTRL(port) : MIPI_PORT_CTRL(port);
+		u32 dpi_enabled, func;
+
 		func = I915_READ(MIPI_DSI_FUNC_PRG(port));
-		ctrl_reg = IS_BROXTON(dev) ? BXT_MIPI_PORT_CTRL(port) :
-						MIPI_PORT_CTRL(port);
 		dpi_enabled = I915_READ(ctrl_reg) & DPI_ENABLE;
 
 		/* Due to some hardware limitations on BYT, MIPI Port C DPI
@@ -698,6 +706,8 @@
 	u32 pclk = 0;
 	DRM_DEBUG_KMS("\n");
 
+	pipe_config->has_dsi_encoder = true;
+
 	/*
 	 * DPLL_MD is not used in case of DSI, reading will get some default value
 	 * set dpll_md = 0
@@ -1025,15 +1035,6 @@
 	}
 }
 
-static void intel_dsi_pre_pll_enable(struct intel_encoder *encoder)
-{
-	DRM_DEBUG_KMS("\n");
-
-	intel_dsi_prepare(encoder);
-	intel_enable_dsi_pll(encoder);
-
-}
-
 static enum drm_connector_status
 intel_dsi_detect(struct drm_connector *connector, bool force)
 {
@@ -1151,11 +1152,10 @@
 
 	connector = &intel_connector->base;
 
-	drm_encoder_init(dev, encoder, &intel_dsi_funcs, DRM_MODE_ENCODER_DSI);
+	drm_encoder_init(dev, encoder, &intel_dsi_funcs, DRM_MODE_ENCODER_DSI,
+			 NULL);
 
-	/* XXX: very likely not all of these are needed */
 	intel_encoder->compute_config = intel_dsi_compute_config;
-	intel_encoder->pre_pll_enable = intel_dsi_pre_pll_enable;
 	intel_encoder->pre_enable = intel_dsi_pre_enable;
 	intel_encoder->enable = intel_dsi_enable_nop;
 	intel_encoder->disable = intel_dsi_pre_disable;
diff --git a/drivers/gpu/drm/i915/intel_dsi.h b/drivers/gpu/drm/i915/intel_dsi.h
index e6cb252..02551ff 100644
--- a/drivers/gpu/drm/i915/intel_dsi.h
+++ b/drivers/gpu/drm/i915/intel_dsi.h
@@ -117,7 +117,7 @@
 
 #define for_each_dsi_port(__port, __ports_mask) \
 	for ((__port) = PORT_A; (__port) < I915_MAX_PORTS; (__port)++)	\
-		if ((__ports_mask) & (1 << (__port)))
+		for_each_if ((__ports_mask) & (1 << (__port)))
 
 static inline struct intel_dsi *enc_to_intel_dsi(struct drm_encoder *encoder)
 {
diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c
index 8492053..286baec 100644
--- a/drivers/gpu/drm/i915/intel_dvo.c
+++ b/drivers/gpu/drm/i915/intel_dvo.c
@@ -44,6 +44,7 @@
 		.type = INTEL_DVO_CHIP_TMDS,
 		.name = "sil164",
 		.dvo_reg = DVOC,
+		.dvo_srcdim_reg = DVOC_SRCDIM,
 		.slave_addr = SIL164_ADDR,
 		.dev_ops = &sil164_ops,
 	},
@@ -51,6 +52,7 @@
 		.type = INTEL_DVO_CHIP_TMDS,
 		.name = "ch7xxx",
 		.dvo_reg = DVOC,
+		.dvo_srcdim_reg = DVOC_SRCDIM,
 		.slave_addr = CH7xxx_ADDR,
 		.dev_ops = &ch7xxx_ops,
 	},
@@ -58,6 +60,7 @@
 		.type = INTEL_DVO_CHIP_TMDS,
 		.name = "ch7xxx",
 		.dvo_reg = DVOC,
+		.dvo_srcdim_reg = DVOC_SRCDIM,
 		.slave_addr = 0x75, /* For some ch7010 */
 		.dev_ops = &ch7xxx_ops,
 	},
@@ -65,6 +68,7 @@
 		.type = INTEL_DVO_CHIP_LVDS,
 		.name = "ivch",
 		.dvo_reg = DVOA,
+		.dvo_srcdim_reg = DVOA_SRCDIM,
 		.slave_addr = 0x02, /* Might also be 0x44, 0x84, 0xc4 */
 		.dev_ops = &ivch_ops,
 	},
@@ -72,6 +76,7 @@
 		.type = INTEL_DVO_CHIP_TMDS,
 		.name = "tfp410",
 		.dvo_reg = DVOC,
+		.dvo_srcdim_reg = DVOC_SRCDIM,
 		.slave_addr = TFP410_ADDR,
 		.dev_ops = &tfp410_ops,
 	},
@@ -79,6 +84,7 @@
 		.type = INTEL_DVO_CHIP_LVDS,
 		.name = "ch7017",
 		.dvo_reg = DVOC,
+		.dvo_srcdim_reg = DVOC_SRCDIM,
 		.slave_addr = 0x75,
 		.gpio = GMBUS_PIN_DPB,
 		.dev_ops = &ch7017_ops,
@@ -87,6 +93,7 @@
 	        .type = INTEL_DVO_CHIP_TMDS,
 		.name = "ns2501",
 		.dvo_reg = DVOB,
+		.dvo_srcdim_reg = DVOB_SRCDIM,
 		.slave_addr = NS2501_ADDR,
 		.dev_ops = &ns2501_ops,
        }
@@ -171,7 +178,7 @@
 {
 	struct drm_i915_private *dev_priv = encoder->base.dev->dev_private;
 	struct intel_dvo *intel_dvo = enc_to_dvo(encoder);
-	u32 dvo_reg = intel_dvo->dev.dvo_reg;
+	i915_reg_t dvo_reg = intel_dvo->dev.dvo_reg;
 	u32 temp = I915_READ(dvo_reg);
 
 	intel_dvo->dev.dev_ops->dpms(&intel_dvo->dev, false);
@@ -184,7 +191,7 @@
 	struct drm_i915_private *dev_priv = encoder->base.dev->dev_private;
 	struct intel_dvo *intel_dvo = enc_to_dvo(encoder);
 	struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
-	u32 dvo_reg = intel_dvo->dev.dvo_reg;
+	i915_reg_t dvo_reg = intel_dvo->dev.dvo_reg;
 	u32 temp = I915_READ(dvo_reg);
 
 	intel_dvo->dev.dev_ops->mode_set(&intel_dvo->dev,
@@ -255,20 +262,8 @@
 	struct intel_dvo *intel_dvo = enc_to_dvo(encoder);
 	int pipe = crtc->pipe;
 	u32 dvo_val;
-	u32 dvo_reg = intel_dvo->dev.dvo_reg, dvo_srcdim_reg;
-
-	switch (dvo_reg) {
-	case DVOA:
-	default:
-		dvo_srcdim_reg = DVOA_SRCDIM;
-		break;
-	case DVOB:
-		dvo_srcdim_reg = DVOB_SRCDIM;
-		break;
-	case DVOC:
-		dvo_srcdim_reg = DVOC_SRCDIM;
-		break;
-	}
+	i915_reg_t dvo_reg = intel_dvo->dev.dvo_reg;
+	i915_reg_t dvo_srcdim_reg = intel_dvo->dev.dvo_srcdim_reg;
 
 	/* Save the data order, since I don't know what it should be set to. */
 	dvo_val = I915_READ(dvo_reg) &
@@ -434,7 +429,7 @@
 
 	intel_encoder = &intel_dvo->base;
 	drm_encoder_init(dev, &intel_encoder->base,
-			 &intel_dvo_enc_funcs, encoder_type);
+			 &intel_dvo_enc_funcs, encoder_type, NULL);
 
 	intel_encoder->disable = intel_disable_dvo;
 	intel_encoder->enable = intel_enable_dvo;
diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
index cf47352..a1988a4 100644
--- a/drivers/gpu/drm/i915/intel_fbc.c
+++ b/drivers/gpu/drm/i915/intel_fbc.c
@@ -43,7 +43,17 @@
 
 static inline bool fbc_supported(struct drm_i915_private *dev_priv)
 {
-	return dev_priv->fbc.enable_fbc != NULL;
+	return dev_priv->fbc.activate != NULL;
+}
+
+static inline bool fbc_on_pipe_a_only(struct drm_i915_private *dev_priv)
+{
+	return IS_HASWELL(dev_priv) || INTEL_INFO(dev_priv)->gen >= 8;
+}
+
+static inline bool fbc_on_plane_a_only(struct drm_i915_private *dev_priv)
+{
+	return INTEL_INFO(dev_priv)->gen < 4;
 }
 
 /*
@@ -59,11 +69,51 @@
 	return crtc->base.y - crtc->adjusted_y;
 }
 
-static void i8xx_fbc_disable(struct drm_i915_private *dev_priv)
+/*
+ * For SKL+, the plane source size used by the hardware is based on the value we
+ * write to the PLANE_SIZE register. For BDW-, the hardware looks at the value
+ * we wrote to PIPESRC.
+ */
+static void intel_fbc_get_plane_source_size(struct intel_crtc *crtc,
+					    int *width, int *height)
+{
+	struct intel_plane_state *plane_state =
+			to_intel_plane_state(crtc->base.primary->state);
+	int w, h;
+
+	if (intel_rotation_90_or_270(plane_state->base.rotation)) {
+		w = drm_rect_height(&plane_state->src) >> 16;
+		h = drm_rect_width(&plane_state->src) >> 16;
+	} else {
+		w = drm_rect_width(&plane_state->src) >> 16;
+		h = drm_rect_height(&plane_state->src) >> 16;
+	}
+
+	if (width)
+		*width = w;
+	if (height)
+		*height = h;
+}
+
+static int intel_fbc_calculate_cfb_size(struct intel_crtc *crtc,
+					struct drm_framebuffer *fb)
+{
+	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
+	int lines;
+
+	intel_fbc_get_plane_source_size(crtc, NULL, &lines);
+	if (INTEL_INFO(dev_priv)->gen >= 7)
+		lines = min(lines, 2048);
+
+	/* Hardware needs the full buffer stride, not just the active area. */
+	return lines * fb->pitches[0];
+}
+
+static void i8xx_fbc_deactivate(struct drm_i915_private *dev_priv)
 {
 	u32 fbc_ctl;
 
-	dev_priv->fbc.enabled = false;
+	dev_priv->fbc.active = false;
 
 	/* Disable compression */
 	fbc_ctl = I915_READ(FBC_CONTROL);
@@ -78,11 +128,9 @@
 		DRM_DEBUG_KMS("FBC idle timed out\n");
 		return;
 	}
-
-	DRM_DEBUG_KMS("disabled FBC\n");
 }
 
-static void i8xx_fbc_enable(struct intel_crtc *crtc)
+static void i8xx_fbc_activate(struct intel_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
 	struct drm_framebuffer *fb = crtc->base.primary->fb;
@@ -91,10 +139,10 @@
 	int i;
 	u32 fbc_ctl;
 
-	dev_priv->fbc.enabled = true;
+	dev_priv->fbc.active = true;
 
 	/* Note: fbc.threshold == 1 for i8xx */
-	cfb_pitch = dev_priv->fbc.uncompressed_size / FBC_LL_SIZE;
+	cfb_pitch = intel_fbc_calculate_cfb_size(crtc, fb) / FBC_LL_SIZE;
 	if (fb->pitches[0] < cfb_pitch)
 		cfb_pitch = fb->pitches[0];
 
@@ -127,24 +175,21 @@
 	fbc_ctl |= (cfb_pitch & 0xff) << FBC_CTL_STRIDE_SHIFT;
 	fbc_ctl |= obj->fence_reg;
 	I915_WRITE(FBC_CONTROL, fbc_ctl);
-
-	DRM_DEBUG_KMS("enabled FBC, pitch %d, yoff %d, plane %c\n",
-		      cfb_pitch, crtc->base.y, plane_name(crtc->plane));
 }
 
-static bool i8xx_fbc_enabled(struct drm_i915_private *dev_priv)
+static bool i8xx_fbc_is_active(struct drm_i915_private *dev_priv)
 {
 	return I915_READ(FBC_CONTROL) & FBC_CTL_EN;
 }
 
-static void g4x_fbc_enable(struct intel_crtc *crtc)
+static void g4x_fbc_activate(struct intel_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
 	struct drm_framebuffer *fb = crtc->base.primary->fb;
 	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	u32 dpfc_ctl;
 
-	dev_priv->fbc.enabled = true;
+	dev_priv->fbc.active = true;
 
 	dpfc_ctl = DPFC_CTL_PLANE(crtc->plane) | DPFC_SR_EN;
 	if (drm_format_plane_cpp(fb->pixel_format, 0) == 2)
@@ -157,38 +202,35 @@
 
 	/* enable it... */
 	I915_WRITE(DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN);
-
-	DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(crtc->plane));
 }
 
-static void g4x_fbc_disable(struct drm_i915_private *dev_priv)
+static void g4x_fbc_deactivate(struct drm_i915_private *dev_priv)
 {
 	u32 dpfc_ctl;
 
-	dev_priv->fbc.enabled = false;
+	dev_priv->fbc.active = false;
 
 	/* Disable compression */
 	dpfc_ctl = I915_READ(DPFC_CONTROL);
 	if (dpfc_ctl & DPFC_CTL_EN) {
 		dpfc_ctl &= ~DPFC_CTL_EN;
 		I915_WRITE(DPFC_CONTROL, dpfc_ctl);
-
-		DRM_DEBUG_KMS("disabled FBC\n");
 	}
 }
 
-static bool g4x_fbc_enabled(struct drm_i915_private *dev_priv)
+static bool g4x_fbc_is_active(struct drm_i915_private *dev_priv)
 {
 	return I915_READ(DPFC_CONTROL) & DPFC_CTL_EN;
 }
 
-static void intel_fbc_nuke(struct drm_i915_private *dev_priv)
+/* This function forces a CFB recompression through the nuke operation. */
+static void intel_fbc_recompress(struct drm_i915_private *dev_priv)
 {
 	I915_WRITE(MSG_FBC_REND_STATE, FBC_REND_NUKE);
 	POSTING_READ(MSG_FBC_REND_STATE);
 }
 
-static void ilk_fbc_enable(struct intel_crtc *crtc)
+static void ilk_fbc_activate(struct intel_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
 	struct drm_framebuffer *fb = crtc->base.primary->fb;
@@ -197,7 +239,7 @@
 	int threshold = dev_priv->fbc.threshold;
 	unsigned int y_offset;
 
-	dev_priv->fbc.enabled = true;
+	dev_priv->fbc.active = true;
 
 	dpfc_ctl = DPFC_CTL_PLANE(crtc->plane);
 	if (drm_format_plane_cpp(fb->pixel_format, 0) == 2)
@@ -231,33 +273,29 @@
 		I915_WRITE(DPFC_CPU_FENCE_OFFSET, y_offset);
 	}
 
-	intel_fbc_nuke(dev_priv);
-
-	DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(crtc->plane));
+	intel_fbc_recompress(dev_priv);
 }
 
-static void ilk_fbc_disable(struct drm_i915_private *dev_priv)
+static void ilk_fbc_deactivate(struct drm_i915_private *dev_priv)
 {
 	u32 dpfc_ctl;
 
-	dev_priv->fbc.enabled = false;
+	dev_priv->fbc.active = false;
 
 	/* Disable compression */
 	dpfc_ctl = I915_READ(ILK_DPFC_CONTROL);
 	if (dpfc_ctl & DPFC_CTL_EN) {
 		dpfc_ctl &= ~DPFC_CTL_EN;
 		I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl);
-
-		DRM_DEBUG_KMS("disabled FBC\n");
 	}
 }
 
-static bool ilk_fbc_enabled(struct drm_i915_private *dev_priv)
+static bool ilk_fbc_is_active(struct drm_i915_private *dev_priv)
 {
 	return I915_READ(ILK_DPFC_CONTROL) & DPFC_CTL_EN;
 }
 
-static void gen7_fbc_enable(struct intel_crtc *crtc)
+static void gen7_fbc_activate(struct intel_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
 	struct drm_framebuffer *fb = crtc->base.primary->fb;
@@ -265,7 +303,7 @@
 	u32 dpfc_ctl;
 	int threshold = dev_priv->fbc.threshold;
 
-	dev_priv->fbc.enabled = true;
+	dev_priv->fbc.active = true;
 
 	dpfc_ctl = 0;
 	if (IS_IVYBRIDGE(dev_priv))
@@ -310,106 +348,42 @@
 		   SNB_CPU_FENCE_ENABLE | obj->fence_reg);
 	I915_WRITE(DPFC_CPU_FENCE_OFFSET, get_crtc_fence_y_offset(crtc));
 
-	intel_fbc_nuke(dev_priv);
-
-	DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(crtc->plane));
+	intel_fbc_recompress(dev_priv);
 }
 
 /**
- * intel_fbc_enabled - Is FBC enabled?
+ * intel_fbc_is_active - Is FBC active?
  * @dev_priv: i915 device instance
  *
  * This function is used to verify the current state of FBC.
  * FIXME: This should be tracked in the plane config eventually
  *        instead of queried at runtime for most callers.
  */
-bool intel_fbc_enabled(struct drm_i915_private *dev_priv)
+bool intel_fbc_is_active(struct drm_i915_private *dev_priv)
 {
-	return dev_priv->fbc.enabled;
+	return dev_priv->fbc.active;
 }
 
-static void intel_fbc_enable(struct intel_crtc *crtc,
-			     const struct drm_framebuffer *fb)
+static void intel_fbc_activate(const struct drm_framebuffer *fb)
 {
-	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
+	struct drm_i915_private *dev_priv = fb->dev->dev_private;
+	struct intel_crtc *crtc = dev_priv->fbc.crtc;
 
-	dev_priv->fbc.enable_fbc(crtc);
+	dev_priv->fbc.activate(crtc);
 
-	dev_priv->fbc.crtc = crtc;
 	dev_priv->fbc.fb_id = fb->base.id;
 	dev_priv->fbc.y = crtc->base.y;
 }
 
 static void intel_fbc_work_fn(struct work_struct *__work)
 {
-	struct intel_fbc_work *work =
-		container_of(to_delayed_work(__work),
-			     struct intel_fbc_work, work);
-	struct drm_i915_private *dev_priv = work->crtc->base.dev->dev_private;
-	struct drm_framebuffer *crtc_fb = work->crtc->base.primary->fb;
+	struct drm_i915_private *dev_priv =
+		container_of(__work, struct drm_i915_private, fbc.work.work);
+	struct intel_fbc_work *work = &dev_priv->fbc.work;
+	struct intel_crtc *crtc = dev_priv->fbc.crtc;
+	int delay_ms = 50;
 
-	mutex_lock(&dev_priv->fbc.lock);
-	if (work == dev_priv->fbc.fbc_work) {
-		/* Double check that we haven't switched fb without cancelling
-		 * the prior work.
-		 */
-		if (crtc_fb == work->fb)
-			intel_fbc_enable(work->crtc, work->fb);
-
-		dev_priv->fbc.fbc_work = NULL;
-	}
-	mutex_unlock(&dev_priv->fbc.lock);
-
-	kfree(work);
-}
-
-static void intel_fbc_cancel_work(struct drm_i915_private *dev_priv)
-{
-	WARN_ON(!mutex_is_locked(&dev_priv->fbc.lock));
-
-	if (dev_priv->fbc.fbc_work == NULL)
-		return;
-
-	DRM_DEBUG_KMS("cancelling pending FBC enable\n");
-
-	/* Synchronisation is provided by struct_mutex and checking of
-	 * dev_priv->fbc.fbc_work, so we can perform the cancellation
-	 * entirely asynchronously.
-	 */
-	if (cancel_delayed_work(&dev_priv->fbc.fbc_work->work))
-		/* tasklet was killed before being run, clean up */
-		kfree(dev_priv->fbc.fbc_work);
-
-	/* Mark the work as no longer wanted so that if it does
-	 * wake-up (because the work was already running and waiting
-	 * for our mutex), it will discover that is no longer
-	 * necessary to run.
-	 */
-	dev_priv->fbc.fbc_work = NULL;
-}
-
-static void intel_fbc_schedule_enable(struct intel_crtc *crtc)
-{
-	struct intel_fbc_work *work;
-	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
-
-	WARN_ON(!mutex_is_locked(&dev_priv->fbc.lock));
-
-	intel_fbc_cancel_work(dev_priv);
-
-	work = kzalloc(sizeof(*work), GFP_KERNEL);
-	if (work == NULL) {
-		DRM_ERROR("Failed to allocate FBC work structure\n");
-		intel_fbc_enable(crtc, crtc->base.primary->fb);
-		return;
-	}
-
-	work->crtc = crtc;
-	work->fb = crtc->base.primary->fb;
-	INIT_DELAYED_WORK(&work->work, intel_fbc_work_fn);
-
-	dev_priv->fbc.fbc_work = work;
-
+retry:
 	/* Delay the actual enabling to let pageflipping cease and the
 	 * display to settle before starting the compression. Note that
 	 * this delay also serves a second purpose: it allows for a
@@ -423,42 +397,71 @@
 	 *
 	 * WaFbcWaitForVBlankBeforeEnable:ilk,snb
 	 */
-	schedule_delayed_work(&work->work, msecs_to_jiffies(50));
+	wait_remaining_ms_from_jiffies(work->enable_jiffies, delay_ms);
+
+	mutex_lock(&dev_priv->fbc.lock);
+
+	/* Were we cancelled? */
+	if (!work->scheduled)
+		goto out;
+
+	/* Were we delayed again while this function was sleeping? */
+	if (time_after(work->enable_jiffies + msecs_to_jiffies(delay_ms),
+		       jiffies)) {
+		mutex_unlock(&dev_priv->fbc.lock);
+		goto retry;
+	}
+
+	if (crtc->base.primary->fb == work->fb)
+		intel_fbc_activate(work->fb);
+
+	work->scheduled = false;
+
+out:
+	mutex_unlock(&dev_priv->fbc.lock);
 }
 
-static void __intel_fbc_disable(struct drm_i915_private *dev_priv)
+static void intel_fbc_cancel_work(struct drm_i915_private *dev_priv)
+{
+	WARN_ON(!mutex_is_locked(&dev_priv->fbc.lock));
+	dev_priv->fbc.work.scheduled = false;
+}
+
+static void intel_fbc_schedule_activation(struct intel_crtc *crtc)
+{
+	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
+	struct intel_fbc_work *work = &dev_priv->fbc.work;
+
+	WARN_ON(!mutex_is_locked(&dev_priv->fbc.lock));
+
+	/* It is useless to call intel_fbc_cancel_work() in this function since
+	 * we're not releasing fbc.lock, so it won't have an opportunity to grab
+	 * it to discover that it was cancelled. So we just update the expected
+	 * jiffy count. */
+	work->fb = crtc->base.primary->fb;
+	work->scheduled = true;
+	work->enable_jiffies = jiffies;
+
+	schedule_work(&work->work);
+}
+
+static void __intel_fbc_deactivate(struct drm_i915_private *dev_priv)
 {
 	WARN_ON(!mutex_is_locked(&dev_priv->fbc.lock));
 
 	intel_fbc_cancel_work(dev_priv);
 
-	dev_priv->fbc.disable_fbc(dev_priv);
-	dev_priv->fbc.crtc = NULL;
-}
-
-/**
- * intel_fbc_disable - disable FBC
- * @dev_priv: i915 device instance
- *
- * This function disables FBC.
- */
-void intel_fbc_disable(struct drm_i915_private *dev_priv)
-{
-	if (!fbc_supported(dev_priv))
-		return;
-
-	mutex_lock(&dev_priv->fbc.lock);
-	__intel_fbc_disable(dev_priv);
-	mutex_unlock(&dev_priv->fbc.lock);
+	if (dev_priv->fbc.active)
+		dev_priv->fbc.deactivate(dev_priv);
 }
 
 /*
- * intel_fbc_disable_crtc - disable FBC if it's associated with crtc
+ * intel_fbc_deactivate - deactivate FBC if it's associated with crtc
  * @crtc: the CRTC
  *
- * This function disables FBC if it's associated with the provided CRTC.
+ * This function deactivates FBC if it's associated with the provided CRTC.
  */
-void intel_fbc_disable_crtc(struct intel_crtc *crtc)
+void intel_fbc_deactivate(struct intel_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
 
@@ -467,85 +470,42 @@
 
 	mutex_lock(&dev_priv->fbc.lock);
 	if (dev_priv->fbc.crtc == crtc)
-		__intel_fbc_disable(dev_priv);
+		__intel_fbc_deactivate(dev_priv);
 	mutex_unlock(&dev_priv->fbc.lock);
 }
 
-const char *intel_no_fbc_reason_str(enum no_fbc_reason reason)
-{
-	switch (reason) {
-	case FBC_OK:
-		return "FBC enabled but currently disabled in hardware";
-	case FBC_UNSUPPORTED:
-		return "unsupported by this chipset";
-	case FBC_NO_OUTPUT:
-		return "no output";
-	case FBC_STOLEN_TOO_SMALL:
-		return "not enough stolen memory";
-	case FBC_UNSUPPORTED_MODE:
-		return "mode incompatible with compression";
-	case FBC_MODE_TOO_LARGE:
-		return "mode too large for compression";
-	case FBC_BAD_PLANE:
-		return "FBC unsupported on plane";
-	case FBC_NOT_TILED:
-		return "framebuffer not tiled or fenced";
-	case FBC_MULTIPLE_PIPES:
-		return "more than one pipe active";
-	case FBC_MODULE_PARAM:
-		return "disabled per module param";
-	case FBC_CHIP_DEFAULT:
-		return "disabled per chip default";
-	case FBC_ROTATION:
-		return "rotation unsupported";
-	case FBC_IN_DBG_MASTER:
-		return "Kernel debugger is active";
-	case FBC_BAD_STRIDE:
-		return "framebuffer stride not supported";
-	case FBC_PIXEL_RATE:
-		return "pixel rate is too big";
-	case FBC_PIXEL_FORMAT:
-		return "pixel format is invalid";
-	default:
-		MISSING_CASE(reason);
-		return "unknown reason";
-	}
-}
-
 static void set_no_fbc_reason(struct drm_i915_private *dev_priv,
-			      enum no_fbc_reason reason)
+			      const char *reason)
 {
 	if (dev_priv->fbc.no_fbc_reason == reason)
 		return;
 
 	dev_priv->fbc.no_fbc_reason = reason;
-	DRM_DEBUG_KMS("Disabling FBC: %s\n", intel_no_fbc_reason_str(reason));
+	DRM_DEBUG_KMS("Disabling FBC: %s\n", reason);
 }
 
-static struct drm_crtc *intel_fbc_find_crtc(struct drm_i915_private *dev_priv)
+static bool crtc_can_fbc(struct intel_crtc *crtc)
 {
-	struct drm_crtc *crtc = NULL, *tmp_crtc;
-	enum pipe pipe;
-	bool pipe_a_only = false;
+	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
 
-	if (IS_HASWELL(dev_priv) || INTEL_INFO(dev_priv)->gen >= 8)
-		pipe_a_only = true;
+	if (fbc_on_pipe_a_only(dev_priv) && crtc->pipe != PIPE_A)
+		return false;
 
-	for_each_pipe(dev_priv, pipe) {
-		tmp_crtc = dev_priv->pipe_to_crtc_mapping[pipe];
+	if (fbc_on_plane_a_only(dev_priv) && crtc->plane != PLANE_A)
+		return false;
 
-		if (intel_crtc_active(tmp_crtc) &&
-		    to_intel_plane_state(tmp_crtc->primary->state)->visible)
-			crtc = tmp_crtc;
+	return true;
+}
 
-		if (pipe_a_only)
-			break;
-	}
+static bool crtc_is_valid(struct intel_crtc *crtc)
+{
+	if (!intel_crtc_active(&crtc->base))
+		return false;
 
-	if (!crtc || crtc->primary->fb == NULL)
-		return NULL;
+	if (!to_intel_plane_state(crtc->base.primary->state)->visible)
+		return false;
 
-	return crtc;
+	return true;
 }
 
 static bool multiple_pipes_ok(struct drm_i915_private *dev_priv)
@@ -581,7 +541,8 @@
 	 * reserved range size, so it always assumes the maximum (8mb) is used.
 	 * If we enable FBC using a CFB on that memory range we'll get FIFO
 	 * underruns, even if that range is not reserved by the BIOS. */
-	if (IS_BROADWELL(dev_priv) || IS_SKYLAKE(dev_priv))
+	if (IS_BROADWELL(dev_priv) ||
+	    IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
 		end = dev_priv->gtt.stolen_size - 8 * 1024 * 1024;
 	else
 		end = dev_priv->gtt.stolen_usable_size;
@@ -617,11 +578,17 @@
 	}
 }
 
-static int intel_fbc_alloc_cfb(struct drm_i915_private *dev_priv, int size,
-			       int fb_cpp)
+static int intel_fbc_alloc_cfb(struct intel_crtc *crtc)
 {
+	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
+	struct drm_framebuffer *fb = crtc->base.primary->state->fb;
 	struct drm_mm_node *uninitialized_var(compressed_llb);
-	int ret;
+	int size, fb_cpp, ret;
+
+	WARN_ON(drm_mm_node_allocated(&dev_priv->fbc.compressed_fb));
+
+	size = intel_fbc_calculate_cfb_size(crtc, fb);
+	fb_cpp = drm_format_plane_cpp(fb->pixel_format, 0);
 
 	ret = find_compression_threshold(dev_priv, &dev_priv->fbc.compressed_fb,
 					 size, fb_cpp);
@@ -656,8 +623,6 @@
 			   dev_priv->mm.stolen_base + compressed_llb->start);
 	}
 
-	dev_priv->fbc.uncompressed_size = size;
-
 	DRM_DEBUG_KMS("reserved %llu bytes of contiguous stolen space for FBC, threshold: %d\n",
 		      dev_priv->fbc.compressed_fb.size,
 		      dev_priv->fbc.threshold);
@@ -674,18 +639,15 @@
 
 static void __intel_fbc_cleanup_cfb(struct drm_i915_private *dev_priv)
 {
-	if (dev_priv->fbc.uncompressed_size == 0)
-		return;
-
-	i915_gem_stolen_remove_node(dev_priv, &dev_priv->fbc.compressed_fb);
+	if (drm_mm_node_allocated(&dev_priv->fbc.compressed_fb))
+		i915_gem_stolen_remove_node(dev_priv,
+					    &dev_priv->fbc.compressed_fb);
 
 	if (dev_priv->fbc.compressed_llb) {
 		i915_gem_stolen_remove_node(dev_priv,
 					    dev_priv->fbc.compressed_llb);
 		kfree(dev_priv->fbc.compressed_llb);
 	}
-
-	dev_priv->fbc.uncompressed_size = 0;
 }
 
 void intel_fbc_cleanup_cfb(struct drm_i915_private *dev_priv)
@@ -698,63 +660,6 @@
 	mutex_unlock(&dev_priv->fbc.lock);
 }
 
-/*
- * For SKL+, the plane source size used by the hardware is based on the value we
- * write to the PLANE_SIZE register. For BDW-, the hardware looks at the value
- * we wrote to PIPESRC.
- */
-static void intel_fbc_get_plane_source_size(struct intel_crtc *crtc,
-					    int *width, int *height)
-{
-	struct intel_plane_state *plane_state =
-			to_intel_plane_state(crtc->base.primary->state);
-	int w, h;
-
-	if (intel_rotation_90_or_270(plane_state->base.rotation)) {
-		w = drm_rect_height(&plane_state->src) >> 16;
-		h = drm_rect_width(&plane_state->src) >> 16;
-	} else {
-		w = drm_rect_width(&plane_state->src) >> 16;
-		h = drm_rect_height(&plane_state->src) >> 16;
-	}
-
-	if (width)
-		*width = w;
-	if (height)
-		*height = h;
-}
-
-static int intel_fbc_calculate_cfb_size(struct intel_crtc *crtc)
-{
-	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
-	struct drm_framebuffer *fb = crtc->base.primary->fb;
-	int lines;
-
-	intel_fbc_get_plane_source_size(crtc, NULL, &lines);
-	if (INTEL_INFO(dev_priv)->gen >= 7)
-		lines = min(lines, 2048);
-
-	return lines * fb->pitches[0];
-}
-
-static int intel_fbc_setup_cfb(struct intel_crtc *crtc)
-{
-	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
-	struct drm_framebuffer *fb = crtc->base.primary->fb;
-	int size, cpp;
-
-	size = intel_fbc_calculate_cfb_size(crtc);
-	cpp = drm_format_plane_cpp(fb->pixel_format, 0);
-
-	if (size <= dev_priv->fbc.uncompressed_size)
-		return 0;
-
-	/* Release any current block */
-	__intel_fbc_cleanup_cfb(dev_priv);
-
-	return intel_fbc_alloc_cfb(dev_priv, size, cpp);
-}
-
 static bool stride_is_valid(struct drm_i915_private *dev_priv,
 			    unsigned int stride)
 {
@@ -829,87 +734,46 @@
 }
 
 /**
- * __intel_fbc_update - enable/disable FBC as needed, unlocked
- * @dev_priv: i915 device instance
+ * __intel_fbc_update - activate/deactivate FBC as needed, unlocked
+ * @crtc: the CRTC that triggered the update
  *
- * Set up the framebuffer compression hardware at mode set time.  We
- * enable it if possible:
- *   - plane A only (on pre-965)
- *   - no pixel mulitply/line duplication
- *   - no alpha buffer discard
- *   - no dual wide
- *   - framebuffer <= max_hdisplay in width, max_vdisplay in height
- *
- * We can't assume that any compression will take place (worst case),
- * so the compressed buffer has to be the same size as the uncompressed
- * one.  It also must reside (along with the line length buffer) in
- * stolen memory.
- *
- * We need to enable/disable FBC on a global basis.
+ * This function completely reevaluates the status of FBC, then activates,
+ * deactivates or maintains it on the same state.
  */
-static void __intel_fbc_update(struct drm_i915_private *dev_priv)
+static void __intel_fbc_update(struct intel_crtc *crtc)
 {
-	struct drm_crtc *crtc = NULL;
-	struct intel_crtc *intel_crtc;
+	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
 	struct drm_framebuffer *fb;
 	struct drm_i915_gem_object *obj;
 	const struct drm_display_mode *adjusted_mode;
 
 	WARN_ON(!mutex_is_locked(&dev_priv->fbc.lock));
 
-	/* disable framebuffer compression in vGPU */
-	if (intel_vgpu_active(dev_priv->dev))
-		i915.enable_fbc = 0;
-
-	if (i915.enable_fbc < 0) {
-		set_no_fbc_reason(dev_priv, FBC_CHIP_DEFAULT);
-		goto out_disable;
-	}
-
-	if (!i915.enable_fbc) {
-		set_no_fbc_reason(dev_priv, FBC_MODULE_PARAM);
-		goto out_disable;
-	}
-
-	/*
-	 * If FBC is already on, we just have to verify that we can
-	 * keep it that way...
-	 * Need to disable if:
-	 *   - more than one pipe is active
-	 *   - changing FBC params (stride, fence, mode)
-	 *   - new fb is too large to fit in compressed buffer
-	 *   - going to an unsupported config (interlace, pixel multiply, etc.)
-	 */
-	crtc = intel_fbc_find_crtc(dev_priv);
-	if (!crtc) {
-		set_no_fbc_reason(dev_priv, FBC_NO_OUTPUT);
-		goto out_disable;
-	}
-
 	if (!multiple_pipes_ok(dev_priv)) {
-		set_no_fbc_reason(dev_priv, FBC_MULTIPLE_PIPES);
+		set_no_fbc_reason(dev_priv, "more than one pipe active");
 		goto out_disable;
 	}
 
-	intel_crtc = to_intel_crtc(crtc);
-	fb = crtc->primary->fb;
+	if (!dev_priv->fbc.enabled || dev_priv->fbc.crtc != crtc)
+		return;
+
+	if (!crtc_is_valid(crtc)) {
+		set_no_fbc_reason(dev_priv, "no output");
+		goto out_disable;
+	}
+
+	fb = crtc->base.primary->fb;
 	obj = intel_fb_obj(fb);
-	adjusted_mode = &intel_crtc->config->base.adjusted_mode;
+	adjusted_mode = &crtc->config->base.adjusted_mode;
 
 	if ((adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) ||
 	    (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)) {
-		set_no_fbc_reason(dev_priv, FBC_UNSUPPORTED_MODE);
+		set_no_fbc_reason(dev_priv, "incompatible mode");
 		goto out_disable;
 	}
 
-	if (!intel_fbc_hw_tracking_covers_screen(intel_crtc)) {
-		set_no_fbc_reason(dev_priv, FBC_MODE_TOO_LARGE);
-		goto out_disable;
-	}
-
-	if ((INTEL_INFO(dev_priv)->gen < 4 || HAS_DDI(dev_priv)) &&
-	    intel_crtc->plane != PLANE_A) {
-		set_no_fbc_reason(dev_priv, FBC_BAD_PLANE);
+	if (!intel_fbc_hw_tracking_covers_screen(crtc)) {
+		set_no_fbc_reason(dev_priv, "mode too large for compression");
 		goto out_disable;
 	}
 
@@ -918,41 +782,46 @@
 	 */
 	if (obj->tiling_mode != I915_TILING_X ||
 	    obj->fence_reg == I915_FENCE_REG_NONE) {
-		set_no_fbc_reason(dev_priv, FBC_NOT_TILED);
+		set_no_fbc_reason(dev_priv, "framebuffer not tiled or fenced");
 		goto out_disable;
 	}
 	if (INTEL_INFO(dev_priv)->gen <= 4 && !IS_G4X(dev_priv) &&
-	    crtc->primary->state->rotation != BIT(DRM_ROTATE_0)) {
-		set_no_fbc_reason(dev_priv, FBC_ROTATION);
+	    crtc->base.primary->state->rotation != BIT(DRM_ROTATE_0)) {
+		set_no_fbc_reason(dev_priv, "rotation unsupported");
 		goto out_disable;
 	}
 
 	if (!stride_is_valid(dev_priv, fb->pitches[0])) {
-		set_no_fbc_reason(dev_priv, FBC_BAD_STRIDE);
+		set_no_fbc_reason(dev_priv, "framebuffer stride not supported");
 		goto out_disable;
 	}
 
 	if (!pixel_format_is_valid(fb)) {
-		set_no_fbc_reason(dev_priv, FBC_PIXEL_FORMAT);
-		goto out_disable;
-	}
-
-	/* If the kernel debugger is active, always disable compression */
-	if (in_dbg_master()) {
-		set_no_fbc_reason(dev_priv, FBC_IN_DBG_MASTER);
+		set_no_fbc_reason(dev_priv, "pixel format is invalid");
 		goto out_disable;
 	}
 
 	/* WaFbcExceedCdClockThreshold:hsw,bdw */
 	if ((IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) &&
-	    ilk_pipe_pixel_rate(intel_crtc->config) >=
+	    ilk_pipe_pixel_rate(crtc->config) >=
 	    dev_priv->cdclk_freq * 95 / 100) {
-		set_no_fbc_reason(dev_priv, FBC_PIXEL_RATE);
+		set_no_fbc_reason(dev_priv, "pixel rate is too big");
 		goto out_disable;
 	}
 
-	if (intel_fbc_setup_cfb(intel_crtc)) {
-		set_no_fbc_reason(dev_priv, FBC_STOLEN_TOO_SMALL);
+	/* It is possible for the required CFB size change without a
+	 * crtc->disable + crtc->enable since it is possible to change the
+	 * stride without triggering a full modeset. Since we try to
+	 * over-allocate the CFB, there's a chance we may keep FBC enabled even
+	 * if this happens, but if we exceed the current CFB size we'll have to
+	 * disable FBC. Notice that it would be possible to disable FBC, wait
+	 * for a frame, free the stolen node, then try to reenable FBC in case
+	 * we didn't get any invalidate/deactivate calls, but this would require
+	 * a lot of tracking just for a specific case. If we conclude it's an
+	 * important case, we can implement it later. */
+	if (intel_fbc_calculate_cfb_size(crtc, fb) >
+	    dev_priv->fbc.compressed_fb.size * dev_priv->fbc.threshold) {
+		set_no_fbc_reason(dev_priv, "CFB requirements changed");
 		goto out_disable;
 	}
 
@@ -961,12 +830,13 @@
 	 * cannot be unpinned (and have its GTT offset and fence revoked)
 	 * without first being decoupled from the scanout and FBC disabled.
 	 */
-	if (dev_priv->fbc.crtc == intel_crtc &&
+	if (dev_priv->fbc.crtc == crtc &&
 	    dev_priv->fbc.fb_id == fb->base.id &&
-	    dev_priv->fbc.y == crtc->y)
+	    dev_priv->fbc.y == crtc->base.y &&
+	    dev_priv->fbc.active)
 		return;
 
-	if (intel_fbc_enabled(dev_priv)) {
+	if (intel_fbc_is_active(dev_priv)) {
 		/* We update FBC along two paths, after changing fb/crtc
 		 * configuration (modeswitching) and after page-flipping
 		 * finishes. For the latter, we know that not only did
@@ -990,36 +860,37 @@
 		 * disabling paths we do need to wait for a vblank at
 		 * some point. And we wait before enabling FBC anyway.
 		 */
-		DRM_DEBUG_KMS("disabling active FBC for update\n");
-		__intel_fbc_disable(dev_priv);
+		DRM_DEBUG_KMS("deactivating FBC for update\n");
+		__intel_fbc_deactivate(dev_priv);
 	}
 
-	intel_fbc_schedule_enable(intel_crtc);
-	dev_priv->fbc.no_fbc_reason = FBC_OK;
+	intel_fbc_schedule_activation(crtc);
+	dev_priv->fbc.no_fbc_reason = "FBC enabled (not necessarily active)";
 	return;
 
 out_disable:
 	/* Multiple disables should be harmless */
-	if (intel_fbc_enabled(dev_priv)) {
-		DRM_DEBUG_KMS("unsupported config, disabling FBC\n");
-		__intel_fbc_disable(dev_priv);
+	if (intel_fbc_is_active(dev_priv)) {
+		DRM_DEBUG_KMS("unsupported config, deactivating FBC\n");
+		__intel_fbc_deactivate(dev_priv);
 	}
-	__intel_fbc_cleanup_cfb(dev_priv);
 }
 
 /*
- * intel_fbc_update - enable/disable FBC as needed
- * @dev_priv: i915 device instance
+ * intel_fbc_update - activate/deactivate FBC as needed
+ * @crtc: the CRTC that triggered the update
  *
- * This function reevaluates the overall state and enables or disables FBC.
+ * This function reevaluates the overall state and activates or deactivates FBC.
  */
-void intel_fbc_update(struct drm_i915_private *dev_priv)
+void intel_fbc_update(struct intel_crtc *crtc)
 {
+	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
+
 	if (!fbc_supported(dev_priv))
 		return;
 
 	mutex_lock(&dev_priv->fbc.lock);
-	__intel_fbc_update(dev_priv);
+	__intel_fbc_update(crtc);
 	mutex_unlock(&dev_priv->fbc.lock);
 }
 
@@ -1039,16 +910,13 @@
 
 	if (dev_priv->fbc.enabled)
 		fbc_bits = INTEL_FRONTBUFFER_PRIMARY(dev_priv->fbc.crtc->pipe);
-	else if (dev_priv->fbc.fbc_work)
-		fbc_bits = INTEL_FRONTBUFFER_PRIMARY(
-					dev_priv->fbc.fbc_work->crtc->pipe);
 	else
 		fbc_bits = dev_priv->fbc.possible_framebuffer_bits;
 
 	dev_priv->fbc.busy_bits |= (fbc_bits & frontbuffer_bits);
 
 	if (dev_priv->fbc.busy_bits)
-		__intel_fbc_disable(dev_priv);
+		__intel_fbc_deactivate(dev_priv);
 
 	mutex_unlock(&dev_priv->fbc.lock);
 }
@@ -1066,15 +934,140 @@
 
 	dev_priv->fbc.busy_bits &= ~frontbuffer_bits;
 
-	if (!dev_priv->fbc.busy_bits) {
-		__intel_fbc_disable(dev_priv);
-		__intel_fbc_update(dev_priv);
+	if (!dev_priv->fbc.busy_bits && dev_priv->fbc.enabled) {
+		if (origin != ORIGIN_FLIP && dev_priv->fbc.active) {
+			intel_fbc_recompress(dev_priv);
+		} else {
+			__intel_fbc_deactivate(dev_priv);
+			__intel_fbc_update(dev_priv->fbc.crtc);
+		}
 	}
 
 	mutex_unlock(&dev_priv->fbc.lock);
 }
 
 /**
+ * intel_fbc_enable: tries to enable FBC on the CRTC
+ * @crtc: the CRTC
+ *
+ * This function checks if it's possible to enable FBC on the following CRTC,
+ * then enables it. Notice that it doesn't activate FBC.
+ */
+void intel_fbc_enable(struct intel_crtc *crtc)
+{
+	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
+
+	if (!fbc_supported(dev_priv))
+		return;
+
+	mutex_lock(&dev_priv->fbc.lock);
+
+	if (dev_priv->fbc.enabled) {
+		WARN_ON(dev_priv->fbc.crtc == crtc);
+		goto out;
+	}
+
+	WARN_ON(dev_priv->fbc.active);
+	WARN_ON(dev_priv->fbc.crtc != NULL);
+
+	if (intel_vgpu_active(dev_priv->dev)) {
+		set_no_fbc_reason(dev_priv, "VGPU is active");
+		goto out;
+	}
+
+	if (i915.enable_fbc < 0) {
+		set_no_fbc_reason(dev_priv, "disabled per chip default");
+		goto out;
+	}
+
+	if (!i915.enable_fbc) {
+		set_no_fbc_reason(dev_priv, "disabled per module param");
+		goto out;
+	}
+
+	if (!crtc_can_fbc(crtc)) {
+		set_no_fbc_reason(dev_priv, "no enabled pipes can have FBC");
+		goto out;
+	}
+
+	if (intel_fbc_alloc_cfb(crtc)) {
+		set_no_fbc_reason(dev_priv, "not enough stolen memory");
+		goto out;
+	}
+
+	DRM_DEBUG_KMS("Enabling FBC on pipe %c\n", pipe_name(crtc->pipe));
+	dev_priv->fbc.no_fbc_reason = "FBC enabled but not active yet\n";
+
+	dev_priv->fbc.enabled = true;
+	dev_priv->fbc.crtc = crtc;
+out:
+	mutex_unlock(&dev_priv->fbc.lock);
+}
+
+/**
+ * __intel_fbc_disable - disable FBC
+ * @dev_priv: i915 device instance
+ *
+ * This is the low level function that actually disables FBC. Callers should
+ * grab the FBC lock.
+ */
+static void __intel_fbc_disable(struct drm_i915_private *dev_priv)
+{
+	struct intel_crtc *crtc = dev_priv->fbc.crtc;
+
+	WARN_ON(!mutex_is_locked(&dev_priv->fbc.lock));
+	WARN_ON(!dev_priv->fbc.enabled);
+	WARN_ON(dev_priv->fbc.active);
+	assert_pipe_disabled(dev_priv, crtc->pipe);
+
+	DRM_DEBUG_KMS("Disabling FBC on pipe %c\n", pipe_name(crtc->pipe));
+
+	__intel_fbc_cleanup_cfb(dev_priv);
+
+	dev_priv->fbc.enabled = false;
+	dev_priv->fbc.crtc = NULL;
+}
+
+/**
+ * intel_fbc_disable_crtc - disable FBC if it's associated with crtc
+ * @crtc: the CRTC
+ *
+ * This function disables FBC if it's associated with the provided CRTC.
+ */
+void intel_fbc_disable_crtc(struct intel_crtc *crtc)
+{
+	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
+
+	if (!fbc_supported(dev_priv))
+		return;
+
+	mutex_lock(&dev_priv->fbc.lock);
+	if (dev_priv->fbc.crtc == crtc) {
+		WARN_ON(!dev_priv->fbc.enabled);
+		WARN_ON(dev_priv->fbc.active);
+		__intel_fbc_disable(dev_priv);
+	}
+	mutex_unlock(&dev_priv->fbc.lock);
+}
+
+/**
+ * intel_fbc_disable - globally disable FBC
+ * @dev_priv: i915 device instance
+ *
+ * This function disables FBC regardless of which CRTC is associated with it.
+ */
+void intel_fbc_disable(struct drm_i915_private *dev_priv)
+{
+	if (!fbc_supported(dev_priv))
+		return;
+
+	mutex_lock(&dev_priv->fbc.lock);
+	if (dev_priv->fbc.enabled)
+		__intel_fbc_disable(dev_priv);
+	mutex_unlock(&dev_priv->fbc.lock);
+}
+
+/**
  * intel_fbc_init - Initialize FBC
  * @dev_priv: the i915 device
  *
@@ -1084,11 +1077,14 @@
 {
 	enum pipe pipe;
 
+	INIT_WORK(&dev_priv->fbc.work.work, intel_fbc_work_fn);
 	mutex_init(&dev_priv->fbc.lock);
+	dev_priv->fbc.enabled = false;
+	dev_priv->fbc.active = false;
+	dev_priv->fbc.work.scheduled = false;
 
 	if (!HAS_FBC(dev_priv)) {
-		dev_priv->fbc.enabled = false;
-		dev_priv->fbc.no_fbc_reason = FBC_UNSUPPORTED;
+		dev_priv->fbc.no_fbc_reason = "unsupported by this chipset";
 		return;
 	}
 
@@ -1096,30 +1092,34 @@
 		dev_priv->fbc.possible_framebuffer_bits |=
 				INTEL_FRONTBUFFER_PRIMARY(pipe);
 
-		if (IS_HASWELL(dev_priv) || INTEL_INFO(dev_priv)->gen >= 8)
+		if (fbc_on_pipe_a_only(dev_priv))
 			break;
 	}
 
 	if (INTEL_INFO(dev_priv)->gen >= 7) {
-		dev_priv->fbc.fbc_enabled = ilk_fbc_enabled;
-		dev_priv->fbc.enable_fbc = gen7_fbc_enable;
-		dev_priv->fbc.disable_fbc = ilk_fbc_disable;
+		dev_priv->fbc.is_active = ilk_fbc_is_active;
+		dev_priv->fbc.activate = gen7_fbc_activate;
+		dev_priv->fbc.deactivate = ilk_fbc_deactivate;
 	} else if (INTEL_INFO(dev_priv)->gen >= 5) {
-		dev_priv->fbc.fbc_enabled = ilk_fbc_enabled;
-		dev_priv->fbc.enable_fbc = ilk_fbc_enable;
-		dev_priv->fbc.disable_fbc = ilk_fbc_disable;
+		dev_priv->fbc.is_active = ilk_fbc_is_active;
+		dev_priv->fbc.activate = ilk_fbc_activate;
+		dev_priv->fbc.deactivate = ilk_fbc_deactivate;
 	} else if (IS_GM45(dev_priv)) {
-		dev_priv->fbc.fbc_enabled = g4x_fbc_enabled;
-		dev_priv->fbc.enable_fbc = g4x_fbc_enable;
-		dev_priv->fbc.disable_fbc = g4x_fbc_disable;
+		dev_priv->fbc.is_active = g4x_fbc_is_active;
+		dev_priv->fbc.activate = g4x_fbc_activate;
+		dev_priv->fbc.deactivate = g4x_fbc_deactivate;
 	} else {
-		dev_priv->fbc.fbc_enabled = i8xx_fbc_enabled;
-		dev_priv->fbc.enable_fbc = i8xx_fbc_enable;
-		dev_priv->fbc.disable_fbc = i8xx_fbc_disable;
+		dev_priv->fbc.is_active = i8xx_fbc_is_active;
+		dev_priv->fbc.activate = i8xx_fbc_activate;
+		dev_priv->fbc.deactivate = i8xx_fbc_deactivate;
 
 		/* This value was pulled out of someone's hat */
 		I915_WRITE(FBC_CONTROL, 500 << FBC_CTL_INTERVAL_SHIFT);
 	}
 
-	dev_priv->fbc.enabled = dev_priv->fbc.fbc_enabled(dev_priv);
+	/* We still don't have any sort of hardware state readout for FBC, so
+	 * deactivate it in case the BIOS activated it to make sure software
+	 * matches the hardware state. */
+	if (dev_priv->fbc.is_active(dev_priv))
+		dev_priv->fbc.deactivate(dev_priv);
 }
diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index 4fd5fdf..7ccde58 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -119,7 +119,7 @@
 {
 	struct intel_fbdev *ifbdev =
 		container_of(helper, struct intel_fbdev, helper);
-	struct drm_framebuffer *fb;
+	struct drm_framebuffer *fb = NULL;
 	struct drm_device *dev = helper->dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_mode_fb_cmd2 mode_cmd = {};
@@ -138,6 +138,8 @@
 	mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
 							  sizes->surface_depth);
 
+	mutex_lock(&dev->struct_mutex);
+
 	size = mode_cmd.pitches[0] * mode_cmd.height;
 	size = PAGE_ALIGN(size);
 
@@ -156,26 +158,28 @@
 
 	fb = __intel_framebuffer_create(dev, &mode_cmd, obj);
 	if (IS_ERR(fb)) {
+		drm_gem_object_unreference(&obj->base);
 		ret = PTR_ERR(fb);
-		goto out_unref;
+		goto out;
 	}
 
 	/* Flush everything out, we'll be doing GTT only from now on */
-	ret = intel_pin_and_fence_fb_obj(NULL, fb, NULL, NULL, NULL);
+	ret = intel_pin_and_fence_fb_obj(NULL, fb, NULL);
 	if (ret) {
 		DRM_ERROR("failed to pin obj: %d\n", ret);
-		goto out_fb;
+		goto out;
 	}
 
+	mutex_unlock(&dev->struct_mutex);
+
 	ifbdev->fb = to_intel_framebuffer(fb);
 
 	return 0;
 
-out_fb:
-	drm_framebuffer_remove(fb);
-out_unref:
-	drm_gem_object_unreference(&obj->base);
 out:
+	mutex_unlock(&dev->struct_mutex);
+	if (!IS_ERR_OR_NULL(fb))
+		drm_framebuffer_unreference(fb);
 	return ret;
 }
 
@@ -193,8 +197,6 @@
 	int size, ret;
 	bool prealloc = false;
 
-	mutex_lock(&dev->struct_mutex);
-
 	if (intel_fb &&
 	    (sizes->fb_width > intel_fb->base.width ||
 	     sizes->fb_height > intel_fb->base.height)) {
@@ -209,7 +211,7 @@
 		DRM_DEBUG_KMS("no BIOS fb, allocating a new one\n");
 		ret = intelfb_alloc(helper, sizes);
 		if (ret)
-			goto out_unlock;
+			return ret;
 		intel_fb = ifbdev->fb;
 	} else {
 		DRM_DEBUG_KMS("re-using BIOS fb\n");
@@ -221,8 +223,11 @@
 	obj = intel_fb->obj;
 	size = obj->base.size;
 
+	mutex_lock(&dev->struct_mutex);
+
 	info = drm_fb_helper_alloc_fbi(helper);
 	if (IS_ERR(info)) {
+		DRM_ERROR("Failed to allocate fb_info\n");
 		ret = PTR_ERR(info);
 		goto out_unpin;
 	}
@@ -249,6 +254,7 @@
 		ioremap_wc(dev_priv->gtt.mappable_base + i915_gem_obj_ggtt_offset(obj),
 			   size);
 	if (!info->screen_base) {
+		DRM_ERROR("Failed to remap framebuffer into virtual memory\n");
 		ret = -ENOSPC;
 		goto out_destroy_fbi;
 	}
@@ -281,8 +287,6 @@
 	drm_fb_helper_release_fbi(helper);
 out_unpin:
 	i915_gem_object_ggtt_unpin(obj);
-	drm_gem_object_unreference(&obj->base);
-out_unlock:
 	mutex_unlock(&dev->struct_mutex);
 	return ret;
 }
@@ -526,8 +530,10 @@
 
 	drm_fb_helper_fini(&ifbdev->helper);
 
-	drm_framebuffer_unregister_private(&ifbdev->fb->base);
-	drm_framebuffer_remove(&ifbdev->fb->base);
+	if (ifbdev->fb) {
+		drm_framebuffer_unregister_private(&ifbdev->fb->base);
+		drm_framebuffer_remove(&ifbdev->fb->base);
+	}
 }
 
 /*
@@ -702,13 +708,20 @@
 	return 0;
 }
 
-void intel_fbdev_initial_config(void *data, async_cookie_t cookie)
+static void intel_fbdev_initial_config(void *data, async_cookie_t cookie)
 {
 	struct drm_i915_private *dev_priv = data;
 	struct intel_fbdev *ifbdev = dev_priv->fbdev;
 
 	/* Due to peculiar init order wrt to hpd handling this is separate. */
-	drm_fb_helper_initial_config(&ifbdev->helper, ifbdev->preferred_bpp);
+	if (drm_fb_helper_initial_config(&ifbdev->helper,
+					 ifbdev->preferred_bpp))
+		intel_fbdev_fini(dev_priv->dev);
+}
+
+void intel_fbdev_initial_config_async(struct drm_device *dev)
+{
+	async_schedule(intel_fbdev_initial_config, to_i915(dev));
 }
 
 void intel_fbdev_fini(struct drm_device *dev)
@@ -719,7 +732,8 @@
 
 	flush_work(&dev_priv->fbdev_suspend_work);
 
-	async_synchronize_full();
+	if (!current_is_async())
+		async_synchronize_full();
 	intel_fbdev_destroy(dev, dev_priv->fbdev);
 	kfree(dev_priv->fbdev);
 	dev_priv->fbdev = NULL;
diff --git a/drivers/gpu/drm/i915/intel_fifo_underrun.c b/drivers/gpu/drm/i915/intel_fifo_underrun.c
index 54daa66..bda5266 100644
--- a/drivers/gpu/drm/i915/intel_fifo_underrun.c
+++ b/drivers/gpu/drm/i915/intel_fifo_underrun.c
@@ -84,38 +84,21 @@
 	return true;
 }
 
-/**
- * i9xx_check_fifo_underruns - check for fifo underruns
- * @dev_priv: i915 device instance
- *
- * This function checks for fifo underruns on GMCH platforms. This needs to be
- * done manually on modeset to make sure that we catch all underruns since they
- * do not generate an interrupt by themselves on these platforms.
- */
-void i9xx_check_fifo_underruns(struct drm_i915_private *dev_priv)
+static void i9xx_check_fifo_underruns(struct intel_crtc *crtc)
 {
-	struct intel_crtc *crtc;
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	i915_reg_t reg = PIPESTAT(crtc->pipe);
+	u32 pipestat = I915_READ(reg) & 0xffff0000;
 
-	spin_lock_irq(&dev_priv->irq_lock);
+	assert_spin_locked(&dev_priv->irq_lock);
 
-	for_each_intel_crtc(dev_priv->dev, crtc) {
-		u32 reg = PIPESTAT(crtc->pipe);
-		u32 pipestat;
+	if ((pipestat & PIPE_FIFO_UNDERRUN_STATUS) == 0)
+		return;
 
-		if (crtc->cpu_fifo_underrun_disabled)
-			continue;
+	I915_WRITE(reg, pipestat | PIPE_FIFO_UNDERRUN_STATUS);
+	POSTING_READ(reg);
 
-		pipestat = I915_READ(reg) & 0xffff0000;
-		if ((pipestat & PIPE_FIFO_UNDERRUN_STATUS) == 0)
-			continue;
-
-		I915_WRITE(reg, pipestat | PIPE_FIFO_UNDERRUN_STATUS);
-		POSTING_READ(reg);
-
-		DRM_ERROR("pipe %c underrun\n", pipe_name(crtc->pipe));
-	}
-
-	spin_unlock_irq(&dev_priv->irq_lock);
+	DRM_ERROR("pipe %c underrun\n", pipe_name(crtc->pipe));
 }
 
 static void i9xx_set_fifo_underrun_reporting(struct drm_device *dev,
@@ -123,7 +106,7 @@
 					     bool enable, bool old)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 reg = PIPESTAT(pipe);
+	i915_reg_t reg = PIPESTAT(pipe);
 	u32 pipestat = I915_READ(reg) & 0xffff0000;
 
 	assert_spin_locked(&dev_priv->irq_lock);
@@ -145,9 +128,26 @@
 					  DE_PIPEB_FIFO_UNDERRUN;
 
 	if (enable)
-		ironlake_enable_display_irq(dev_priv, bit);
+		ilk_enable_display_irq(dev_priv, bit);
 	else
-		ironlake_disable_display_irq(dev_priv, bit);
+		ilk_disable_display_irq(dev_priv, bit);
+}
+
+static void ivybridge_check_fifo_underruns(struct intel_crtc *crtc)
+{
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	enum pipe pipe = crtc->pipe;
+	uint32_t err_int = I915_READ(GEN7_ERR_INT);
+
+	assert_spin_locked(&dev_priv->irq_lock);
+
+	if ((err_int & ERR_INT_FIFO_UNDERRUN(pipe)) == 0)
+		return;
+
+	I915_WRITE(GEN7_ERR_INT, ERR_INT_FIFO_UNDERRUN(pipe));
+	POSTING_READ(GEN7_ERR_INT);
+
+	DRM_ERROR("fifo underrun on pipe %c\n", pipe_name(pipe));
 }
 
 static void ivybridge_set_fifo_underrun_reporting(struct drm_device *dev,
@@ -161,9 +161,9 @@
 		if (!ivb_can_enable_err_int(dev))
 			return;
 
-		ironlake_enable_display_irq(dev_priv, DE_ERR_INT_IVB);
+		ilk_enable_display_irq(dev_priv, DE_ERR_INT_IVB);
 	} else {
-		ironlake_disable_display_irq(dev_priv, DE_ERR_INT_IVB);
+		ilk_disable_display_irq(dev_priv, DE_ERR_INT_IVB);
 
 		if (old &&
 		    I915_READ(GEN7_ERR_INT) & ERR_INT_FIFO_UNDERRUN(pipe)) {
@@ -178,14 +178,10 @@
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	assert_spin_locked(&dev_priv->irq_lock);
-
 	if (enable)
-		dev_priv->de_irq_mask[pipe] &= ~GEN8_PIPE_FIFO_UNDERRUN;
+		bdw_enable_pipe_irq(dev_priv, pipe, GEN8_PIPE_FIFO_UNDERRUN);
 	else
-		dev_priv->de_irq_mask[pipe] |= GEN8_PIPE_FIFO_UNDERRUN;
-	I915_WRITE(GEN8_DE_PIPE_IMR(pipe), dev_priv->de_irq_mask[pipe]);
-	POSTING_READ(GEN8_DE_PIPE_IMR(pipe));
+		bdw_disable_pipe_irq(dev_priv, pipe, GEN8_PIPE_FIFO_UNDERRUN);
 }
 
 static void ibx_set_fifo_underrun_reporting(struct drm_device *dev,
@@ -202,6 +198,24 @@
 		ibx_disable_display_interrupt(dev_priv, bit);
 }
 
+static void cpt_check_pch_fifo_underruns(struct intel_crtc *crtc)
+{
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	enum transcoder pch_transcoder = (enum transcoder) crtc->pipe;
+	uint32_t serr_int = I915_READ(SERR_INT);
+
+	assert_spin_locked(&dev_priv->irq_lock);
+
+	if ((serr_int & SERR_INT_TRANS_FIFO_UNDERRUN(pch_transcoder)) == 0)
+		return;
+
+	I915_WRITE(SERR_INT, SERR_INT_TRANS_FIFO_UNDERRUN(pch_transcoder));
+	POSTING_READ(SERR_INT);
+
+	DRM_ERROR("pch fifo underrun on pch transcoder %c\n",
+		  transcoder_name(pch_transcoder));
+}
+
 static void cpt_set_fifo_underrun_reporting(struct drm_device *dev,
 					    enum transcoder pch_transcoder,
 					    bool enable, bool old)
@@ -375,3 +389,56 @@
 		DRM_ERROR("PCH transcoder %c FIFO underrun\n",
 			  transcoder_name(pch_transcoder));
 }
+
+/**
+ * intel_check_cpu_fifo_underruns - check for CPU fifo underruns immediately
+ * @dev_priv: i915 device instance
+ *
+ * Check for CPU fifo underruns immediately. Useful on IVB/HSW where the shared
+ * error interrupt may have been disabled, and so CPU fifo underruns won't
+ * necessarily raise an interrupt, and on GMCH platforms where underruns never
+ * raise an interrupt.
+ */
+void intel_check_cpu_fifo_underruns(struct drm_i915_private *dev_priv)
+{
+	struct intel_crtc *crtc;
+
+	spin_lock_irq(&dev_priv->irq_lock);
+
+	for_each_intel_crtc(dev_priv->dev, crtc) {
+		if (crtc->cpu_fifo_underrun_disabled)
+			continue;
+
+		if (HAS_GMCH_DISPLAY(dev_priv))
+			i9xx_check_fifo_underruns(crtc);
+		else if (IS_GEN7(dev_priv))
+			ivybridge_check_fifo_underruns(crtc);
+	}
+
+	spin_unlock_irq(&dev_priv->irq_lock);
+}
+
+/**
+ * intel_check_pch_fifo_underruns - check for PCH fifo underruns immediately
+ * @dev_priv: i915 device instance
+ *
+ * Check for PCH fifo underruns immediately. Useful on CPT/PPT where the shared
+ * error interrupt may have been disabled, and so PCH fifo underruns won't
+ * necessarily raise an interrupt.
+ */
+void intel_check_pch_fifo_underruns(struct drm_i915_private *dev_priv)
+{
+	struct intel_crtc *crtc;
+
+	spin_lock_irq(&dev_priv->irq_lock);
+
+	for_each_intel_crtc(dev_priv->dev, crtc) {
+		if (crtc->pch_fifo_underrun_disabled)
+			continue;
+
+		if (HAS_PCH_CPT(dev_priv))
+			cpt_check_pch_fifo_underruns(crtc);
+	}
+
+	spin_unlock_irq(&dev_priv->irq_lock);
+}
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 081d5f6..8229522 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -42,8 +42,6 @@
 
 	uint32_t wq_offset;
 	uint32_t wq_size;
-
-	spinlock_t wq_lock;		/* Protects all data below	*/
 	uint32_t wq_tail;
 
 	/* GuC submission statistics & status */
@@ -76,11 +74,17 @@
 	uint16_t			guc_fw_minor_wanted;
 	uint16_t			guc_fw_major_found;
 	uint16_t			guc_fw_minor_found;
+
+	uint32_t header_size;
+	uint32_t header_offset;
+	uint32_t rsa_size;
+	uint32_t rsa_offset;
+	uint32_t ucode_size;
+	uint32_t ucode_offset;
 };
 
 struct intel_guc {
 	struct intel_guc_fw guc_fw;
-
 	uint32_t log_flags;
 	struct drm_i915_gem_object *log_obj;
 
@@ -89,8 +93,6 @@
 
 	struct i915_guc_client *execbuf_client;
 
-	spinlock_t host2guc_lock;	/* Protects all data below	*/
-
 	DECLARE_BITMAP(doorbell_bitmap, GUC_MAX_DOORBELLS);
 	uint32_t db_cacheline;		/* Cyclic counter mod pagesize	*/
 
diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h
index 593d2f5..40b2ea5 100644
--- a/drivers/gpu/drm/i915/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/intel_guc_fwif.h
@@ -122,6 +122,78 @@
 
 #define GUC_CTL_MAX_DWORDS		(GUC_CTL_RSRVD + 1)
 
+/**
+ * DOC: GuC Firmware Layout
+ *
+ * The GuC firmware layout looks like this:
+ *
+ *     +-------------------------------+
+ *     |        guc_css_header         |
+ *     | contains major/minor version  |
+ *     +-------------------------------+
+ *     |             uCode             |
+ *     +-------------------------------+
+ *     |         RSA signature         |
+ *     +-------------------------------+
+ *     |          modulus key          |
+ *     +-------------------------------+
+ *     |          exponent val         |
+ *     +-------------------------------+
+ *
+ * The firmware may or may not have modulus key and exponent data. The header,
+ * uCode and RSA signature are must-have components that will be used by driver.
+ * Length of each components, which is all in dwords, can be found in header.
+ * In the case that modulus and exponent are not present in fw, a.k.a truncated
+ * image, the length value still appears in header.
+ *
+ * Driver will do some basic fw size validation based on the following rules:
+ *
+ * 1. Header, uCode and RSA are must-have components.
+ * 2. All firmware components, if they present, are in the sequence illustrated
+ * in the layout table above.
+ * 3. Length info of each component can be found in header, in dwords.
+ * 4. Modulus and exponent key are not required by driver. They may not appear
+ * in fw. So driver will load a truncated firmware in this case.
+ */
+
+struct guc_css_header {
+	uint32_t module_type;
+	/* header_size includes all non-uCode bits, including css_header, rsa
+	 * key, modulus key and exponent data. */
+	uint32_t header_size_dw;
+	uint32_t header_version;
+	uint32_t module_id;
+	uint32_t module_vendor;
+	union {
+		struct {
+			uint8_t day;
+			uint8_t month;
+			uint16_t year;
+		};
+		uint32_t date;
+	};
+	uint32_t size_dw; /* uCode plus header_size_dw */
+	uint32_t key_size_dw;
+	uint32_t modulus_size_dw;
+	uint32_t exponent_size_dw;
+	union {
+		struct {
+			uint8_t hour;
+			uint8_t min;
+			uint16_t sec;
+		};
+		uint32_t time;
+	};
+
+	char username[8];
+	char buildnumber[12];
+	uint32_t device_id;
+	uint32_t guc_sw_version;
+	uint32_t prod_preprod_fw;
+	uint32_t reserved[12];
+	uint32_t header_info;
+} __packed;
+
 struct guc_doorbell_info {
 	u32 db_status;
 	u32 cookie;
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
index 3541f76..550921f 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -31,7 +31,7 @@
 #include "intel_guc.h"
 
 /**
- * DOC: GuC
+ * DOC: GuC-specific firmware loader
  *
  * intel_guc:
  * Top level structure of guc. It handles firmware loading and manages client
@@ -208,16 +208,6 @@
 /*
  * Transfer the firmware image to RAM for execution by the microcontroller.
  *
- * GuC Firmware layout:
- * +-------------------------------+  ----
- * |          CSS header           |  128B
- * | contains major/minor version  |
- * +-------------------------------+  ----
- * |             uCode             |
- * +-------------------------------+  ----
- * |         RSA signature         |  256B
- * +-------------------------------+  ----
- *
  * Architecturally, the DMA engine is bidirectional, and can potentially even
  * transfer between GTT locations. This functionality is left out of the API
  * for now as there is no need for it.
@@ -225,33 +215,29 @@
  * Note that GuC needs the CSS header plus uKernel code to be copied by the
  * DMA engine in one operation, whereas the RSA signature is loaded via MMIO.
  */
-
-#define UOS_CSS_HEADER_OFFSET		0
-#define UOS_VER_MINOR_OFFSET		0x44
-#define UOS_VER_MAJOR_OFFSET		0x46
-#define UOS_CSS_HEADER_SIZE		0x80
-#define UOS_RSA_SIG_SIZE		0x100
-
 static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv)
 {
 	struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw;
 	struct drm_i915_gem_object *fw_obj = guc_fw->guc_fw_obj;
 	unsigned long offset;
 	struct sg_table *sg = fw_obj->pages;
-	u32 status, ucode_size, rsa[UOS_RSA_SIG_SIZE / sizeof(u32)];
+	u32 status, rsa[UOS_RSA_SCRATCH_MAX_COUNT];
 	int i, ret = 0;
 
-	/* uCode size, also is where RSA signature starts */
-	offset = ucode_size = guc_fw->guc_fw_size - UOS_RSA_SIG_SIZE;
-	I915_WRITE(DMA_COPY_SIZE, ucode_size);
+	/* where RSA signature starts */
+	offset = guc_fw->rsa_offset;
 
 	/* Copy RSA signature from the fw image to HW for verification */
-	sg_pcopy_to_buffer(sg->sgl, sg->nents, rsa, UOS_RSA_SIG_SIZE, offset);
-	for (i = 0; i < UOS_RSA_SIG_SIZE / sizeof(u32); i++)
+	sg_pcopy_to_buffer(sg->sgl, sg->nents, rsa, sizeof(rsa), offset);
+	for (i = 0; i < UOS_RSA_SCRATCH_MAX_COUNT; i++)
 		I915_WRITE(UOS_RSA_SCRATCH(i), rsa[i]);
 
+	/* The header plus uCode will be copied to WOPCM via DMA, excluding any
+	 * other components */
+	I915_WRITE(DMA_COPY_SIZE, guc_fw->header_size + guc_fw->ucode_size);
+
 	/* Set the source address for the new blob */
-	offset = i915_gem_obj_ggtt_offset(fw_obj);
+	offset = i915_gem_obj_ggtt_offset(fw_obj) + guc_fw->header_offset;
 	I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset));
 	I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF);
 
@@ -322,8 +308,8 @@
 	I915_WRITE(GUC_SHIM_CONTROL, GUC_SHIM_CONTROL_VALUE);
 
 	/* WaDisableMinuteIaClockGating:skl,bxt */
-	if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_B0) ||
-	    (IS_BROXTON(dev) && INTEL_REVID(dev) == BXT_REVID_A0)) {
+	if (IS_SKL_REVID(dev, 0, SKL_REVID_B0) ||
+	    IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
 		I915_WRITE(GUC_SHIM_CONTROL, (I915_READ(GUC_SHIM_CONTROL) &
 					      ~GUC_ENABLE_MIA_CLOCK_GATING));
 	}
@@ -378,6 +364,9 @@
 	struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw;
 	int err = 0;
 
+	if (!i915.enable_guc_submission)
+		return 0;
+
 	DRM_DEBUG_DRIVER("GuC fw status: fetch %s, load %s\n",
 		intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status),
 		intel_guc_fw_status_repr(guc_fw->guc_fw_load_status));
@@ -457,10 +446,8 @@
 {
 	struct drm_i915_gem_object *obj;
 	const struct firmware *fw;
-	const u8 *css_header;
-	const size_t minsize = UOS_CSS_HEADER_SIZE + UOS_RSA_SIG_SIZE;
-	const size_t maxsize = GUC_WOPCM_SIZE_VALUE + UOS_RSA_SIG_SIZE
-			- 0x8000; /* 32k reserved (8K stack + 24k context) */
+	struct guc_css_header *css;
+	size_t size;
 	int err;
 
 	DRM_DEBUG_DRIVER("before requesting firmware: GuC fw fetch status %s\n",
@@ -474,12 +461,52 @@
 
 	DRM_DEBUG_DRIVER("fetch GuC fw from %s succeeded, fw %p\n",
 		guc_fw->guc_fw_path, fw);
-	DRM_DEBUG_DRIVER("firmware file size %zu (minimum %zu, maximum %zu)\n",
-		fw->size, minsize, maxsize);
 
-	/* Check the size of the blob befoe examining buffer contents */
-	if (fw->size < minsize || fw->size > maxsize)
+	/* Check the size of the blob before examining buffer contents */
+	if (fw->size < sizeof(struct guc_css_header)) {
+		DRM_ERROR("Firmware header is missing\n");
 		goto fail;
+	}
+
+	css = (struct guc_css_header *)fw->data;
+
+	/* Firmware bits always start from header */
+	guc_fw->header_offset = 0;
+	guc_fw->header_size = (css->header_size_dw - css->modulus_size_dw -
+		css->key_size_dw - css->exponent_size_dw) * sizeof(u32);
+
+	if (guc_fw->header_size != sizeof(struct guc_css_header)) {
+		DRM_ERROR("CSS header definition mismatch\n");
+		goto fail;
+	}
+
+	/* then, uCode */
+	guc_fw->ucode_offset = guc_fw->header_offset + guc_fw->header_size;
+	guc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32);
+
+	/* now RSA */
+	if (css->key_size_dw != UOS_RSA_SCRATCH_MAX_COUNT) {
+		DRM_ERROR("RSA key size is bad\n");
+		goto fail;
+	}
+	guc_fw->rsa_offset = guc_fw->ucode_offset + guc_fw->ucode_size;
+	guc_fw->rsa_size = css->key_size_dw * sizeof(u32);
+
+	/* At least, it should have header, uCode and RSA. Size of all three. */
+	size = guc_fw->header_size + guc_fw->ucode_size + guc_fw->rsa_size;
+	if (fw->size < size) {
+		DRM_ERROR("Missing firmware components\n");
+		goto fail;
+	}
+
+	/* Header and uCode will be loaded to WOPCM. Size of the two. */
+	size = guc_fw->header_size + guc_fw->ucode_size;
+
+	/* Top 32k of WOPCM is reserved (8K stack + 24k RC6 context). */
+	if (size > GUC_WOPCM_SIZE_VALUE - 0x8000) {
+		DRM_ERROR("Firmware is too large to fit in WOPCM\n");
+		goto fail;
+	}
 
 	/*
 	 * The GuC firmware image has the version number embedded at a well-known
@@ -487,9 +514,8 @@
 	 * TWO bytes each (i.e. u16), although all pointers and offsets are defined
 	 * in terms of bytes (u8).
 	 */
-	css_header = fw->data + UOS_CSS_HEADER_OFFSET;
-	guc_fw->guc_fw_major_found = *(u16 *)(css_header + UOS_VER_MAJOR_OFFSET);
-	guc_fw->guc_fw_minor_found = *(u16 *)(css_header + UOS_VER_MINOR_OFFSET);
+	guc_fw->guc_fw_major_found = css->guc_sw_version >> 16;
+	guc_fw->guc_fw_minor_found = css->guc_sw_version & 0xFFFF;
 
 	if (guc_fw->guc_fw_major_found != guc_fw->guc_fw_major_wanted ||
 	    guc_fw->guc_fw_minor_found < guc_fw->guc_fw_minor_wanted) {
@@ -566,6 +592,9 @@
 		fw_path = "";	/* unknown device */
 	}
 
+	if (!i915.enable_guc_submission)
+		return;
+
 	guc_fw->guc_dev = dev;
 	guc_fw->guc_fw_path = fw_path;
 	guc_fw->guc_fw_fetch_status = GUC_FIRMWARE_NONE;
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index 9eafa19..00d065f 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -113,10 +113,11 @@
 	}
 }
 
-static u32 hsw_dip_data_reg(struct drm_i915_private *dev_priv,
-			    enum transcoder cpu_transcoder,
-			    enum hdmi_infoframe_type type,
-			    int i)
+static i915_reg_t
+hsw_dip_data_reg(struct drm_i915_private *dev_priv,
+		 enum transcoder cpu_transcoder,
+		 enum hdmi_infoframe_type type,
+		 int i)
 {
 	switch (type) {
 	case HDMI_INFOFRAME_TYPE_AVI:
@@ -127,7 +128,7 @@
 		return HSW_TVIDEO_DIP_VS_DATA(cpu_transcoder, i);
 	default:
 		DRM_DEBUG_DRIVER("unknown info frame type %d\n", type);
-		return 0;
+		return INVALID_MMIO_REG;
 	}
 }
 
@@ -168,10 +169,10 @@
 	POSTING_READ(VIDEO_DIP_CTL);
 }
 
-static bool g4x_infoframe_enabled(struct drm_encoder *encoder)
+static bool g4x_infoframe_enabled(struct drm_encoder *encoder,
+				  const struct intel_crtc_state *pipe_config)
 {
-	struct drm_device *dev = encoder->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv = to_i915(encoder->dev);
 	struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
 	u32 val = I915_READ(VIDEO_DIP_CTL);
 
@@ -193,8 +194,9 @@
 	struct drm_device *dev = encoder->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
-	int i, reg = TVIDEO_DIP_CTL(intel_crtc->pipe);
+	i915_reg_t reg = TVIDEO_DIP_CTL(intel_crtc->pipe);
 	u32 val = I915_READ(reg);
+	int i;
 
 	WARN(!(val & VIDEO_DIP_ENABLE), "Writing DIP with CTL reg disabled\n");
 
@@ -223,13 +225,13 @@
 	POSTING_READ(reg);
 }
 
-static bool ibx_infoframe_enabled(struct drm_encoder *encoder)
+static bool ibx_infoframe_enabled(struct drm_encoder *encoder,
+				  const struct intel_crtc_state *pipe_config)
 {
-	struct drm_device *dev = encoder->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
+	struct drm_i915_private *dev_priv = to_i915(encoder->dev);
 	struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
-	int reg = TVIDEO_DIP_CTL(intel_crtc->pipe);
+	enum pipe pipe = to_intel_crtc(pipe_config->base.crtc)->pipe;
+	i915_reg_t reg = TVIDEO_DIP_CTL(pipe);
 	u32 val = I915_READ(reg);
 
 	if ((val & VIDEO_DIP_ENABLE) == 0)
@@ -251,8 +253,9 @@
 	struct drm_device *dev = encoder->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
-	int i, reg = TVIDEO_DIP_CTL(intel_crtc->pipe);
+	i915_reg_t reg = TVIDEO_DIP_CTL(intel_crtc->pipe);
 	u32 val = I915_READ(reg);
+	int i;
 
 	WARN(!(val & VIDEO_DIP_ENABLE), "Writing DIP with CTL reg disabled\n");
 
@@ -284,13 +287,12 @@
 	POSTING_READ(reg);
 }
 
-static bool cpt_infoframe_enabled(struct drm_encoder *encoder)
+static bool cpt_infoframe_enabled(struct drm_encoder *encoder,
+				  const struct intel_crtc_state *pipe_config)
 {
-	struct drm_device *dev = encoder->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
-	int reg = TVIDEO_DIP_CTL(intel_crtc->pipe);
-	u32 val = I915_READ(reg);
+	struct drm_i915_private *dev_priv = to_i915(encoder->dev);
+	enum pipe pipe = to_intel_crtc(pipe_config->base.crtc)->pipe;
+	u32 val = I915_READ(TVIDEO_DIP_CTL(pipe));
 
 	if ((val & VIDEO_DIP_ENABLE) == 0)
 		return false;
@@ -308,8 +310,9 @@
 	struct drm_device *dev = encoder->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
-	int i, reg = VLV_TVIDEO_DIP_CTL(intel_crtc->pipe);
+	i915_reg_t reg = VLV_TVIDEO_DIP_CTL(intel_crtc->pipe);
 	u32 val = I915_READ(reg);
+	int i;
 
 	WARN(!(val & VIDEO_DIP_ENABLE), "Writing DIP with CTL reg disabled\n");
 
@@ -338,14 +341,13 @@
 	POSTING_READ(reg);
 }
 
-static bool vlv_infoframe_enabled(struct drm_encoder *encoder)
+static bool vlv_infoframe_enabled(struct drm_encoder *encoder,
+				  const struct intel_crtc_state *pipe_config)
 {
-	struct drm_device *dev = encoder->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
+	struct drm_i915_private *dev_priv = to_i915(encoder->dev);
 	struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
-	int reg = VLV_TVIDEO_DIP_CTL(intel_crtc->pipe);
-	u32 val = I915_READ(reg);
+	enum pipe pipe = to_intel_crtc(pipe_config->base.crtc)->pipe;
+	u32 val = I915_READ(VLV_TVIDEO_DIP_CTL(pipe));
 
 	if ((val & VIDEO_DIP_ENABLE) == 0)
 		return false;
@@ -367,13 +369,13 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
 	enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder;
-	u32 ctl_reg = HSW_TVIDEO_DIP_CTL(cpu_transcoder);
-	u32 data_reg;
+	i915_reg_t ctl_reg = HSW_TVIDEO_DIP_CTL(cpu_transcoder);
+	i915_reg_t data_reg;
 	int i;
 	u32 val = I915_READ(ctl_reg);
 
 	data_reg = hsw_dip_data_reg(dev_priv, cpu_transcoder, type, 0);
-	if (data_reg == 0)
+	if (i915_mmio_reg_valid(data_reg))
 		return;
 
 	val &= ~hsw_infoframe_enable(type);
@@ -396,13 +398,11 @@
 	POSTING_READ(ctl_reg);
 }
 
-static bool hsw_infoframe_enabled(struct drm_encoder *encoder)
+static bool hsw_infoframe_enabled(struct drm_encoder *encoder,
+				  const struct intel_crtc_state *pipe_config)
 {
-	struct drm_device *dev = encoder->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
-	u32 ctl_reg = HSW_TVIDEO_DIP_CTL(intel_crtc->config->cpu_transcoder);
-	u32 val = I915_READ(ctl_reg);
+	struct drm_i915_private *dev_priv = to_i915(encoder->dev);
+	u32 val = I915_READ(HSW_TVIDEO_DIP_CTL(pipe_config->cpu_transcoder));
 
 	return val & (VIDEO_DIP_ENABLE_VSC_HSW | VIDEO_DIP_ENABLE_AVI_HSW |
 		      VIDEO_DIP_ENABLE_GCP_HSW | VIDEO_DIP_ENABLE_VS_HSW |
@@ -513,7 +513,7 @@
 	struct drm_i915_private *dev_priv = encoder->dev->dev_private;
 	struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
 	struct intel_hdmi *intel_hdmi = &intel_dig_port->hdmi;
-	u32 reg = VIDEO_DIP_CTL;
+	i915_reg_t reg = VIDEO_DIP_CTL;
 	u32 val = I915_READ(reg);
 	u32 port = VIDEO_DIP_PORT(intel_dig_port->port);
 
@@ -633,7 +633,8 @@
 {
 	struct drm_i915_private *dev_priv = encoder->dev->dev_private;
 	struct intel_crtc *crtc = to_intel_crtc(encoder->crtc);
-	u32 reg, val = 0;
+	i915_reg_t reg;
+	u32 val = 0;
 
 	if (HAS_DDI(dev_priv))
 		reg = HSW_TVIDEO_DIP_GCP(crtc->config->cpu_transcoder);
@@ -666,7 +667,7 @@
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
 	struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
 	struct intel_hdmi *intel_hdmi = &intel_dig_port->hdmi;
-	u32 reg = TVIDEO_DIP_CTL(intel_crtc->pipe);
+	i915_reg_t reg = TVIDEO_DIP_CTL(intel_crtc->pipe);
 	u32 val = I915_READ(reg);
 	u32 port = VIDEO_DIP_PORT(intel_dig_port->port);
 
@@ -717,7 +718,7 @@
 	struct drm_i915_private *dev_priv = encoder->dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
 	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
-	u32 reg = TVIDEO_DIP_CTL(intel_crtc->pipe);
+	i915_reg_t reg = TVIDEO_DIP_CTL(intel_crtc->pipe);
 	u32 val = I915_READ(reg);
 
 	assert_hdmi_port_disabled(intel_hdmi);
@@ -760,7 +761,7 @@
 	struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
 	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
-	u32 reg = VLV_TVIDEO_DIP_CTL(intel_crtc->pipe);
+	i915_reg_t reg = VLV_TVIDEO_DIP_CTL(intel_crtc->pipe);
 	u32 val = I915_READ(reg);
 	u32 port = VIDEO_DIP_PORT(intel_dig_port->port);
 
@@ -811,7 +812,7 @@
 	struct drm_i915_private *dev_priv = encoder->dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
 	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
-	u32 reg = HSW_TVIDEO_DIP_CTL(intel_crtc->config->cpu_transcoder);
+	i915_reg_t reg = HSW_TVIDEO_DIP_CTL(intel_crtc->config->cpu_transcoder);
 	u32 val = I915_READ(reg);
 
 	assert_hdmi_port_disabled(intel_hdmi);
@@ -925,7 +926,7 @@
 	if (tmp & HDMI_MODE_SELECT_HDMI)
 		pipe_config->has_hdmi_sink = true;
 
-	if (intel_hdmi->infoframe_enabled(&encoder->base))
+	if (intel_hdmi->infoframe_enabled(&encoder->base, pipe_config))
 		pipe_config->has_infoframe = true;
 
 	if (tmp & SDVO_AUDIO_ENABLE)
@@ -1108,6 +1109,13 @@
 	 * matching DP port to be enabled on transcoder A.
 	 */
 	if (HAS_PCH_IBX(dev) && crtc->pipe == PIPE_B) {
+		/*
+		 * We get CPU/PCH FIFO underruns on the other pipe when
+		 * doing the workaround. Sweep them under the rug.
+		 */
+		intel_set_cpu_fifo_underrun_reporting(dev_priv, PIPE_A, false);
+		intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, false);
+
 		temp &= ~SDVO_PIPE_B_SELECT;
 		temp |= SDVO_ENABLE;
 		/*
@@ -1122,6 +1130,10 @@
 		temp &= ~SDVO_ENABLE;
 		I915_WRITE(intel_hdmi->hdmi_reg, temp);
 		POSTING_READ(intel_hdmi->hdmi_reg);
+
+		intel_wait_for_vblank_if_active(dev_priv->dev, PIPE_A);
+		intel_set_cpu_fifo_underrun_reporting(dev_priv, PIPE_A, true);
+		intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, true);
 	}
 
 	intel_hdmi->set_infoframes(&encoder->base, false, NULL);
@@ -1335,21 +1347,18 @@
 {
 	struct drm_i915_private *dev_priv = to_i915(connector->dev);
 	struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector);
-	struct intel_encoder *intel_encoder =
-		&hdmi_to_dig_port(intel_hdmi)->base;
-	enum intel_display_power_domain power_domain;
 	struct edid *edid = NULL;
 	bool connected = false;
 
-	power_domain = intel_display_port_power_domain(intel_encoder);
-	intel_display_power_get(dev_priv, power_domain);
+	if (force) {
+		intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS);
 
-	if (force)
 		edid = drm_get_edid(connector,
 				    intel_gmbus_get_adapter(dev_priv,
 				    intel_hdmi->ddc_bus));
 
-	intel_display_power_put(dev_priv, power_domain);
+		intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS);
+	}
 
 	to_intel_connector(connector)->detect_edid = edid;
 	if (edid && edid->input & DRM_EDID_INPUT_DIGITAL) {
@@ -1383,6 +1392,8 @@
 	DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
 		      connector->base.id, connector->name);
 
+	intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS);
+
 	while (!live_status && --retry) {
 		live_status = intel_digital_port_connected(dev_priv,
 				hdmi_to_dig_port(intel_hdmi));
@@ -1402,6 +1413,8 @@
 	} else
 		status = connector_status_disconnected;
 
+	intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS);
+
 	return status;
 }
 
@@ -2039,7 +2052,7 @@
 		 * On BXT A0/A1, sw needs to activate DDIA HPD logic and
 		 * interrupts to check the external panel connection.
 		 */
-		if (IS_BROXTON(dev_priv) && (INTEL_REVID(dev) < BXT_REVID_B0))
+		if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
 			intel_encoder->hpd_pin = HPD_PORT_A;
 		else
 			intel_encoder->hpd_pin = HPD_PORT_B;
@@ -2131,7 +2144,8 @@
 	}
 }
 
-void intel_hdmi_init(struct drm_device *dev, int hdmi_reg, enum port port)
+void intel_hdmi_init(struct drm_device *dev,
+		     i915_reg_t hdmi_reg, enum port port)
 {
 	struct intel_digital_port *intel_dig_port;
 	struct intel_encoder *intel_encoder;
@@ -2150,7 +2164,7 @@
 	intel_encoder = &intel_dig_port->base;
 
 	drm_encoder_init(dev, &intel_encoder->base, &intel_hdmi_enc_funcs,
-			 DRM_MODE_ENCODER_TMDS);
+			 DRM_MODE_ENCODER_TMDS, NULL);
 
 	intel_encoder->compute_config = intel_hdmi_compute_config;
 	if (HAS_PCH_SPLIT(dev)) {
@@ -2202,7 +2216,7 @@
 
 	intel_dig_port->port = port;
 	intel_dig_port->hdmi.hdmi_reg = hdmi_reg;
-	intel_dig_port->dp.output_reg = 0;
+	intel_dig_port->dp.output_reg = INVALID_MMIO_REG;
 
 	intel_hdmi_init_connector(intel_dig_port, intel_connector);
 }
diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c
index 1369fc4..e26e22a 100644
--- a/drivers/gpu/drm/i915/intel_i2c.c
+++ b/drivers/gpu/drm/i915/intel_i2c.c
@@ -36,7 +36,7 @@
 
 struct gmbus_pin {
 	const char *name;
-	int reg;
+	i915_reg_t reg;
 };
 
 /* Map gmbus pin pairs to names and registers. */
@@ -63,9 +63,9 @@
 };
 
 static const struct gmbus_pin gmbus_pins_bxt[] = {
-	[GMBUS_PIN_1_BXT] = { "dpb", PCH_GPIOB },
-	[GMBUS_PIN_2_BXT] = { "dpc", PCH_GPIOC },
-	[GMBUS_PIN_3_BXT] = { "misc", PCH_GPIOD },
+	[GMBUS_PIN_1_BXT] = { "dpb", GPIOB },
+	[GMBUS_PIN_2_BXT] = { "dpc", GPIOC },
+	[GMBUS_PIN_3_BXT] = { "misc", GPIOD },
 };
 
 /* pin is expected to be valid */
@@ -74,7 +74,7 @@
 {
 	if (IS_BROXTON(dev_priv))
 		return &gmbus_pins_bxt[pin];
-	else if (IS_SKYLAKE(dev_priv))
+	else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
 		return &gmbus_pins_skl[pin];
 	else if (IS_BROADWELL(dev_priv))
 		return &gmbus_pins_bdw[pin];
@@ -89,14 +89,15 @@
 
 	if (IS_BROXTON(dev_priv))
 		size = ARRAY_SIZE(gmbus_pins_bxt);
-	else if (IS_SKYLAKE(dev_priv))
+	else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
 		size = ARRAY_SIZE(gmbus_pins_skl);
 	else if (IS_BROADWELL(dev_priv))
 		size = ARRAY_SIZE(gmbus_pins_bdw);
 	else
 		size = ARRAY_SIZE(gmbus_pins);
 
-	return pin < size && get_gmbus_pin(dev_priv, pin)->reg;
+	return pin < size &&
+		i915_mmio_reg_valid(get_gmbus_pin(dev_priv, pin)->reg);
 }
 
 /* Intel GPIO access functions */
@@ -240,9 +241,8 @@
 
 	algo = &bus->bit_algo;
 
-	bus->gpio_reg = dev_priv->gpio_mmio_base +
-		get_gmbus_pin(dev_priv, pin)->reg;
-
+	bus->gpio_reg = _MMIO(dev_priv->gpio_mmio_base +
+			      i915_mmio_reg_offset(get_gmbus_pin(dev_priv, pin)->reg));
 	bus->adapter.algo_data = algo;
 	algo->setsda = set_data;
 	algo->setscl = set_clock;
@@ -472,9 +472,7 @@
 }
 
 static int
-gmbus_xfer(struct i2c_adapter *adapter,
-	   struct i2c_msg *msgs,
-	   int num)
+do_gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num)
 {
 	struct intel_gmbus *bus = container_of(adapter,
 					       struct intel_gmbus,
@@ -483,14 +481,6 @@
 	int i = 0, inc, try = 0;
 	int ret = 0;
 
-	intel_aux_display_runtime_get(dev_priv);
-	mutex_lock(&dev_priv->gmbus_mutex);
-
-	if (bus->force_bit) {
-		ret = i2c_bit_algo.master_xfer(adapter, msgs, num);
-		goto out;
-	}
-
 retry:
 	I915_WRITE(GMBUS0, bus->reg0);
 
@@ -505,17 +495,13 @@
 			ret = gmbus_xfer_write(dev_priv, &msgs[i]);
 		}
 
+		if (!ret)
+			ret = gmbus_wait_hw_status(dev_priv, GMBUS_HW_WAIT_PHASE,
+						   GMBUS_HW_WAIT_EN);
 		if (ret == -ETIMEDOUT)
 			goto timeout;
-		if (ret == -ENXIO)
+		else if (ret)
 			goto clear_err;
-
-		ret = gmbus_wait_hw_status(dev_priv, GMBUS_HW_WAIT_PHASE,
-					   GMBUS_HW_WAIT_EN);
-		if (ret == -ENXIO)
-			goto clear_err;
-		if (ret)
-			goto timeout;
 	}
 
 	/* Generate a STOP condition on the bus. Note that gmbus can't generata
@@ -589,13 +575,36 @@
 		 bus->adapter.name, bus->reg0 & 0xff);
 	I915_WRITE(GMBUS0, 0);
 
-	/* Hardware may not support GMBUS over these pins? Try GPIO bitbanging instead. */
+	/*
+	 * Hardware may not support GMBUS over these pins? Try GPIO bitbanging
+	 * instead. Use EAGAIN to have i2c core retry.
+	 */
 	bus->force_bit = 1;
-	ret = i2c_bit_algo.master_xfer(adapter, msgs, num);
+	ret = -EAGAIN;
 
 out:
+	return ret;
+}
+
+static int
+gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num)
+{
+	struct intel_gmbus *bus = container_of(adapter, struct intel_gmbus,
+					       adapter);
+	struct drm_i915_private *dev_priv = bus->dev_priv;
+	int ret;
+
+	intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS);
+	mutex_lock(&dev_priv->gmbus_mutex);
+
+	if (bus->force_bit)
+		ret = i2c_bit_algo.master_xfer(adapter, msgs, num);
+	else
+		ret = do_gmbus_xfer(adapter, msgs, num);
+
 	mutex_unlock(&dev_priv->gmbus_mutex);
-	intel_aux_display_runtime_put(dev_priv);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS);
+
 	return ret;
 }
 
@@ -626,12 +635,13 @@
 
 	if (HAS_PCH_NOP(dev))
 		return 0;
-	else if (HAS_PCH_SPLIT(dev))
-		dev_priv->gpio_mmio_base = PCH_GPIOA - GPIOA;
-	else if (IS_VALLEYVIEW(dev))
+
+	if (IS_VALLEYVIEW(dev))
 		dev_priv->gpio_mmio_base = VLV_DISPLAY_BASE;
-	else
-		dev_priv->gpio_mmio_base = 0;
+	else if (!HAS_GMCH_DISPLAY(dev_priv))
+		dev_priv->gpio_mmio_base =
+			i915_mmio_reg_offset(PCH_GPIOA) -
+			i915_mmio_reg_offset(GPIOA);
 
 	mutex_init(&dev_priv->gmbus_mutex);
 	init_waitqueue_head(&dev_priv->gmbus_wait_queue);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 88e12bd..4ebafab 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -190,16 +190,21 @@
 #define GEN8_CTX_L3LLC_COHERENT (1<<5)
 #define GEN8_CTX_PRIVILEGE (1<<8)
 
-#define ASSIGN_CTX_PDP(ppgtt, reg_state, n) { \
+#define ASSIGN_CTX_REG(reg_state, pos, reg, val) do { \
+	(reg_state)[(pos)+0] = i915_mmio_reg_offset(reg); \
+	(reg_state)[(pos)+1] = (val); \
+} while (0)
+
+#define ASSIGN_CTX_PDP(ppgtt, reg_state, n) do {		\
 	const u64 _addr = i915_page_dir_dma_addr((ppgtt), (n));	\
 	reg_state[CTX_PDP ## n ## _UDW+1] = upper_32_bits(_addr); \
 	reg_state[CTX_PDP ## n ## _LDW+1] = lower_32_bits(_addr); \
-}
+} while (0)
 
-#define ASSIGN_CTX_PML4(ppgtt, reg_state) { \
+#define ASSIGN_CTX_PML4(ppgtt, reg_state) do { \
 	reg_state[CTX_PDP0_UDW + 1] = upper_32_bits(px_dma(&ppgtt->pml4)); \
 	reg_state[CTX_PDP0_LDW + 1] = lower_32_bits(px_dma(&ppgtt->pml4)); \
-}
+} while (0)
 
 enum {
 	ADVANCED_CONTEXT = 0,
@@ -284,8 +289,8 @@
 {
 	struct drm_device *dev = ring->dev;
 
-	return ((IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_B0) ||
-		(IS_BROXTON(dev) && INTEL_REVID(dev) == BXT_REVID_A0)) &&
+	return (IS_SKL_REVID(dev, 0, SKL_REVID_B0) ||
+		IS_BXT_REVID(dev, 0, BXT_REVID_A1)) &&
 	       (ring->id == VCS || ring->id == VCS2);
 }
 
@@ -921,7 +926,7 @@
 
 		intel_logical_ring_emit(ringbuf, MI_NOOP);
 		intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1));
-		intel_logical_ring_emit(ringbuf, INSTPM);
+		intel_logical_ring_emit_reg(ringbuf, INSTPM);
 		intel_logical_ring_emit(ringbuf, instp_mask << 16 | instp_mode);
 		intel_logical_ring_advance(ringbuf);
 
@@ -1096,7 +1101,7 @@
 
 	intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(w->count));
 	for (i = 0; i < w->count; i++) {
-		intel_logical_ring_emit(ringbuf, w->reg[i].addr);
+		intel_logical_ring_emit_reg(ringbuf, w->reg[i].addr);
 		intel_logical_ring_emit(ringbuf, w->reg[i].value);
 	}
 	intel_logical_ring_emit(ringbuf, MI_NOOP);
@@ -1120,6 +1125,8 @@
 		batch[__index] = (cmd);					\
 	} while (0)
 
+#define wa_ctx_emit_reg(batch, index, reg) \
+	wa_ctx_emit((batch), (index), i915_mmio_reg_offset(reg))
 
 /*
  * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
@@ -1149,17 +1156,17 @@
 	 * this batch updates GEN8_L3SQCREG4 with default value we need to
 	 * set this bit here to retain the WA during flush.
 	 */
-	if (IS_SKYLAKE(ring->dev) && INTEL_REVID(ring->dev) <= SKL_REVID_E0)
+	if (IS_SKL_REVID(ring->dev, 0, SKL_REVID_E0))
 		l3sqc4_flush |= GEN8_LQSC_RO_PERF_DIS;
 
 	wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 |
 				   MI_SRM_LRM_GLOBAL_GTT));
-	wa_ctx_emit(batch, index, GEN8_L3SQCREG4);
+	wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4);
 	wa_ctx_emit(batch, index, ring->scratch.gtt_offset + 256);
 	wa_ctx_emit(batch, index, 0);
 
 	wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1));
-	wa_ctx_emit(batch, index, GEN8_L3SQCREG4);
+	wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4);
 	wa_ctx_emit(batch, index, l3sqc4_flush);
 
 	wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6));
@@ -1172,7 +1179,7 @@
 
 	wa_ctx_emit(batch, index, (MI_LOAD_REGISTER_MEM_GEN8 |
 				   MI_SRM_LRM_GLOBAL_GTT));
-	wa_ctx_emit(batch, index, GEN8_L3SQCREG4);
+	wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4);
 	wa_ctx_emit(batch, index, ring->scratch.gtt_offset + 256);
 	wa_ctx_emit(batch, index, 0);
 
@@ -1314,8 +1321,8 @@
 	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
 
 	/* WaDisableCtxRestoreArbitration:skl,bxt */
-	if ((IS_SKYLAKE(dev) && (INTEL_REVID(dev) <= SKL_REVID_D0)) ||
-	    (IS_BROXTON(dev) && (INTEL_REVID(dev) == BXT_REVID_A0)))
+	if (IS_SKL_REVID(dev, 0, SKL_REVID_D0) ||
+	    IS_BXT_REVID(dev, 0, BXT_REVID_A1))
 		wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_DISABLE);
 
 	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt */
@@ -1340,18 +1347,18 @@
 	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
 
 	/* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */
-	if ((IS_SKYLAKE(dev) && (INTEL_REVID(dev) <= SKL_REVID_B0)) ||
-	    (IS_BROXTON(dev) && (INTEL_REVID(dev) == BXT_REVID_A0))) {
+	if (IS_SKL_REVID(dev, 0, SKL_REVID_B0) ||
+	    IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
 		wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1));
-		wa_ctx_emit(batch, index, GEN9_SLICE_COMMON_ECO_CHICKEN0);
+		wa_ctx_emit_reg(batch, index, GEN9_SLICE_COMMON_ECO_CHICKEN0);
 		wa_ctx_emit(batch, index,
 			    _MASKED_BIT_ENABLE(DISABLE_PIXEL_MASK_CAMMING));
 		wa_ctx_emit(batch, index, MI_NOOP);
 	}
 
 	/* WaDisableCtxRestoreArbitration:skl,bxt */
-	if ((IS_SKYLAKE(dev) && (INTEL_REVID(dev) <= SKL_REVID_D0)) ||
-	    (IS_BROXTON(dev) && (INTEL_REVID(dev) == BXT_REVID_A0)))
+	if (IS_SKL_REVID(dev, 0, SKL_REVID_D0) ||
+	    IS_BXT_REVID(dev, 0, BXT_REVID_A1))
 		wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_ENABLE);
 
 	wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END);
@@ -1472,12 +1479,6 @@
 	I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask));
 	I915_WRITE(RING_HWSTAM(ring->mmio_base), 0xffffffff);
 
-	if (ring->status_page.obj) {
-		I915_WRITE(RING_HWS_PGA(ring->mmio_base),
-			   (u32)ring->status_page.gfx_addr);
-		POSTING_READ(RING_HWS_PGA(ring->mmio_base));
-	}
-
 	I915_WRITE(RING_MODE_GEN7(ring),
 		   _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) |
 		   _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
@@ -1562,9 +1563,9 @@
 	for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) {
 		const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
 
-		intel_logical_ring_emit(ringbuf, GEN8_RING_PDP_UDW(ring, i));
+		intel_logical_ring_emit_reg(ringbuf, GEN8_RING_PDP_UDW(ring, i));
 		intel_logical_ring_emit(ringbuf, upper_32_bits(pd_daddr));
-		intel_logical_ring_emit(ringbuf, GEN8_RING_PDP_LDW(ring, i));
+		intel_logical_ring_emit_reg(ringbuf, GEN8_RING_PDP_LDW(ring, i));
 		intel_logical_ring_emit(ringbuf, lower_32_bits(pd_daddr));
 	}
 
@@ -1923,6 +1924,7 @@
 	i915_gem_batch_pool_init(dev, &ring->batch_pool);
 	init_waitqueue_head(&ring->irq_queue);
 
+	INIT_LIST_HEAD(&ring->buffers);
 	INIT_LIST_HEAD(&ring->execlist_queue);
 	INIT_LIST_HEAD(&ring->execlist_retired_req_list);
 	spin_lock_init(&ring->execlist_lock);
@@ -1972,7 +1974,7 @@
 		ring->init_hw = gen8_init_render_ring;
 	ring->init_context = gen8_init_rcs_context;
 	ring->cleanup = intel_fini_pipe_control;
-	if (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0) {
+	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
 		ring->get_seqno = bxt_a_get_seqno;
 		ring->set_seqno = bxt_a_set_seqno;
 	} else {
@@ -2024,7 +2026,7 @@
 		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
 
 	ring->init_hw = gen8_init_common_ring;
-	if (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0) {
+	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
 		ring->get_seqno = bxt_a_get_seqno;
 		ring->set_seqno = bxt_a_set_seqno;
 	} else {
@@ -2079,7 +2081,7 @@
 		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
 
 	ring->init_hw = gen8_init_common_ring;
-	if (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0) {
+	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
 		ring->get_seqno = bxt_a_get_seqno;
 		ring->set_seqno = bxt_a_set_seqno;
 	} else {
@@ -2109,7 +2111,7 @@
 		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
 
 	ring->init_hw = gen8_init_common_ring;
-	if (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0) {
+	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
 		ring->get_seqno = bxt_a_get_seqno;
 		ring->set_seqno = bxt_a_set_seqno;
 	} else {
@@ -2263,46 +2265,31 @@
 	 * only for the first context restore: on a subsequent save, the GPU will
 	 * recreate this batchbuffer with new values (including all the missing
 	 * MI_LOAD_REGISTER_IMM commands that we are not initializing here). */
-	if (ring->id == RCS)
-		reg_state[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(14);
-	else
-		reg_state[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(11);
-	reg_state[CTX_LRI_HEADER_0] |= MI_LRI_FORCE_POSTED;
-	reg_state[CTX_CONTEXT_CONTROL] = RING_CONTEXT_CONTROL(ring);
-	reg_state[CTX_CONTEXT_CONTROL+1] =
-		_MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
-				   CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
-				   CTX_CTRL_RS_CTX_ENABLE);
-	reg_state[CTX_RING_HEAD] = RING_HEAD(ring->mmio_base);
-	reg_state[CTX_RING_HEAD+1] = 0;
-	reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base);
-	reg_state[CTX_RING_TAIL+1] = 0;
-	reg_state[CTX_RING_BUFFER_START] = RING_START(ring->mmio_base);
+	reg_state[CTX_LRI_HEADER_0] =
+		MI_LOAD_REGISTER_IMM(ring->id == RCS ? 14 : 11) | MI_LRI_FORCE_POSTED;
+	ASSIGN_CTX_REG(reg_state, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(ring),
+		       _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
+					  CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+					  CTX_CTRL_RS_CTX_ENABLE));
+	ASSIGN_CTX_REG(reg_state, CTX_RING_HEAD, RING_HEAD(ring->mmio_base), 0);
+	ASSIGN_CTX_REG(reg_state, CTX_RING_TAIL, RING_TAIL(ring->mmio_base), 0);
 	/* Ring buffer start address is not known until the buffer is pinned.
 	 * It is written to the context image in execlists_update_context()
 	 */
-	reg_state[CTX_RING_BUFFER_CONTROL] = RING_CTL(ring->mmio_base);
-	reg_state[CTX_RING_BUFFER_CONTROL+1] =
-			((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID;
-	reg_state[CTX_BB_HEAD_U] = ring->mmio_base + 0x168;
-	reg_state[CTX_BB_HEAD_U+1] = 0;
-	reg_state[CTX_BB_HEAD_L] = ring->mmio_base + 0x140;
-	reg_state[CTX_BB_HEAD_L+1] = 0;
-	reg_state[CTX_BB_STATE] = ring->mmio_base + 0x110;
-	reg_state[CTX_BB_STATE+1] = (1<<5);
-	reg_state[CTX_SECOND_BB_HEAD_U] = ring->mmio_base + 0x11c;
-	reg_state[CTX_SECOND_BB_HEAD_U+1] = 0;
-	reg_state[CTX_SECOND_BB_HEAD_L] = ring->mmio_base + 0x114;
-	reg_state[CTX_SECOND_BB_HEAD_L+1] = 0;
-	reg_state[CTX_SECOND_BB_STATE] = ring->mmio_base + 0x118;
-	reg_state[CTX_SECOND_BB_STATE+1] = 0;
+	ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_START, RING_START(ring->mmio_base), 0);
+	ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_CONTROL, RING_CTL(ring->mmio_base),
+		       ((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID);
+	ASSIGN_CTX_REG(reg_state, CTX_BB_HEAD_U, RING_BBADDR_UDW(ring->mmio_base), 0);
+	ASSIGN_CTX_REG(reg_state, CTX_BB_HEAD_L, RING_BBADDR(ring->mmio_base), 0);
+	ASSIGN_CTX_REG(reg_state, CTX_BB_STATE, RING_BBSTATE(ring->mmio_base),
+		       RING_BB_PPGTT);
+	ASSIGN_CTX_REG(reg_state, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(ring->mmio_base), 0);
+	ASSIGN_CTX_REG(reg_state, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(ring->mmio_base), 0);
+	ASSIGN_CTX_REG(reg_state, CTX_SECOND_BB_STATE, RING_SBBSTATE(ring->mmio_base), 0);
 	if (ring->id == RCS) {
-		reg_state[CTX_BB_PER_CTX_PTR] = ring->mmio_base + 0x1c0;
-		reg_state[CTX_BB_PER_CTX_PTR+1] = 0;
-		reg_state[CTX_RCS_INDIRECT_CTX] = ring->mmio_base + 0x1c4;
-		reg_state[CTX_RCS_INDIRECT_CTX+1] = 0;
-		reg_state[CTX_RCS_INDIRECT_CTX_OFFSET] = ring->mmio_base + 0x1c8;
-		reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] = 0;
+		ASSIGN_CTX_REG(reg_state, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(ring->mmio_base), 0);
+		ASSIGN_CTX_REG(reg_state, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(ring->mmio_base), 0);
+		ASSIGN_CTX_REG(reg_state, CTX_RCS_INDIRECT_CTX_OFFSET, RING_INDIRECT_CTX_OFFSET(ring->mmio_base), 0);
 		if (ring->wa_ctx.obj) {
 			struct i915_ctx_workarounds *wa_ctx = &ring->wa_ctx;
 			uint32_t ggtt_offset = i915_gem_obj_ggtt_offset(wa_ctx->obj);
@@ -2319,18 +2306,17 @@
 				0x01;
 		}
 	}
-	reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9);
-	reg_state[CTX_LRI_HEADER_1] |= MI_LRI_FORCE_POSTED;
-	reg_state[CTX_CTX_TIMESTAMP] = ring->mmio_base + 0x3a8;
-	reg_state[CTX_CTX_TIMESTAMP+1] = 0;
-	reg_state[CTX_PDP3_UDW] = GEN8_RING_PDP_UDW(ring, 3);
-	reg_state[CTX_PDP3_LDW] = GEN8_RING_PDP_LDW(ring, 3);
-	reg_state[CTX_PDP2_UDW] = GEN8_RING_PDP_UDW(ring, 2);
-	reg_state[CTX_PDP2_LDW] = GEN8_RING_PDP_LDW(ring, 2);
-	reg_state[CTX_PDP1_UDW] = GEN8_RING_PDP_UDW(ring, 1);
-	reg_state[CTX_PDP1_LDW] = GEN8_RING_PDP_LDW(ring, 1);
-	reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0);
-	reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0);
+	reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED;
+	ASSIGN_CTX_REG(reg_state, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(ring->mmio_base), 0);
+	/* PDP values well be assigned later if needed */
+	ASSIGN_CTX_REG(reg_state, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(ring, 3), 0);
+	ASSIGN_CTX_REG(reg_state, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(ring, 3), 0);
+	ASSIGN_CTX_REG(reg_state, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(ring, 2), 0);
+	ASSIGN_CTX_REG(reg_state, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(ring, 2), 0);
+	ASSIGN_CTX_REG(reg_state, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(ring, 1), 0);
+	ASSIGN_CTX_REG(reg_state, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(ring, 1), 0);
+	ASSIGN_CTX_REG(reg_state, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(ring, 0), 0);
+	ASSIGN_CTX_REG(reg_state, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(ring, 0), 0);
 
 	if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
 		/* 64b PPGTT (48bit canonical)
@@ -2352,8 +2338,8 @@
 
 	if (ring->id == RCS) {
 		reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
-		reg_state[CTX_R_PWR_CLK_STATE] = GEN8_R_PWR_CLK_STATE;
-		reg_state[CTX_R_PWR_CLK_STATE+1] = make_rpcs(dev);
+		ASSIGN_CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
+			       make_rpcs(dev));
 	}
 
 	kunmap_atomic(reg_state);
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 4e60d54..0b821b9 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -29,16 +29,16 @@
 #define GEN8_CSB_PTR_MASK 0x07
 
 /* Execlists regs */
-#define RING_ELSP(ring)			((ring)->mmio_base+0x230)
-#define RING_EXECLIST_STATUS_LO(ring)	((ring)->mmio_base+0x234)
-#define RING_EXECLIST_STATUS_HI(ring)	((ring)->mmio_base+0x234 + 4)
-#define RING_CONTEXT_CONTROL(ring)	((ring)->mmio_base+0x244)
+#define RING_ELSP(ring)				_MMIO((ring)->mmio_base + 0x230)
+#define RING_EXECLIST_STATUS_LO(ring)		_MMIO((ring)->mmio_base + 0x234)
+#define RING_EXECLIST_STATUS_HI(ring)		_MMIO((ring)->mmio_base + 0x234 + 4)
+#define RING_CONTEXT_CONTROL(ring)		_MMIO((ring)->mmio_base + 0x244)
 #define	  CTX_CTRL_INHIBIT_SYN_CTX_SWITCH	(1 << 3)
 #define	  CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT	(1 << 0)
 #define   CTX_CTRL_RS_CTX_ENABLE                (1 << 1)
-#define RING_CONTEXT_STATUS_BUF_LO(ring, i)	((ring)->mmio_base+0x370 + (i) * 8)
-#define RING_CONTEXT_STATUS_BUF_HI(ring, i)	((ring)->mmio_base+0x370 + (i) * 8 + 4)
-#define RING_CONTEXT_STATUS_PTR(ring)	((ring)->mmio_base+0x3a0)
+#define RING_CONTEXT_STATUS_BUF_LO(ring, i)	_MMIO((ring)->mmio_base + 0x370 + (i) * 8)
+#define RING_CONTEXT_STATUS_BUF_HI(ring, i)	_MMIO((ring)->mmio_base + 0x370 + (i) * 8 + 4)
+#define RING_CONTEXT_STATUS_PTR(ring)		_MMIO((ring)->mmio_base + 0x3a0)
 
 /* Logical Rings */
 int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request);
@@ -70,6 +70,11 @@
 	iowrite32(data, ringbuf->virtual_start + ringbuf->tail);
 	ringbuf->tail += 4;
 }
+static inline void intel_logical_ring_emit_reg(struct intel_ringbuffer *ringbuf,
+					       i915_reg_t reg)
+{
+	intel_logical_ring_emit(ringbuf, i915_mmio_reg_offset(reg));
+}
 
 /* Logical Ring Contexts */
 
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index 7f39b8a..0da0240 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -51,7 +51,7 @@
 	struct intel_encoder base;
 
 	bool is_dual_link;
-	u32 reg;
+	i915_reg_t reg;
 	u32 a3_power;
 
 	struct intel_lvds_connector *attached_connector;
@@ -210,7 +210,7 @@
 	struct intel_connector *intel_connector =
 		&lvds_encoder->attached_connector->base;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 ctl_reg, stat_reg;
+	i915_reg_t ctl_reg, stat_reg;
 
 	if (HAS_PCH_SPLIT(dev)) {
 		ctl_reg = PCH_PP_CONTROL;
@@ -235,7 +235,7 @@
 	struct drm_device *dev = encoder->base.dev;
 	struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base);
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 ctl_reg, stat_reg;
+	i915_reg_t ctl_reg, stat_reg;
 
 	if (HAS_PCH_SPLIT(dev)) {
 		ctl_reg = PCH_PP_CONTROL;
@@ -939,7 +939,7 @@
 	struct drm_display_mode *downclock_mode = NULL;
 	struct edid *edid;
 	struct drm_crtc *crtc;
-	u32 lvds_reg;
+	i915_reg_t lvds_reg;
 	u32 lvds;
 	int pipe;
 	u8 pin;
@@ -1025,7 +1025,7 @@
 			   DRM_MODE_CONNECTOR_LVDS);
 
 	drm_encoder_init(dev, &intel_encoder->base, &intel_lvds_enc_funcs,
-			 DRM_MODE_ENCODER_LVDS);
+			 DRM_MODE_ENCODER_LVDS, NULL);
 
 	intel_encoder->enable = intel_enable_lvds;
 	intel_encoder->pre_enable = intel_pre_enable_lvds;
@@ -1164,8 +1164,7 @@
 	DRM_DEBUG_KMS("detected %s-link lvds configuration\n",
 		      lvds_encoder->is_dual_link ? "dual" : "single");
 
-	lvds_encoder->a3_power = I915_READ(lvds_encoder->reg) &
-				 LVDS_A3_POWER_MASK;
+	lvds_encoder->a3_power = lvds & LVDS_A3_POWER_MASK;
 
 	lvds_connector->lid_notifier.notifier_call = intel_lid_notify;
 	if (acpi_lid_notifier_register(&lvds_connector->lid_notifier)) {
diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
index 6d3c6c0..fed7bea 100644
--- a/drivers/gpu/drm/i915/intel_mocs.c
+++ b/drivers/gpu/drm/i915/intel_mocs.c
@@ -143,7 +143,7 @@
 {
 	bool result = false;
 
-	if (IS_SKYLAKE(dev)) {
+	if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
 		table->size  = ARRAY_SIZE(skylake_mocs_table);
 		table->table = skylake_mocs_table;
 		result = true;
@@ -159,11 +159,30 @@
 	return result;
 }
 
+static i915_reg_t mocs_register(enum intel_ring_id ring, int index)
+{
+	switch (ring) {
+	case RCS:
+		return GEN9_GFX_MOCS(index);
+	case VCS:
+		return GEN9_MFX0_MOCS(index);
+	case BCS:
+		return GEN9_BLT_MOCS(index);
+	case VECS:
+		return GEN9_VEBOX_MOCS(index);
+	case VCS2:
+		return GEN9_MFX1_MOCS(index);
+	default:
+		MISSING_CASE(ring);
+		return INVALID_MMIO_REG;
+	}
+}
+
 /**
  * emit_mocs_control_table() - emit the mocs control table
  * @req:	Request to set up the MOCS table for.
  * @table:	The values to program into the control regs.
- * @reg_base:	The base for the engine that needs to be programmed.
+ * @ring:	The engine for whom to emit the registers.
  *
  * This function simply emits a MI_LOAD_REGISTER_IMM command for the
  * given table starting at the given address.
@@ -172,7 +191,7 @@
  */
 static int emit_mocs_control_table(struct drm_i915_gem_request *req,
 				   const struct drm_i915_mocs_table *table,
-				   u32 reg_base)
+				   enum intel_ring_id ring)
 {
 	struct intel_ringbuffer *ringbuf = req->ringbuf;
 	unsigned int index;
@@ -191,7 +210,7 @@
 				MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES));
 
 	for (index = 0; index < table->size; index++) {
-		intel_logical_ring_emit(ringbuf, reg_base + index * 4);
+		intel_logical_ring_emit_reg(ringbuf, mocs_register(ring, index));
 		intel_logical_ring_emit(ringbuf,
 					table->table[index].control_value);
 	}
@@ -205,7 +224,7 @@
 	 * that value to all the used entries.
 	 */
 	for (; index < GEN9_NUM_MOCS_ENTRIES; index++) {
-		intel_logical_ring_emit(ringbuf, reg_base + index * 4);
+		intel_logical_ring_emit_reg(ringbuf, mocs_register(ring, index));
 		intel_logical_ring_emit(ringbuf, table->table[0].control_value);
 	}
 
@@ -253,7 +272,7 @@
 		value = (table->table[count].l3cc_value & 0xffff) |
 			((table->table[count + 1].l3cc_value & 0xffff) << 16);
 
-		intel_logical_ring_emit(ringbuf, GEN9_LNCFCMOCS0 + i * 4);
+		intel_logical_ring_emit_reg(ringbuf, GEN9_LNCFCMOCS(i));
 		intel_logical_ring_emit(ringbuf, value);
 	}
 
@@ -270,7 +289,7 @@
 	 * they are reserved by the hardware.
 	 */
 	for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) {
-		intel_logical_ring_emit(ringbuf, GEN9_LNCFCMOCS0 + i * 4);
+		intel_logical_ring_emit_reg(ringbuf, GEN9_LNCFCMOCS(i));
 		intel_logical_ring_emit(ringbuf, value);
 
 		value = filler;
@@ -304,26 +323,16 @@
 	int ret;
 
 	if (get_mocs_settings(req->ring->dev, &t)) {
+		struct drm_i915_private *dev_priv = req->i915;
+		struct intel_engine_cs *ring;
+		enum intel_ring_id ring_id;
+
 		/* Program the control registers */
-		ret = emit_mocs_control_table(req, &t, GEN9_GFX_MOCS_0);
-		if (ret)
-			return ret;
-
-		ret = emit_mocs_control_table(req, &t, GEN9_MFX0_MOCS_0);
-		if (ret)
-			return ret;
-
-		ret = emit_mocs_control_table(req, &t, GEN9_MFX1_MOCS_0);
-		if (ret)
-			return ret;
-
-		ret = emit_mocs_control_table(req, &t, GEN9_VEBOX_MOCS_0);
-		if (ret)
-			return ret;
-
-		ret = emit_mocs_control_table(req, &t, GEN9_BLT_MOCS_0);
-		if (ret)
-			return ret;
+		for_each_ring(ring, dev_priv, ring_id) {
+			ret = emit_mocs_control_table(req, &t, ring_id);
+			if (ret)
+				return ret;
+		}
 
 		/* Now program the l3cc registers */
 		ret = emit_mocs_l3cc_table(req, &t);
diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c
index 6dc13c0..e362a30 100644
--- a/drivers/gpu/drm/i915/intel_opregion.c
+++ b/drivers/gpu/drm/i915/intel_opregion.c
@@ -682,7 +682,7 @@
 	}
 
 	if (!acpi_video_bus) {
-		DRM_ERROR("No ACPI video bus found\n");
+		DRM_DEBUG_KMS("No ACPI video bus found\n");
 		return;
 	}
 
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index 4445426..76f1980 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -749,7 +749,7 @@
 	if (ret != 0)
 		return ret;
 
-	ret = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL, NULL,
+	ret = i915_gem_object_pin_to_display_plane(new_bo, 0,
 						   &i915_ggtt_view_normal);
 	if (ret != 0)
 		return ret;
diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index a24df35..ae808b6 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -1264,6 +1264,14 @@
 #endif /* CONFIG_BACKLIGHT_CLASS_DEVICE */
 
 /*
+ * BXT: PWM clock frequency = 19.2 MHz.
+ */
+static u32 bxt_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz)
+{
+	return KHz(19200) / pwm_freq_hz;
+}
+
+/*
  * SPT: This value represents the period of the PWM stream in clock periods
  * multiplied by 16 (default increment) or 128 (alternate increment selected in
  * SCHICKEN_1 bit 0). PWM clock is 24 MHz.
@@ -1300,7 +1308,7 @@
 	else
 		mul = 128;
 
-	if (dev_priv->pch_id == INTEL_PCH_LPT_DEVICE_ID_TYPE)
+	if (HAS_PCH_LPT_H(dev_priv))
 		clock = MHz(135); /* LPT:H */
 	else
 		clock = MHz(24); /* LPT:LP */
@@ -1335,22 +1343,28 @@
 	int clock;
 
 	if (IS_PINEVIEW(dev))
-		clock = intel_hrawclk(dev);
+		clock = MHz(intel_hrawclk(dev));
 	else
-		clock = 1000 * dev_priv->display.get_display_clock_speed(dev);
+		clock = 1000 * dev_priv->cdclk_freq;
 
 	return clock / (pwm_freq_hz * 32);
 }
 
 /*
  * Gen4: This value represents the period of the PWM stream in display core
- * clocks multiplied by 128.
+ * clocks ([DevCTG] HRAW clocks) multiplied by 128.
+ *
  */
 static u32 i965_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz)
 {
 	struct drm_device *dev = connector->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	int clock = 1000 * dev_priv->display.get_display_clock_speed(dev);
+	int clock;
+
+	if (IS_G4X(dev_priv))
+		clock = MHz(intel_hrawclk(dev));
+	else
+		clock = 1000 * dev_priv->cdclk_freq;
 
 	return clock / (pwm_freq_hz * 128);
 }
@@ -1385,14 +1399,18 @@
 	u16 pwm_freq_hz = dev_priv->vbt.backlight.pwm_freq_hz;
 	u32 pwm;
 
-	if (!pwm_freq_hz) {
-		DRM_DEBUG_KMS("backlight frequency not specified in VBT\n");
+	if (!panel->backlight.hz_to_pwm) {
+		DRM_DEBUG_KMS("backlight frequency conversion not supported\n");
 		return 0;
 	}
 
-	if (!panel->backlight.hz_to_pwm) {
-		DRM_DEBUG_KMS("backlight frequency setting from VBT currently not supported on this platform\n");
-		return 0;
+	if (pwm_freq_hz) {
+		DRM_DEBUG_KMS("VBT defined backlight frequency %u Hz\n",
+			      pwm_freq_hz);
+	} else {
+		pwm_freq_hz = 200;
+		DRM_DEBUG_KMS("default backlight frequency %u Hz\n",
+			      pwm_freq_hz);
 	}
 
 	pwm = panel->backlight.hz_to_pwm(connector, pwm_freq_hz);
@@ -1401,8 +1419,6 @@
 		return 0;
 	}
 
-	DRM_DEBUG_KMS("backlight frequency %u Hz from VBT\n", pwm_freq_hz);
-
 	return pwm;
 }
 
@@ -1750,6 +1766,7 @@
 		panel->backlight.disable = bxt_disable_backlight;
 		panel->backlight.set = bxt_set_backlight;
 		panel->backlight.get = bxt_get_backlight;
+		panel->backlight.hz_to_pwm = bxt_hz_to_pwm;
 	} else if (HAS_PCH_LPT(dev) || HAS_PCH_SPT(dev)) {
 		panel->backlight.setup = lpt_setup_backlight;
 		panel->backlight.enable = lpt_enable_backlight;
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index d52a15d..ee05ce8 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -66,6 +66,14 @@
 	 */
 	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
 		   GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);
+
+	/*
+	 * Wa: Backlight PWM may stop in the asserted state, causing backlight
+	 * to stay fully on.
+	 */
+	if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER))
+		I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
+			   PWM1_GATING_DIS | PWM2_GATING_DIS);
 }
 
 static void i915_pineview_get_mem_freq(struct drm_device *dev)
@@ -1708,13 +1716,6 @@
 	return DIV_ROUND_UP(pri_val * 64, horiz_pixels * bytes_per_pixel) + 2;
 }
 
-struct skl_pipe_wm_parameters {
-	bool active;
-	uint32_t pipe_htotal;
-	uint32_t pixel_rate; /* in KHz */
-	struct intel_plane_wm_parameters plane[I915_MAX_PLANES];
-};
-
 struct ilk_wm_maximums {
 	uint16_t pri;
 	uint16_t spr;
@@ -1722,13 +1723,6 @@
 	uint16_t fbc;
 };
 
-/* used in computing the new watermarks state */
-struct intel_wm_config {
-	unsigned int num_pipes_active;
-	bool sprites_enabled;
-	bool sprites_scaled;
-};
-
 /*
  * For both WM_PIPE and WM_LP.
  * mem_value must be in 0.1us units.
@@ -1979,9 +1973,11 @@
 				 const struct intel_crtc *intel_crtc,
 				 int level,
 				 struct intel_crtc_state *cstate,
+				 struct intel_plane_state *pristate,
+				 struct intel_plane_state *sprstate,
+				 struct intel_plane_state *curstate,
 				 struct intel_wm_level *result)
 {
-	struct intel_plane *intel_plane;
 	uint16_t pri_latency = dev_priv->wm.pri_latency[level];
 	uint16_t spr_latency = dev_priv->wm.spr_latency[level];
 	uint16_t cur_latency = dev_priv->wm.cur_latency[level];
@@ -1993,29 +1989,11 @@
 		cur_latency *= 5;
 	}
 
-	for_each_intel_plane_on_crtc(dev_priv->dev, intel_crtc, intel_plane) {
-		struct intel_plane_state *pstate =
-			to_intel_plane_state(intel_plane->base.state);
-
-		switch (intel_plane->base.type) {
-		case DRM_PLANE_TYPE_PRIMARY:
-			result->pri_val = ilk_compute_pri_wm(cstate, pstate,
-							     pri_latency,
-							     level);
-			result->fbc_val = ilk_compute_fbc_wm(cstate, pstate,
-							     result->pri_val);
-			break;
-		case DRM_PLANE_TYPE_OVERLAY:
-			result->spr_val = ilk_compute_spr_wm(cstate, pstate,
-							     spr_latency);
-			break;
-		case DRM_PLANE_TYPE_CURSOR:
-			result->cur_val = ilk_compute_cur_wm(cstate, pstate,
-							     cur_latency);
-			break;
-		}
-	}
-
+	result->pri_val = ilk_compute_pri_wm(cstate, pristate,
+					     pri_latency, level);
+	result->spr_val = ilk_compute_spr_wm(cstate, sprstate, spr_latency);
+	result->cur_val = ilk_compute_cur_wm(cstate, curstate, cur_latency);
+	result->fbc_val = ilk_compute_fbc_wm(cstate, pristate, result->pri_val);
 	result->enable = true;
 }
 
@@ -2274,34 +2252,19 @@
 	intel_print_wm_latency(dev, "Gen9 Plane", dev_priv->wm.skl_latency);
 }
 
-static void ilk_compute_wm_config(struct drm_device *dev,
-				  struct intel_wm_config *config)
-{
-	struct intel_crtc *intel_crtc;
-
-	/* Compute the currently _active_ config */
-	for_each_intel_crtc(dev, intel_crtc) {
-		const struct intel_pipe_wm *wm = &intel_crtc->wm.active;
-
-		if (!wm->pipe_enabled)
-			continue;
-
-		config->sprites_enabled |= wm->sprites_enabled;
-		config->sprites_scaled |= wm->sprites_scaled;
-		config->num_pipes_active++;
-	}
-}
-
 /* Compute new watermarks for the pipe */
-static bool intel_compute_pipe_wm(struct intel_crtc_state *cstate,
-				  struct intel_pipe_wm *pipe_wm)
+static int ilk_compute_pipe_wm(struct intel_crtc *intel_crtc,
+			       struct drm_atomic_state *state)
 {
-	struct drm_crtc *crtc = cstate->base.crtc;
-	struct drm_device *dev = crtc->dev;
+	struct intel_pipe_wm *pipe_wm;
+	struct drm_device *dev = intel_crtc->base.dev;
 	const struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct intel_crtc_state *cstate = NULL;
 	struct intel_plane *intel_plane;
+	struct drm_plane_state *ps;
+	struct intel_plane_state *pristate = NULL;
 	struct intel_plane_state *sprstate = NULL;
+	struct intel_plane_state *curstate = NULL;
 	int level, max_level = ilk_wm_max_level(dev);
 	/* LP0 watermark maximums depend on this pipe alone */
 	struct intel_wm_config config = {
@@ -2309,11 +2272,24 @@
 	};
 	struct ilk_wm_maximums max;
 
+	cstate = intel_atomic_get_crtc_state(state, intel_crtc);
+	if (IS_ERR(cstate))
+		return PTR_ERR(cstate);
+
+	pipe_wm = &cstate->wm.optimal.ilk;
+
 	for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
-		if (intel_plane->base.type == DRM_PLANE_TYPE_OVERLAY) {
-			sprstate = to_intel_plane_state(intel_plane->base.state);
-			break;
-		}
+		ps = drm_atomic_get_plane_state(state,
+						&intel_plane->base);
+		if (IS_ERR(ps))
+			return PTR_ERR(ps);
+
+		if (intel_plane->base.type == DRM_PLANE_TYPE_PRIMARY)
+			pristate = to_intel_plane_state(ps);
+		else if (intel_plane->base.type == DRM_PLANE_TYPE_OVERLAY)
+			sprstate = to_intel_plane_state(ps);
+		else if (intel_plane->base.type == DRM_PLANE_TYPE_CURSOR)
+			curstate = to_intel_plane_state(ps);
 	}
 
 	config.sprites_enabled = sprstate->visible;
@@ -2322,7 +2298,7 @@
 		drm_rect_height(&sprstate->dst) != drm_rect_height(&sprstate->src) >> 16);
 
 	pipe_wm->pipe_enabled = cstate->base.active;
-	pipe_wm->sprites_enabled = sprstate->visible;
+	pipe_wm->sprites_enabled = config.sprites_enabled;
 	pipe_wm->sprites_scaled = config.sprites_scaled;
 
 	/* ILK/SNB: LP2+ watermarks only w/o sprites */
@@ -2333,24 +2309,27 @@
 	if (config.sprites_scaled)
 		max_level = 0;
 
-	ilk_compute_wm_level(dev_priv, intel_crtc, 0, cstate, &pipe_wm->wm[0]);
+	ilk_compute_wm_level(dev_priv, intel_crtc, 0, cstate,
+			     pristate, sprstate, curstate, &pipe_wm->wm[0]);
 
 	if (IS_HASWELL(dev) || IS_BROADWELL(dev))
-		pipe_wm->linetime = hsw_compute_linetime_wm(dev, crtc);
+		pipe_wm->linetime = hsw_compute_linetime_wm(dev,
+							    &intel_crtc->base);
 
 	/* LP0 watermarks always use 1/2 DDB partitioning */
 	ilk_compute_wm_maximums(dev, 0, &config, INTEL_DDB_PART_1_2, &max);
 
 	/* At least LP0 must be valid */
 	if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0]))
-		return false;
+		return -EINVAL;
 
 	ilk_compute_wm_reg_maximums(dev, 1, &max);
 
 	for (level = 1; level <= max_level; level++) {
 		struct intel_wm_level wm = {};
 
-		ilk_compute_wm_level(dev_priv, intel_crtc, level, cstate, &wm);
+		ilk_compute_wm_level(dev_priv, intel_crtc, level, cstate,
+				     pristate, sprstate, curstate, &wm);
 
 		/*
 		 * Disable any watermark level that exceeds the
@@ -2363,7 +2342,7 @@
 		pipe_wm->wm[level] = wm;
 	}
 
-	return true;
+	return 0;
 }
 
 /*
@@ -2378,7 +2357,9 @@
 	ret_wm->enable = true;
 
 	for_each_intel_crtc(dev, intel_crtc) {
-		const struct intel_pipe_wm *active = &intel_crtc->wm.active;
+		const struct intel_crtc_state *cstate =
+			to_intel_crtc_state(intel_crtc->base.state);
+		const struct intel_pipe_wm *active = &cstate->wm.optimal.ilk;
 		const struct intel_wm_level *wm = &active->wm[level];
 
 		if (!active->pipe_enabled)
@@ -2449,7 +2430,7 @@
 	 * enabled sometime later.
 	 */
 	if (IS_GEN5(dev) && !merged->fbc_wm_enabled &&
-	    intel_fbc_enabled(dev_priv)) {
+	    intel_fbc_is_active(dev_priv)) {
 		for (level = 2; level <= max_level; level++) {
 			struct intel_wm_level *wm = &merged->wm[level];
 
@@ -2526,14 +2507,15 @@
 
 	/* LP0 register values */
 	for_each_intel_crtc(dev, intel_crtc) {
+		const struct intel_crtc_state *cstate =
+			to_intel_crtc_state(intel_crtc->base.state);
 		enum pipe pipe = intel_crtc->pipe;
-		const struct intel_wm_level *r =
-			&intel_crtc->wm.active.wm[0];
+		const struct intel_wm_level *r = &cstate->wm.optimal.ilk.wm[0];
 
 		if (WARN_ON(!r->enable))
 			continue;
 
-		results->wm_linetime[pipe] = intel_crtc->wm.active.linetime;
+		results->wm_linetime[pipe] = cstate->wm.optimal.ilk.linetime;
 
 		results->wm_pipe[pipe] =
 			(r->pri_val << WM0_PIPE_PLANE_SHIFT) |
@@ -2755,18 +2737,40 @@
 #define SKL_DDB_SIZE		896	/* in blocks */
 #define BXT_DDB_SIZE		512
 
+/*
+ * Return the index of a plane in the SKL DDB and wm result arrays.  Primary
+ * plane is always in slot 0, cursor is always in slot I915_MAX_PLANES-1, and
+ * other universal planes are in indices 1..n.  Note that this may leave unused
+ * indices between the top "sprite" plane and the cursor.
+ */
+static int
+skl_wm_plane_id(const struct intel_plane *plane)
+{
+	switch (plane->base.type) {
+	case DRM_PLANE_TYPE_PRIMARY:
+		return 0;
+	case DRM_PLANE_TYPE_CURSOR:
+		return PLANE_CURSOR;
+	case DRM_PLANE_TYPE_OVERLAY:
+		return plane->plane + 1;
+	default:
+		MISSING_CASE(plane->base.type);
+		return plane->plane;
+	}
+}
+
 static void
 skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
-				   struct drm_crtc *for_crtc,
+				   const struct intel_crtc_state *cstate,
 				   const struct intel_wm_config *config,
-				   const struct skl_pipe_wm_parameters *params,
 				   struct skl_ddb_entry *alloc /* out */)
 {
+	struct drm_crtc *for_crtc = cstate->base.crtc;
 	struct drm_crtc *crtc;
 	unsigned int pipe_size, ddb_size;
 	int nth_active_pipe;
 
-	if (!params->active) {
+	if (!cstate->base.active) {
 		alloc->start = 0;
 		alloc->end = 0;
 		return;
@@ -2837,19 +2841,29 @@
 }
 
 static unsigned int
-skl_plane_relative_data_rate(const struct intel_plane_wm_parameters *p, int y)
+skl_plane_relative_data_rate(const struct intel_crtc_state *cstate,
+			     const struct drm_plane_state *pstate,
+			     int y)
 {
+	struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
+	struct drm_framebuffer *fb = pstate->fb;
 
 	/* for planar format */
-	if (p->y_bytes_per_pixel) {
+	if (fb->pixel_format == DRM_FORMAT_NV12) {
 		if (y)  /* y-plane data rate */
-			return p->horiz_pixels * p->vert_pixels * p->y_bytes_per_pixel;
+			return intel_crtc->config->pipe_src_w *
+				intel_crtc->config->pipe_src_h *
+				drm_format_plane_cpp(fb->pixel_format, 0);
 		else    /* uv-plane data rate */
-			return (p->horiz_pixels/2) * (p->vert_pixels/2) * p->bytes_per_pixel;
+			return (intel_crtc->config->pipe_src_w/2) *
+				(intel_crtc->config->pipe_src_h/2) *
+				drm_format_plane_cpp(fb->pixel_format, 1);
 	}
 
 	/* for packed formats */
-	return p->horiz_pixels * p->vert_pixels * p->bytes_per_pixel;
+	return intel_crtc->config->pipe_src_w *
+		intel_crtc->config->pipe_src_h *
+		drm_format_plane_cpp(fb->pixel_format, 0);
 }
 
 /*
@@ -2858,46 +2872,55 @@
  *   3 * 4096 * 8192  * 4 < 2^32
  */
 static unsigned int
-skl_get_total_relative_data_rate(struct intel_crtc *intel_crtc,
-				 const struct skl_pipe_wm_parameters *params)
+skl_get_total_relative_data_rate(const struct intel_crtc_state *cstate)
 {
+	struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
+	struct drm_device *dev = intel_crtc->base.dev;
+	const struct intel_plane *intel_plane;
 	unsigned int total_data_rate = 0;
-	int plane;
 
-	for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) {
-		const struct intel_plane_wm_parameters *p;
+	for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
+		const struct drm_plane_state *pstate = intel_plane->base.state;
 
-		p = &params->plane[plane];
-		if (!p->enabled)
+		if (pstate->fb == NULL)
 			continue;
 
-		total_data_rate += skl_plane_relative_data_rate(p, 0); /* packed/uv */
-		if (p->y_bytes_per_pixel) {
-			total_data_rate += skl_plane_relative_data_rate(p, 1); /* y-plane */
-		}
+		if (intel_plane->base.type == DRM_PLANE_TYPE_CURSOR)
+			continue;
+
+		/* packed/uv */
+		total_data_rate += skl_plane_relative_data_rate(cstate,
+								pstate,
+								0);
+
+		if (pstate->fb->pixel_format == DRM_FORMAT_NV12)
+			/* y-plane */
+			total_data_rate += skl_plane_relative_data_rate(cstate,
+									pstate,
+									1);
 	}
 
 	return total_data_rate;
 }
 
 static void
-skl_allocate_pipe_ddb(struct drm_crtc *crtc,
-		      const struct intel_wm_config *config,
-		      const struct skl_pipe_wm_parameters *params,
+skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
 		      struct skl_ddb_allocation *ddb /* out */)
 {
+	struct drm_crtc *crtc = cstate->base.crtc;
 	struct drm_device *dev = crtc->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct intel_wm_config *config = &dev_priv->wm.config;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct intel_plane *intel_plane;
 	enum pipe pipe = intel_crtc->pipe;
 	struct skl_ddb_entry *alloc = &ddb->pipe[pipe];
 	uint16_t alloc_size, start, cursor_blocks;
 	uint16_t minimum[I915_MAX_PLANES];
 	uint16_t y_minimum[I915_MAX_PLANES];
 	unsigned int total_data_rate;
-	int plane;
 
-	skl_ddb_get_pipe_allocation_limits(dev, crtc, config, params, alloc);
+	skl_ddb_get_pipe_allocation_limits(dev, cstate, config, alloc);
 	alloc_size = skl_ddb_entry_size(alloc);
 	if (alloc_size == 0) {
 		memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe]));
@@ -2914,17 +2937,20 @@
 	alloc->end -= cursor_blocks;
 
 	/* 1. Allocate the mininum required blocks for each active plane */
-	for_each_plane(dev_priv, pipe, plane) {
-		const struct intel_plane_wm_parameters *p;
+	for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
+		struct drm_plane *plane = &intel_plane->base;
+		struct drm_framebuffer *fb = plane->state->fb;
+		int id = skl_wm_plane_id(intel_plane);
 
-		p = &params->plane[plane];
-		if (!p->enabled)
+		if (fb == NULL)
+			continue;
+		if (plane->type == DRM_PLANE_TYPE_CURSOR)
 			continue;
 
-		minimum[plane] = 8;
-		alloc_size -= minimum[plane];
-		y_minimum[plane] = p->y_bytes_per_pixel ? 8 : 0;
-		alloc_size -= y_minimum[plane];
+		minimum[id] = 8;
+		alloc_size -= minimum[id];
+		y_minimum[id] = (fb->pixel_format == DRM_FORMAT_NV12) ? 8 : 0;
+		alloc_size -= y_minimum[id];
 	}
 
 	/*
@@ -2933,45 +2959,50 @@
 	 *
 	 * FIXME: we may not allocate every single block here.
 	 */
-	total_data_rate = skl_get_total_relative_data_rate(intel_crtc, params);
+	total_data_rate = skl_get_total_relative_data_rate(cstate);
 
 	start = alloc->start;
-	for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) {
-		const struct intel_plane_wm_parameters *p;
+	for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
+		struct drm_plane *plane = &intel_plane->base;
+		struct drm_plane_state *pstate = intel_plane->base.state;
 		unsigned int data_rate, y_data_rate;
 		uint16_t plane_blocks, y_plane_blocks = 0;
+		int id = skl_wm_plane_id(intel_plane);
 
-		p = &params->plane[plane];
-		if (!p->enabled)
+		if (pstate->fb == NULL)
+			continue;
+		if (plane->type == DRM_PLANE_TYPE_CURSOR)
 			continue;
 
-		data_rate = skl_plane_relative_data_rate(p, 0);
+		data_rate = skl_plane_relative_data_rate(cstate, pstate, 0);
 
 		/*
 		 * allocation for (packed formats) or (uv-plane part of planar format):
 		 * promote the expression to 64 bits to avoid overflowing, the
 		 * result is < available as data_rate / total_data_rate < 1
 		 */
-		plane_blocks = minimum[plane];
+		plane_blocks = minimum[id];
 		plane_blocks += div_u64((uint64_t)alloc_size * data_rate,
 					total_data_rate);
 
-		ddb->plane[pipe][plane].start = start;
-		ddb->plane[pipe][plane].end = start + plane_blocks;
+		ddb->plane[pipe][id].start = start;
+		ddb->plane[pipe][id].end = start + plane_blocks;
 
 		start += plane_blocks;
 
 		/*
 		 * allocation for y_plane part of planar format:
 		 */
-		if (p->y_bytes_per_pixel) {
-			y_data_rate = skl_plane_relative_data_rate(p, 1);
-			y_plane_blocks = y_minimum[plane];
+		if (pstate->fb->pixel_format == DRM_FORMAT_NV12) {
+			y_data_rate = skl_plane_relative_data_rate(cstate,
+								   pstate,
+								   1);
+			y_plane_blocks = y_minimum[id];
 			y_plane_blocks += div_u64((uint64_t)alloc_size * y_data_rate,
 						total_data_rate);
 
-			ddb->y_plane[pipe][plane].start = start;
-			ddb->y_plane[pipe][plane].end = start + y_plane_blocks;
+			ddb->y_plane[pipe][id].start = start;
+			ddb->y_plane[pipe][id].end = start + y_plane_blocks;
 
 			start += y_plane_blocks;
 		}
@@ -3041,104 +3072,27 @@
 	struct drm_device *dev = intel_crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	const struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb;
-	enum pipe pipe = intel_crtc->pipe;
 
-	if (memcmp(new_ddb->plane[pipe], cur_ddb->plane[pipe],
-		   sizeof(new_ddb->plane[pipe])))
-		return true;
-
-	if (memcmp(&new_ddb->plane[pipe][PLANE_CURSOR], &cur_ddb->plane[pipe][PLANE_CURSOR],
-		    sizeof(new_ddb->plane[pipe][PLANE_CURSOR])))
+	/*
+	 * If ddb allocation of pipes changed, it may require recalculation of
+	 * watermarks
+	 */
+	if (memcmp(new_ddb->pipe, cur_ddb->pipe, sizeof(new_ddb->pipe)))
 		return true;
 
 	return false;
 }
 
-static void skl_compute_wm_global_parameters(struct drm_device *dev,
-					     struct intel_wm_config *config)
-{
-	struct drm_crtc *crtc;
-	struct drm_plane *plane;
-
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
-		config->num_pipes_active += to_intel_crtc(crtc)->active;
-
-	/* FIXME: I don't think we need those two global parameters on SKL */
-	list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
-		struct intel_plane *intel_plane = to_intel_plane(plane);
-
-		config->sprites_enabled |= intel_plane->wm.enabled;
-		config->sprites_scaled |= intel_plane->wm.scaled;
-	}
-}
-
-static void skl_compute_wm_pipe_parameters(struct drm_crtc *crtc,
-					   struct skl_pipe_wm_parameters *p)
-{
-	struct drm_device *dev = crtc->dev;
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	enum pipe pipe = intel_crtc->pipe;
-	struct drm_plane *plane;
-	struct drm_framebuffer *fb;
-	int i = 1; /* Index for sprite planes start */
-
-	p->active = intel_crtc->active;
-	if (p->active) {
-		p->pipe_htotal = intel_crtc->config->base.adjusted_mode.crtc_htotal;
-		p->pixel_rate = skl_pipe_pixel_rate(intel_crtc->config);
-
-		fb = crtc->primary->state->fb;
-		/* For planar: Bpp is for uv plane, y_Bpp is for y plane */
-		if (fb) {
-			p->plane[0].enabled = true;
-			p->plane[0].bytes_per_pixel = fb->pixel_format == DRM_FORMAT_NV12 ?
-				drm_format_plane_cpp(fb->pixel_format, 1) :
-				drm_format_plane_cpp(fb->pixel_format, 0);
-			p->plane[0].y_bytes_per_pixel = fb->pixel_format == DRM_FORMAT_NV12 ?
-				drm_format_plane_cpp(fb->pixel_format, 0) : 0;
-			p->plane[0].tiling = fb->modifier[0];
-		} else {
-			p->plane[0].enabled = false;
-			p->plane[0].bytes_per_pixel = 0;
-			p->plane[0].y_bytes_per_pixel = 0;
-			p->plane[0].tiling = DRM_FORMAT_MOD_NONE;
-		}
-		p->plane[0].horiz_pixels = intel_crtc->config->pipe_src_w;
-		p->plane[0].vert_pixels = intel_crtc->config->pipe_src_h;
-		p->plane[0].rotation = crtc->primary->state->rotation;
-
-		fb = crtc->cursor->state->fb;
-		p->plane[PLANE_CURSOR].y_bytes_per_pixel = 0;
-		if (fb) {
-			p->plane[PLANE_CURSOR].enabled = true;
-			p->plane[PLANE_CURSOR].bytes_per_pixel = fb->bits_per_pixel / 8;
-			p->plane[PLANE_CURSOR].horiz_pixels = crtc->cursor->state->crtc_w;
-			p->plane[PLANE_CURSOR].vert_pixels = crtc->cursor->state->crtc_h;
-		} else {
-			p->plane[PLANE_CURSOR].enabled = false;
-			p->plane[PLANE_CURSOR].bytes_per_pixel = 0;
-			p->plane[PLANE_CURSOR].horiz_pixels = 64;
-			p->plane[PLANE_CURSOR].vert_pixels = 64;
-		}
-	}
-
-	list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
-		struct intel_plane *intel_plane = to_intel_plane(plane);
-
-		if (intel_plane->pipe == pipe &&
-			plane->type == DRM_PLANE_TYPE_OVERLAY)
-			p->plane[i++] = intel_plane->wm;
-	}
-}
-
 static bool skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
-				 struct skl_pipe_wm_parameters *p,
-				 struct intel_plane_wm_parameters *p_params,
+				 struct intel_crtc_state *cstate,
+				 struct intel_plane *intel_plane,
 				 uint16_t ddb_allocation,
 				 int level,
 				 uint16_t *out_blocks, /* out */
 				 uint8_t *out_lines /* out */)
 {
+	struct drm_plane *plane = &intel_plane->base;
+	struct drm_framebuffer *fb = plane->state->fb;
 	uint32_t latency = dev_priv->wm.skl_latency[level];
 	uint32_t method1, method2;
 	uint32_t plane_bytes_per_line, plane_blocks_per_line;
@@ -3146,31 +3100,33 @@
 	uint32_t selected_result;
 	uint8_t bytes_per_pixel;
 
-	if (latency == 0 || !p->active || !p_params->enabled)
+	if (latency == 0 || !cstate->base.active || !fb)
 		return false;
 
-	bytes_per_pixel = p_params->y_bytes_per_pixel ?
-		p_params->y_bytes_per_pixel :
-		p_params->bytes_per_pixel;
-	method1 = skl_wm_method1(p->pixel_rate,
+	bytes_per_pixel = drm_format_plane_cpp(fb->pixel_format, 0);
+	method1 = skl_wm_method1(skl_pipe_pixel_rate(cstate),
 				 bytes_per_pixel,
 				 latency);
-	method2 = skl_wm_method2(p->pixel_rate,
-				 p->pipe_htotal,
-				 p_params->horiz_pixels,
+	method2 = skl_wm_method2(skl_pipe_pixel_rate(cstate),
+				 cstate->base.adjusted_mode.crtc_htotal,
+				 cstate->pipe_src_w,
 				 bytes_per_pixel,
-				 p_params->tiling,
+				 fb->modifier[0],
 				 latency);
 
-	plane_bytes_per_line = p_params->horiz_pixels * bytes_per_pixel;
+	plane_bytes_per_line = cstate->pipe_src_w * bytes_per_pixel;
 	plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
 
-	if (p_params->tiling == I915_FORMAT_MOD_Y_TILED ||
-	    p_params->tiling == I915_FORMAT_MOD_Yf_TILED) {
+	if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
+	    fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) {
 		uint32_t min_scanlines = 4;
 		uint32_t y_tile_minimum;
-		if (intel_rotation_90_or_270(p_params->rotation)) {
-			switch (p_params->bytes_per_pixel) {
+		if (intel_rotation_90_or_270(plane->state->rotation)) {
+			int bpp = (fb->pixel_format == DRM_FORMAT_NV12) ?
+				drm_format_plane_cpp(fb->pixel_format, 1) :
+				drm_format_plane_cpp(fb->pixel_format, 0);
+
+			switch (bpp) {
 			case 1:
 				min_scanlines = 16;
 				break;
@@ -3194,8 +3150,8 @@
 	res_lines = DIV_ROUND_UP(selected_result, plane_blocks_per_line);
 
 	if (level >= 1 && level <= 7) {
-		if (p_params->tiling == I915_FORMAT_MOD_Y_TILED ||
-		    p_params->tiling == I915_FORMAT_MOD_Yf_TILED)
+		if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
+		    fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED)
 			res_lines += 4;
 		else
 			res_blocks++;
@@ -3212,84 +3168,80 @@
 
 static void skl_compute_wm_level(const struct drm_i915_private *dev_priv,
 				 struct skl_ddb_allocation *ddb,
-				 struct skl_pipe_wm_parameters *p,
-				 enum pipe pipe,
+				 struct intel_crtc_state *cstate,
 				 int level,
-				 int num_planes,
 				 struct skl_wm_level *result)
 {
+	struct drm_device *dev = dev_priv->dev;
+	struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
+	struct intel_plane *intel_plane;
 	uint16_t ddb_blocks;
-	int i;
+	enum pipe pipe = intel_crtc->pipe;
 
-	for (i = 0; i < num_planes; i++) {
+	for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
+		int i = skl_wm_plane_id(intel_plane);
+
 		ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][i]);
 
 		result->plane_en[i] = skl_compute_plane_wm(dev_priv,
-						p, &p->plane[i],
+						cstate,
+						intel_plane,
 						ddb_blocks,
 						level,
 						&result->plane_res_b[i],
 						&result->plane_res_l[i]);
 	}
-
-	ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][PLANE_CURSOR]);
-	result->plane_en[PLANE_CURSOR] = skl_compute_plane_wm(dev_priv, p,
-						 &p->plane[PLANE_CURSOR],
-						 ddb_blocks, level,
-						 &result->plane_res_b[PLANE_CURSOR],
-						 &result->plane_res_l[PLANE_CURSOR]);
 }
 
 static uint32_t
-skl_compute_linetime_wm(struct drm_crtc *crtc, struct skl_pipe_wm_parameters *p)
+skl_compute_linetime_wm(struct intel_crtc_state *cstate)
 {
-	if (!to_intel_crtc(crtc)->active)
+	if (!cstate->base.active)
 		return 0;
 
-	if (WARN_ON(p->pixel_rate == 0))
+	if (WARN_ON(skl_pipe_pixel_rate(cstate) == 0))
 		return 0;
 
-	return DIV_ROUND_UP(8 * p->pipe_htotal * 1000, p->pixel_rate);
+	return DIV_ROUND_UP(8 * cstate->base.adjusted_mode.crtc_htotal * 1000,
+			    skl_pipe_pixel_rate(cstate));
 }
 
-static void skl_compute_transition_wm(struct drm_crtc *crtc,
-				      struct skl_pipe_wm_parameters *params,
+static void skl_compute_transition_wm(struct intel_crtc_state *cstate,
 				      struct skl_wm_level *trans_wm /* out */)
 {
+	struct drm_crtc *crtc = cstate->base.crtc;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	int i;
+	struct intel_plane *intel_plane;
 
-	if (!params->active)
+	if (!cstate->base.active)
 		return;
 
 	/* Until we know more, just disable transition WMs */
-	for (i = 0; i < intel_num_planes(intel_crtc); i++)
+	for_each_intel_plane_on_crtc(crtc->dev, intel_crtc, intel_plane) {
+		int i = skl_wm_plane_id(intel_plane);
+
 		trans_wm->plane_en[i] = false;
-	trans_wm->plane_en[PLANE_CURSOR] = false;
+	}
 }
 
-static void skl_compute_pipe_wm(struct drm_crtc *crtc,
+static void skl_compute_pipe_wm(struct intel_crtc_state *cstate,
 				struct skl_ddb_allocation *ddb,
-				struct skl_pipe_wm_parameters *params,
 				struct skl_pipe_wm *pipe_wm)
 {
-	struct drm_device *dev = crtc->dev;
+	struct drm_device *dev = cstate->base.crtc->dev;
 	const struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	int level, max_level = ilk_wm_max_level(dev);
 
 	for (level = 0; level <= max_level; level++) {
-		skl_compute_wm_level(dev_priv, ddb, params, intel_crtc->pipe,
-				     level, intel_num_planes(intel_crtc),
-				     &pipe_wm->wm[level]);
+		skl_compute_wm_level(dev_priv, ddb, cstate,
+				     level, &pipe_wm->wm[level]);
 	}
-	pipe_wm->linetime = skl_compute_linetime_wm(crtc, params);
+	pipe_wm->linetime = skl_compute_linetime_wm(cstate);
 
-	skl_compute_transition_wm(crtc, params, &pipe_wm->trans_wm);
+	skl_compute_transition_wm(cstate, &pipe_wm->trans_wm);
 }
 
 static void skl_compute_wm_results(struct drm_device *dev,
-				   struct skl_pipe_wm_parameters *p,
 				   struct skl_pipe_wm *p_wm,
 				   struct skl_wm_values *r,
 				   struct intel_crtc *intel_crtc)
@@ -3346,7 +3298,8 @@
 	r->wm_linetime[pipe] = p_wm->linetime;
 }
 
-static void skl_ddb_entry_write(struct drm_i915_private *dev_priv, uint32_t reg,
+static void skl_ddb_entry_write(struct drm_i915_private *dev_priv,
+				i915_reg_t reg,
 				const struct skl_ddb_entry *entry)
 {
 	if (entry->end)
@@ -3533,28 +3486,25 @@
 }
 
 static bool skl_update_pipe_wm(struct drm_crtc *crtc,
-			       struct skl_pipe_wm_parameters *params,
-			       struct intel_wm_config *config,
 			       struct skl_ddb_allocation *ddb, /* out */
 			       struct skl_pipe_wm *pipe_wm /* out */)
 {
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
 
-	skl_compute_wm_pipe_parameters(crtc, params);
-	skl_allocate_pipe_ddb(crtc, config, params, ddb);
-	skl_compute_pipe_wm(crtc, ddb, params, pipe_wm);
+	skl_allocate_pipe_ddb(cstate, ddb);
+	skl_compute_pipe_wm(cstate, ddb, pipe_wm);
 
-	if (!memcmp(&intel_crtc->wm.skl_active, pipe_wm, sizeof(*pipe_wm)))
+	if (!memcmp(&intel_crtc->wm.active.skl, pipe_wm, sizeof(*pipe_wm)))
 		return false;
 
-	intel_crtc->wm.skl_active = *pipe_wm;
+	intel_crtc->wm.active.skl = *pipe_wm;
 
 	return true;
 }
 
 static void skl_update_other_pipe_wm(struct drm_device *dev,
 				     struct drm_crtc *crtc,
-				     struct intel_wm_config *config,
 				     struct skl_wm_values *r)
 {
 	struct intel_crtc *intel_crtc;
@@ -3575,7 +3525,6 @@
 	 */
 	list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list,
 				base.head) {
-		struct skl_pipe_wm_parameters params = {};
 		struct skl_pipe_wm pipe_wm = {};
 		bool wm_changed;
 
@@ -3586,7 +3535,6 @@
 			continue;
 
 		wm_changed = skl_update_pipe_wm(&intel_crtc->base,
-						&params, config,
 						&r->ddb, &pipe_wm);
 
 		/*
@@ -3596,7 +3544,7 @@
 		 */
 		WARN_ON(!wm_changed);
 
-		skl_compute_wm_results(dev, &params, &pipe_wm, r, intel_crtc);
+		skl_compute_wm_results(dev, &pipe_wm, r, intel_crtc);
 		r->dirty[intel_crtc->pipe] = true;
 	}
 }
@@ -3626,10 +3574,9 @@
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct skl_pipe_wm_parameters params = {};
 	struct skl_wm_values *results = &dev_priv->wm.skl_results;
-	struct skl_pipe_wm pipe_wm = {};
-	struct intel_wm_config config = {};
+	struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
+	struct skl_pipe_wm *pipe_wm = &cstate->wm.optimal.skl;
 
 
 	/* Clear all dirty flags */
@@ -3637,16 +3584,13 @@
 
 	skl_clear_wm(results, intel_crtc->pipe);
 
-	skl_compute_wm_global_parameters(dev, &config);
-
-	if (!skl_update_pipe_wm(crtc, &params, &config,
-				&results->ddb, &pipe_wm))
+	if (!skl_update_pipe_wm(crtc, &results->ddb, pipe_wm))
 		return;
 
-	skl_compute_wm_results(dev, &params, &pipe_wm, results, intel_crtc);
+	skl_compute_wm_results(dev, pipe_wm, results, intel_crtc);
 	results->dirty[intel_crtc->pipe] = true;
 
-	skl_update_other_pipe_wm(dev, crtc, &config, results);
+	skl_update_other_pipe_wm(dev, crtc, results);
 	skl_write_wm_values(dev_priv, results);
 	skl_flush_wm_values(dev_priv, results);
 
@@ -3654,71 +3598,23 @@
 	dev_priv->wm.skl_hw = *results;
 }
 
-static void
-skl_update_sprite_wm(struct drm_plane *plane, struct drm_crtc *crtc,
-		     uint32_t sprite_width, uint32_t sprite_height,
-		     int pixel_size, bool enabled, bool scaled)
+static void ilk_program_watermarks(struct drm_i915_private *dev_priv)
 {
-	struct intel_plane *intel_plane = to_intel_plane(plane);
-	struct drm_framebuffer *fb = plane->state->fb;
-
-	intel_plane->wm.enabled = enabled;
-	intel_plane->wm.scaled = scaled;
-	intel_plane->wm.horiz_pixels = sprite_width;
-	intel_plane->wm.vert_pixels = sprite_height;
-	intel_plane->wm.tiling = DRM_FORMAT_MOD_NONE;
-
-	/* For planar: Bpp is for UV plane, y_Bpp is for Y plane */
-	intel_plane->wm.bytes_per_pixel =
-		(fb && fb->pixel_format == DRM_FORMAT_NV12) ?
-		drm_format_plane_cpp(plane->state->fb->pixel_format, 1) : pixel_size;
-	intel_plane->wm.y_bytes_per_pixel =
-		(fb && fb->pixel_format == DRM_FORMAT_NV12) ?
-		drm_format_plane_cpp(plane->state->fb->pixel_format, 0) : 0;
-
-	/*
-	 * Framebuffer can be NULL on plane disable, but it does not
-	 * matter for watermarks if we assume no tiling in that case.
-	 */
-	if (fb)
-		intel_plane->wm.tiling = fb->modifier[0];
-	intel_plane->wm.rotation = plane->state->rotation;
-
-	skl_update_wm(crtc);
-}
-
-static void ilk_update_wm(struct drm_crtc *crtc)
-{
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
-	struct drm_device *dev = crtc->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_device *dev = dev_priv->dev;
+	struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm;
 	struct ilk_wm_maximums max;
+	struct intel_wm_config *config = &dev_priv->wm.config;
 	struct ilk_wm_values results = {};
 	enum intel_ddb_partitioning partitioning;
-	struct intel_pipe_wm pipe_wm = {};
-	struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm;
-	struct intel_wm_config config = {};
 
-	WARN_ON(cstate->base.active != intel_crtc->active);
-
-	intel_compute_pipe_wm(cstate, &pipe_wm);
-
-	if (!memcmp(&intel_crtc->wm.active, &pipe_wm, sizeof(pipe_wm)))
-		return;
-
-	intel_crtc->wm.active = pipe_wm;
-
-	ilk_compute_wm_config(dev, &config);
-
-	ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_1_2, &max);
-	ilk_wm_merge(dev, &config, &max, &lp_wm_1_2);
+	ilk_compute_wm_maximums(dev, 1, config, INTEL_DDB_PART_1_2, &max);
+	ilk_wm_merge(dev, config, &max, &lp_wm_1_2);
 
 	/* 5/6 split only in single pipe config on IVB+ */
 	if (INTEL_INFO(dev)->gen >= 7 &&
-	    config.num_pipes_active == 1 && config.sprites_enabled) {
-		ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_5_6, &max);
-		ilk_wm_merge(dev, &config, &max, &lp_wm_5_6);
+	    config->num_pipes_active == 1 && config->sprites_enabled) {
+		ilk_compute_wm_maximums(dev, 1, config, INTEL_DDB_PART_5_6, &max);
+		ilk_wm_merge(dev, config, &max, &lp_wm_5_6);
 
 		best_lp_wm = ilk_find_best_result(dev, &lp_wm_1_2, &lp_wm_5_6);
 	} else {
@@ -3733,14 +3629,13 @@
 	ilk_write_wm_values(dev_priv, &results);
 }
 
-static void
-ilk_update_sprite_wm(struct drm_plane *plane,
-		     struct drm_crtc *crtc,
-		     uint32_t sprite_width, uint32_t sprite_height,
-		     int pixel_size, bool enabled, bool scaled)
+static void ilk_update_wm(struct drm_crtc *crtc)
 {
-	struct drm_device *dev = plane->dev;
-	struct intel_plane *intel_plane = to_intel_plane(plane);
+	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
+
+	WARN_ON(cstate->base.active != intel_crtc->active);
 
 	/*
 	 * IVB workaround: must disable low power watermarks for at least
@@ -3749,10 +3644,14 @@
 	 *
 	 * WaCxSRDisabledForSpriteScaling:ivb
 	 */
-	if (IS_IVYBRIDGE(dev) && scaled && ilk_disable_lp_wm(dev))
-		intel_wait_for_vblank(dev, intel_plane->pipe);
+	if (cstate->disable_lp_wm) {
+		ilk_disable_lp_wm(crtc->dev);
+		intel_wait_for_vblank(crtc->dev, intel_crtc->pipe);
+	}
 
-	ilk_update_wm(crtc);
+	intel_crtc->wm.active.ilk = cstate->wm.optimal.ilk;
+
+	ilk_program_watermarks(dev_priv);
 }
 
 static void skl_pipe_wm_active_state(uint32_t val,
@@ -3805,7 +3704,8 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct skl_wm_values *hw = &dev_priv->wm.skl_hw;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	struct skl_pipe_wm *active = &intel_crtc->wm.skl_active;
+	struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
+	struct skl_pipe_wm *active = &cstate->wm.optimal.skl;
 	enum pipe pipe = intel_crtc->pipe;
 	int level, i, max_level;
 	uint32_t temp;
@@ -3849,6 +3749,8 @@
 
 	temp = hw->plane_trans[pipe][PLANE_CURSOR];
 	skl_pipe_wm_active_state(temp, active, true, true, i, 0);
+
+	intel_crtc->wm.active.skl = *active;
 }
 
 void skl_wm_get_hw_state(struct drm_device *dev)
@@ -3868,9 +3770,10 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct ilk_wm_values *hw = &dev_priv->wm.hw;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	struct intel_pipe_wm *active = &intel_crtc->wm.active;
+	struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
+	struct intel_pipe_wm *active = &cstate->wm.optimal.ilk;
 	enum pipe pipe = intel_crtc->pipe;
-	static const unsigned int wm0_pipe_reg[] = {
+	static const i915_reg_t wm0_pipe_reg[] = {
 		[PIPE_A] = WM0_PIPEA_ILK,
 		[PIPE_B] = WM0_PIPEB_ILK,
 		[PIPE_C] = WM0_PIPEC_IVB,
@@ -3907,6 +3810,8 @@
 		for (level = 0; level <= max_level; level++)
 			active->wm[level].enable = true;
 	}
+
+	intel_crtc->wm.active.ilk = *active;
 }
 
 #define _FW_WM(value, plane) \
@@ -4132,21 +4037,6 @@
 		dev_priv->display.update_wm(crtc);
 }
 
-void intel_update_sprite_watermarks(struct drm_plane *plane,
-				    struct drm_crtc *crtc,
-				    uint32_t sprite_width,
-				    uint32_t sprite_height,
-				    int pixel_size,
-				    bool enabled, bool scaled)
-{
-	struct drm_i915_private *dev_priv = plane->dev->dev_private;
-
-	if (dev_priv->display.update_sprite_wm)
-		dev_priv->display.update_sprite_wm(plane, crtc,
-						   sprite_width, sprite_height,
-						   pixel_size, enabled, scaled);
-}
-
 /**
  * Lock protecting IPS related data structures
  */
@@ -4414,7 +4304,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	/* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */
-	if (IS_BROXTON(dev) && (INTEL_REVID(dev) < BXT_REVID_B0))
+	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1))
 		return;
 
 	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
@@ -4449,7 +4339,7 @@
 	POSTING_READ(GEN6_RPNSWREQ);
 
 	dev_priv->rps.cur_freq = val;
-	trace_intel_gpu_freq_change(val * 50);
+	trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
 }
 
 static void valleyview_set_rps(struct drm_device *dev, u8 val)
@@ -4689,7 +4579,8 @@
 	dev_priv->rps.max_freq		= dev_priv->rps.rp0_freq;
 
 	dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq;
-	if (IS_HASWELL(dev) || IS_BROADWELL(dev) || IS_SKYLAKE(dev)) {
+	if (IS_HASWELL(dev) || IS_BROADWELL(dev) ||
+	    IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
 		ret = sandybridge_pcode_read(dev_priv,
 					HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
 					&ddcc_status);
@@ -4701,7 +4592,7 @@
 					dev_priv->rps.max_freq);
 	}
 
-	if (IS_SKYLAKE(dev)) {
+	if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
 		/* Store the frequency values in 16.66 MHZ units, which is
 		   the natural hardware unit for SKL */
 		dev_priv->rps.rp0_freq *= GEN9_FREQ_SCALER;
@@ -4738,7 +4629,7 @@
 	gen6_init_rps_frequencies(dev);
 
 	/* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */
-	if (IS_BROXTON(dev) && (INTEL_REVID(dev) < BXT_REVID_B0)) {
+	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
 		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 		return;
 	}
@@ -4783,7 +4674,7 @@
 
 	/* WaRsDoubleRc6WrlWithCoarsePowerGating: Doubling WRL only when CPG is enabled */
 	if (IS_SKYLAKE(dev) && !((IS_SKL_GT3(dev) || IS_SKL_GT4(dev)) &&
-				 (INTEL_REVID(dev) <= SKL_REVID_E0)))
+				 IS_SKL_REVID(dev, 0, SKL_REVID_E0)))
 		I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
 	else
 		I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
@@ -4807,8 +4698,8 @@
 	DRM_INFO("RC6 %s\n", (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ?
 			"on" : "off");
 	/* WaRsUseTimeoutMode */
-	if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_D0) ||
-	    (IS_BROXTON(dev) && INTEL_REVID(dev) <= BXT_REVID_A0)) {
+	if (IS_SKL_REVID(dev, 0, SKL_REVID_D0) ||
+	    IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
 		I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us */
 		I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
 			   GEN7_RC_CTL_TO_MODE |
@@ -4824,8 +4715,9 @@
 	 * 3b: Enable Coarse Power Gating only when RC6 is enabled.
 	 * WaRsDisableCoarsePowerGating:skl,bxt - Render/Media PG need to be disabled with RC6.
 	 */
-	if ((IS_BROXTON(dev) && (INTEL_REVID(dev) < BXT_REVID_B0)) ||
-	    ((IS_SKL_GT3(dev) || IS_SKL_GT4(dev)) && (INTEL_REVID(dev) <= SKL_REVID_E0)))
+	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1) ||
+	    ((IS_SKL_GT3(dev) || IS_SKL_GT4(dev)) &&
+	     IS_SKL_REVID(dev, 0, SKL_REVID_E0)))
 		I915_WRITE(GEN9_PG_ENABLE, 0);
 	else
 		I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ?
@@ -5056,7 +4948,7 @@
 	/* convert DDR frequency from units of 266.6MHz to bandwidth */
 	min_ring_freq = mult_frac(min_ring_freq, 8, 3);
 
-	if (IS_SKYLAKE(dev)) {
+	if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
 		/* Convert GT frequency to 50 HZ units */
 		min_gpu_freq = dev_priv->rps.min_freq / GEN9_FREQ_SCALER;
 		max_gpu_freq = dev_priv->rps.max_freq / GEN9_FREQ_SCALER;
@@ -5074,7 +4966,7 @@
 		int diff = max_gpu_freq - gpu_freq;
 		unsigned int ia_freq = 0, ring_freq = 0;
 
-		if (IS_SKYLAKE(dev)) {
+		if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
 			/*
 			 * ring_freq = 2 * GT. ring_freq is in 100MHz units
 			 * No floor required for ring frequency on SKL.
@@ -6202,7 +6094,7 @@
 	} else if (INTEL_INFO(dev)->gen >= 9) {
 		gen9_enable_rc6(dev);
 		gen9_enable_rps(dev);
-		if (IS_SKYLAKE(dev))
+		if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev))
 			__gen6_update_ring_freq(dev);
 	} else if (IS_BROADWELL(dev)) {
 		gen8_enable_rps(dev);
@@ -7058,7 +6950,6 @@
 			dev_priv->display.init_clock_gating =
 				bxt_init_clock_gating;
 		dev_priv->display.update_wm = skl_update_wm;
-		dev_priv->display.update_sprite_wm = skl_update_sprite_wm;
 	} else if (HAS_PCH_SPLIT(dev)) {
 		ilk_setup_wm_latency(dev);
 
@@ -7067,7 +6958,7 @@
 		    (!IS_GEN5(dev) && dev_priv->wm.pri_latency[0] &&
 		     dev_priv->wm.spr_latency[0] && dev_priv->wm.cur_latency[0])) {
 			dev_priv->display.update_wm = ilk_update_wm;
-			dev_priv->display.update_sprite_wm = ilk_update_sprite_wm;
+			dev_priv->display.compute_pipe_wm = ilk_compute_pipe_wm;
 		} else {
 			DRM_DEBUG_KMS("Failed to read display plane latency. "
 				      "Disable CxSR\n");
@@ -7255,7 +7146,8 @@
 int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)
 {
 	if (IS_GEN9(dev_priv->dev))
-		return (val * GT_FREQUENCY_MULTIPLIER) / GEN9_FREQ_SCALER;
+		return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
+					 GEN9_FREQ_SCALER);
 	else if (IS_CHERRYVIEW(dev_priv->dev))
 		return chv_gpu_freq(dev_priv, val);
 	else if (IS_VALLEYVIEW(dev_priv->dev))
@@ -7267,13 +7159,14 @@
 int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
 {
 	if (IS_GEN9(dev_priv->dev))
-		return (val * GEN9_FREQ_SCALER) / GT_FREQUENCY_MULTIPLIER;
+		return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
+					 GT_FREQUENCY_MULTIPLIER);
 	else if (IS_CHERRYVIEW(dev_priv->dev))
 		return chv_freq_opcode(dev_priv, val);
 	else if (IS_VALLEYVIEW(dev_priv->dev))
 		return byt_freq_opcode(dev_priv, val);
 	else
-		return val / GT_FREQUENCY_MULTIPLIER;
+		return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
 }
 
 struct request_boost {
diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c
index 213581c..b6609e6 100644
--- a/drivers/gpu/drm/i915/intel_psr.c
+++ b/drivers/gpu/drm/i915/intel_psr.c
@@ -80,7 +80,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *crtc = to_intel_crtc(dig_port->base.base.crtc);
 	enum transcoder cpu_transcoder = crtc->config->cpu_transcoder;
-	u32 ctl_reg = HSW_TVIDEO_DIP_CTL(cpu_transcoder);
+	i915_reg_t ctl_reg = HSW_TVIDEO_DIP_CTL(cpu_transcoder);
 	uint32_t *data = (uint32_t *) vsc_psr;
 	unsigned int i;
 
@@ -151,13 +151,31 @@
 			   DP_PSR_ENABLE | DP_PSR_MAIN_LINK_ACTIVE);
 }
 
+static i915_reg_t psr_aux_ctl_reg(struct drm_i915_private *dev_priv,
+				       enum port port)
+{
+	if (INTEL_INFO(dev_priv)->gen >= 9)
+		return DP_AUX_CH_CTL(port);
+	else
+		return EDP_PSR_AUX_CTL;
+}
+
+static i915_reg_t psr_aux_data_reg(struct drm_i915_private *dev_priv,
+					enum port port, int index)
+{
+	if (INTEL_INFO(dev_priv)->gen >= 9)
+		return DP_AUX_CH_DATA(port, index);
+	else
+		return EDP_PSR_AUX_DATA(index);
+}
+
 static void hsw_psr_enable_sink(struct intel_dp *intel_dp)
 {
 	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
 	struct drm_device *dev = dig_port->base.base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	uint32_t aux_clock_divider;
-	uint32_t aux_data_reg, aux_ctl_reg;
+	i915_reg_t aux_ctl_reg;
 	int precharge = 0x3;
 	static const uint8_t aux_msg[] = {
 		[0] = DP_AUX_NATIVE_WRITE << 4,
@@ -166,29 +184,24 @@
 		[3] = 1 - 1,
 		[4] = DP_SET_POWER_D0,
 	};
+	enum port port = dig_port->port;
 	int i;
 
 	BUILD_BUG_ON(sizeof(aux_msg) > 20);
 
 	aux_clock_divider = intel_dp->get_aux_clock_divider(intel_dp, 0);
 
-	drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG,
-			   DP_PSR_ENABLE & ~DP_PSR_MAIN_LINK_ACTIVE);
-
 	/* Enable AUX frame sync at sink */
 	if (dev_priv->psr.aux_frame_sync)
 		drm_dp_dpcd_writeb(&intel_dp->aux,
 				DP_SINK_DEVICE_AUX_FRAME_SYNC_CONF,
 				DP_AUX_FRAME_SYNC_ENABLE);
 
-	aux_data_reg = (INTEL_INFO(dev)->gen >= 9) ?
-				DPA_AUX_CH_DATA1 : EDP_PSR_AUX_DATA1(dev);
-	aux_ctl_reg = (INTEL_INFO(dev)->gen >= 9) ?
-				DPA_AUX_CH_CTL : EDP_PSR_AUX_CTL(dev);
+	aux_ctl_reg = psr_aux_ctl_reg(dev_priv, port);
 
 	/* Setup AUX registers */
 	for (i = 0; i < sizeof(aux_msg); i += 4)
-		I915_WRITE(aux_data_reg + i,
+		I915_WRITE(psr_aux_data_reg(dev_priv, port, i >> 2),
 			   intel_dp_pack_aux(&aux_msg[i], sizeof(aux_msg) - i));
 
 	if (INTEL_INFO(dev)->gen >= 9) {
@@ -267,16 +280,11 @@
 	const uint32_t link_entry_time = EDP_PSR_MIN_LINK_ENTRY_TIME_8_LINES;
 
 	if (intel_dp->psr_dpcd[1] & DP_PSR_NO_TRAIN_ON_EXIT) {
-		/* It doesn't mean we shouldn't send TPS patters, so let's
-		   send the minimal TP1 possible and skip TP2. */
-		val |= EDP_PSR_TP1_TIME_100us;
-		val |= EDP_PSR_TP2_TP3_TIME_0us;
-		val |= EDP_PSR_SKIP_AUX_EXIT;
 		/* Sink should be able to train with the 5 or 6 idle patterns */
 		idle_frames += 4;
 	}
 
-	I915_WRITE(EDP_PSR_CTL(dev), val |
+	I915_WRITE(EDP_PSR_CTL, val |
 		   (IS_BROADWELL(dev) ? 0 : link_entry_time) |
 		   max_sleep_time << EDP_PSR_MAX_SLEEP_TIME_SHIFT |
 		   idle_frames << EDP_PSR_IDLE_FRAME_SHIFT |
@@ -340,7 +348,7 @@
 	struct drm_device *dev = intel_dig_port->base.base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	WARN_ON(I915_READ(EDP_PSR_CTL(dev)) & EDP_PSR_ENABLE);
+	WARN_ON(I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE);
 	WARN_ON(dev_priv->psr.active);
 	lockdep_assert_held(&dev_priv->psr.lock);
 
@@ -403,9 +411,14 @@
 				skl_psr_setup_su_vsc(intel_dp);
 		}
 
-		/* Avoid continuous PSR exit by masking memup and hpd */
-		I915_WRITE(EDP_PSR_DEBUG_CTL(dev), EDP_PSR_DEBUG_MASK_MEMUP |
-			   EDP_PSR_DEBUG_MASK_HPD);
+		/*
+		 * Per Spec: Avoid continuous PSR exit by masking MEMUP and HPD.
+		 * Also mask LPSP to avoid dependency on other drivers that
+		 * might block runtime_pm besides preventing other hw tracking
+		 * issues now we can rely on frontbuffer tracking.
+		 */
+		I915_WRITE(EDP_PSR_DEBUG_CTL, EDP_PSR_DEBUG_MASK_MEMUP |
+			   EDP_PSR_DEBUG_MASK_HPD | EDP_PSR_DEBUG_MASK_LPSP);
 
 		/* Enable PSR on the panel */
 		hsw_psr_enable_sink(intel_dp);
@@ -427,6 +440,19 @@
 		vlv_psr_enable_source(intel_dp);
 	}
 
+	/*
+	 * FIXME: Activation should happen immediately since this function
+	 * is just called after pipe is fully trained and enabled.
+	 * However on every platform we face issues when first activation
+	 * follows a modeset so quickly.
+	 *     - On VLV/CHV we get bank screen on first activation
+	 *     - On HSW/BDW we get a recoverable frozen screen until next
+	 *       exit-activate sequence.
+	 */
+	if (INTEL_INFO(dev)->gen < 9)
+		schedule_delayed_work(&dev_priv->psr.work,
+				      msecs_to_jiffies(intel_dp->panel_power_cycle_delay * 5));
+
 	dev_priv->psr.enabled = intel_dp;
 unlock:
 	mutex_unlock(&dev_priv->psr.lock);
@@ -466,17 +492,17 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	if (dev_priv->psr.active) {
-		I915_WRITE(EDP_PSR_CTL(dev),
-			   I915_READ(EDP_PSR_CTL(dev)) & ~EDP_PSR_ENABLE);
+		I915_WRITE(EDP_PSR_CTL,
+			   I915_READ(EDP_PSR_CTL) & ~EDP_PSR_ENABLE);
 
 		/* Wait till PSR is idle */
-		if (_wait_for((I915_READ(EDP_PSR_STATUS_CTL(dev)) &
+		if (_wait_for((I915_READ(EDP_PSR_STATUS_CTL) &
 			       EDP_PSR_STATUS_STATE_MASK) == 0, 2000, 10))
 			DRM_ERROR("Timed out waiting for PSR Idle State\n");
 
 		dev_priv->psr.active = false;
 	} else {
-		WARN_ON(I915_READ(EDP_PSR_CTL(dev)) & EDP_PSR_ENABLE);
+		WARN_ON(I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE);
 	}
 }
 
@@ -498,11 +524,15 @@
 		return;
 	}
 
+	/* Disable PSR on Source */
 	if (HAS_DDI(dev))
 		hsw_psr_disable(intel_dp);
 	else
 		vlv_psr_disable(intel_dp);
 
+	/* Disable PSR on Sink */
+	drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG, 0);
+
 	dev_priv->psr.enabled = NULL;
 	mutex_unlock(&dev_priv->psr.lock);
 
@@ -523,7 +553,7 @@
 	 * and be ready for re-enable.
 	 */
 	if (HAS_DDI(dev_priv->dev)) {
-		if (wait_for((I915_READ(EDP_PSR_STATUS_CTL(dev_priv->dev)) &
+		if (wait_for((I915_READ(EDP_PSR_STATUS_CTL) &
 			      EDP_PSR_STATUS_STATE_MASK) == 0, 50)) {
 			DRM_ERROR("Timed out waiting for PSR Idle for re-enable\n");
 			return;
@@ -566,11 +596,11 @@
 		return;
 
 	if (HAS_DDI(dev)) {
-		val = I915_READ(EDP_PSR_CTL(dev));
+		val = I915_READ(EDP_PSR_CTL);
 
 		WARN_ON(!(val & EDP_PSR_ENABLE));
 
-		I915_WRITE(EDP_PSR_CTL(dev), val & ~EDP_PSR_ENABLE);
+		I915_WRITE(EDP_PSR_CTL, val & ~EDP_PSR_ENABLE);
 	} else {
 		val = I915_READ(VLV_PSRCTL(pipe));
 
@@ -700,7 +730,6 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_crtc *crtc;
 	enum pipe pipe;
-	int delay_ms = HAS_DDI(dev) ? 100 : 500;
 
 	mutex_lock(&dev_priv->psr.lock);
 	if (!dev_priv->psr.enabled) {
@@ -714,29 +743,14 @@
 	frontbuffer_bits &= INTEL_FRONTBUFFER_ALL_MASK(pipe);
 	dev_priv->psr.busy_frontbuffer_bits &= ~frontbuffer_bits;
 
-	if (HAS_DDI(dev)) {
-		/*
-		 * By definition every flush should mean invalidate + flush,
-		 * however on core platforms let's minimize the
-		 * disable/re-enable so we can avoid the invalidate when flip
-		 * originated the flush.
-		 */
-		if (frontbuffer_bits && origin != ORIGIN_FLIP)
-			intel_psr_exit(dev);
-	} else {
-		/*
-		 * On Valleyview and Cherryview we don't use hardware tracking
-		 * so any plane updates or cursor moves don't result in a PSR
-		 * invalidating. Which means we need to manually fake this in
-		 * software for all flushes.
-		 */
-		if (frontbuffer_bits)
-			intel_psr_exit(dev);
-	}
+	/* By definition flush = invalidate + flush */
+	if (frontbuffer_bits)
+		intel_psr_exit(dev);
 
 	if (!dev_priv->psr.active && !dev_priv->psr.busy_frontbuffer_bits)
-		schedule_delayed_work(&dev_priv->psr.work,
-				      msecs_to_jiffies(delay_ms));
+		if (!work_busy(&dev_priv->psr.work.work))
+			schedule_delayed_work(&dev_priv->psr.work,
+					      msecs_to_jiffies(100));
 	mutex_unlock(&dev_priv->psr.lock);
 }
 
@@ -751,6 +765,9 @@
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
+	dev_priv->psr_mmio_base = IS_HASWELL(dev_priv) ?
+		HSW_EDP_PSR_BASE : BDW_EDP_PSR_BASE;
+
 	INIT_DELAYED_WORK(&dev_priv->psr.work, intel_psr_work);
 	mutex_init(&dev_priv->psr.lock);
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 9461a23..57d78f2 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -481,7 +481,7 @@
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-	u32 mmio = 0;
+	i915_reg_t mmio;
 
 	/* The ring status page addresses are no longer next to the rest of
 	 * the ring registers as of gen7.
@@ -524,7 +524,7 @@
 	 * invalidating the TLB?
 	 */
 	if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8) {
-		u32 reg = RING_INSTPM(ring->mmio_base);
+		i915_reg_t reg = RING_INSTPM(ring->mmio_base);
 
 		/* ring should be idle before issuing a sync flush*/
 		WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
@@ -733,7 +733,7 @@
 
 	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count));
 	for (i = 0; i < w->count; i++) {
-		intel_ring_emit(ring, w->reg[i].addr);
+		intel_ring_emit_reg(ring, w->reg[i].addr);
 		intel_ring_emit(ring, w->reg[i].value);
 	}
 	intel_ring_emit(ring, MI_NOOP);
@@ -766,7 +766,8 @@
 }
 
 static int wa_add(struct drm_i915_private *dev_priv,
-		  const u32 addr, const u32 mask, const u32 val)
+		  i915_reg_t addr,
+		  const u32 mask, const u32 val)
 {
 	const u32 idx = dev_priv->workarounds.count;
 
@@ -924,17 +925,15 @@
 	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
 			  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
 
-	if ((IS_SKYLAKE(dev) && (INTEL_REVID(dev) == SKL_REVID_A0 ||
-	    INTEL_REVID(dev) == SKL_REVID_B0)) ||
-	    (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0)) {
-		/* WaDisableDgMirrorFixInHalfSliceChicken5:skl,bxt */
+	/* WaDisableDgMirrorFixInHalfSliceChicken5:skl,bxt */
+	if (IS_SKL_REVID(dev, 0, SKL_REVID_B0) ||
+	    IS_BXT_REVID(dev, 0, BXT_REVID_A1))
 		WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
 				  GEN9_DG_MIRROR_FIX_ENABLE);
-	}
 
-	if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_B0) ||
-	    (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0)) {
-		/* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */
+	/* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */
+	if (IS_SKL_REVID(dev, 0, SKL_REVID_B0) ||
+	    IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
 		WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1,
 				  GEN9_RHWO_OPTIMIZATION_DISABLE);
 		/*
@@ -944,12 +943,10 @@
 		 */
 	}
 
-	if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) >= SKL_REVID_C0) ||
-	    IS_BROXTON(dev)) {
-		/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt */
+	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt */
+	if (IS_SKL_REVID(dev, SKL_REVID_C0, REVID_FOREVER) || IS_BROXTON(dev))
 		WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
 				  GEN9_ENABLE_YV12_BUGFIX);
-	}
 
 	/* Wa4x4STCOptimizationDisable:skl,bxt */
 	/* WaDisablePartialResolveInVc:skl,bxt */
@@ -961,24 +958,22 @@
 			  GEN9_CCS_TLB_PREFETCH_ENABLE);
 
 	/* WaDisableMaskBasedCammingInRCC:skl,bxt */
-	if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) == SKL_REVID_C0) ||
-	    (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0))
+	if (IS_SKL_REVID(dev, SKL_REVID_C0, SKL_REVID_C0) ||
+	    IS_BXT_REVID(dev, 0, BXT_REVID_A1))
 		WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0,
 				  PIXEL_MASK_CAMMING_DISABLE);
 
 	/* WaForceContextSaveRestoreNonCoherent:skl,bxt */
 	tmp = HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT;
-	if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) == SKL_REVID_F0) ||
-	    (IS_BROXTON(dev) && INTEL_REVID(dev) >= BXT_REVID_B0))
+	if (IS_SKL_REVID(dev, SKL_REVID_F0, SKL_REVID_F0) ||
+	    IS_BXT_REVID(dev, BXT_REVID_B0, REVID_FOREVER))
 		tmp |= HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE;
 	WA_SET_BIT_MASKED(HDC_CHICKEN0, tmp);
 
 	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt */
-	if (IS_SKYLAKE(dev) ||
-	    (IS_BROXTON(dev) && INTEL_REVID(dev) <= BXT_REVID_B0)) {
+	if (IS_SKYLAKE(dev) || IS_BXT_REVID(dev, 0, BXT_REVID_B0))
 		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
 				  GEN8_SAMPLER_POWER_BYPASS_DIS);
-	}
 
 	/* WaDisableSTUnitPowerOptimization:skl,bxt */
 	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
@@ -1038,7 +1033,7 @@
 	if (ret)
 		return ret;
 
-	if (INTEL_REVID(dev) <= SKL_REVID_D0) {
+	if (IS_SKL_REVID(dev, 0, SKL_REVID_D0)) {
 		/* WaDisableHDCInvalidation:skl */
 		I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
 			   BDW_DISABLE_HDC_INVALIDATION);
@@ -1051,23 +1046,23 @@
 	/* GEN8_L3SQCREG4 has a dependency with WA batch so any new changes
 	 * involving this register should also be added to WA batch as required.
 	 */
-	if (INTEL_REVID(dev) <= SKL_REVID_E0)
+	if (IS_SKL_REVID(dev, 0, SKL_REVID_E0))
 		/* WaDisableLSQCROPERFforOCL:skl */
 		I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
 			   GEN8_LQSC_RO_PERF_DIS);
 
 	/* WaEnableGapsTsvCreditFix:skl */
-	if (IS_SKYLAKE(dev) && (INTEL_REVID(dev) >= SKL_REVID_C0)) {
+	if (IS_SKL_REVID(dev, SKL_REVID_C0, REVID_FOREVER)) {
 		I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
 					   GEN9_GAPS_TSV_CREDIT_DISABLE));
 	}
 
 	/* WaDisablePowerCompilerClockGating:skl */
-	if (INTEL_REVID(dev) == SKL_REVID_B0)
+	if (IS_SKL_REVID(dev, SKL_REVID_B0, SKL_REVID_B0))
 		WA_SET_BIT_MASKED(HIZ_CHICKEN,
 				  BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE);
 
-	if (INTEL_REVID(dev) <= SKL_REVID_D0) {
+	if (IS_SKL_REVID(dev, 0, SKL_REVID_D0)) {
 		/*
 		 *Use Force Non-Coherent whenever executing a 3D context. This
 		 * is a workaround for a possible hang in the unlikely event
@@ -1078,19 +1073,17 @@
 				  HDC_FORCE_NON_COHERENT);
 	}
 
-	if (INTEL_REVID(dev) == SKL_REVID_C0 ||
-	    INTEL_REVID(dev) == SKL_REVID_D0)
-		/* WaBarrierPerformanceFixDisable:skl */
+	/* WaBarrierPerformanceFixDisable:skl */
+	if (IS_SKL_REVID(dev, SKL_REVID_C0, SKL_REVID_D0))
 		WA_SET_BIT_MASKED(HDC_CHICKEN0,
 				  HDC_FENCE_DEST_SLM_DISABLE |
 				  HDC_BARRIER_PERFORMANCE_DISABLE);
 
 	/* WaDisableSbeCacheDispatchPortSharing:skl */
-	if (INTEL_REVID(dev) <= SKL_REVID_F0) {
+	if (IS_SKL_REVID(dev, 0, SKL_REVID_F0))
 		WA_SET_BIT_MASKED(
 			GEN7_HALF_SLICE_CHICKEN1,
 			GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
-	}
 
 	return skl_tune_iz_hashing(ring);
 }
@@ -1107,11 +1100,11 @@
 
 	/* WaStoreMultiplePTEenable:bxt */
 	/* This is a requirement according to Hardware specification */
-	if (INTEL_REVID(dev) == BXT_REVID_A0)
+	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1))
 		I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF);
 
 	/* WaSetClckGatingDisableMedia:bxt */
-	if (INTEL_REVID(dev) == BXT_REVID_A0) {
+	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
 		I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
 					    ~GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE));
 	}
@@ -1121,7 +1114,7 @@
 			  STALL_DOP_GATING_DISABLE);
 
 	/* WaDisableSbeCacheDispatchPortSharing:bxt */
-	if (INTEL_REVID(dev) <= BXT_REVID_B0) {
+	if (IS_BXT_REVID(dev, 0, BXT_REVID_B0)) {
 		WA_SET_BIT_MASKED(
 			GEN7_HALF_SLICE_CHICKEN1,
 			GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
@@ -1319,11 +1312,13 @@
 		return ret;
 
 	for_each_ring(useless, dev_priv, i) {
-		u32 mbox_reg = signaller->semaphore.mbox.signal[i];
-		if (mbox_reg != GEN6_NOSYNC) {
+		i915_reg_t mbox_reg = signaller->semaphore.mbox.signal[i];
+
+		if (i915_mmio_reg_valid(mbox_reg)) {
 			u32 seqno = i915_gem_request_get_seqno(signaller_req);
+
 			intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
-			intel_ring_emit(signaller, mbox_reg);
+			intel_ring_emit_reg(signaller, mbox_reg);
 			intel_ring_emit(signaller, seqno);
 		}
 	}
@@ -2004,11 +1999,35 @@
 
 void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
 {
-	iounmap(ringbuf->virtual_start);
+	if (HAS_LLC(ringbuf->obj->base.dev) && !ringbuf->obj->stolen)
+		vunmap(ringbuf->virtual_start);
+	else
+		iounmap(ringbuf->virtual_start);
 	ringbuf->virtual_start = NULL;
 	i915_gem_object_ggtt_unpin(ringbuf->obj);
 }
 
+static u32 *vmap_obj(struct drm_i915_gem_object *obj)
+{
+	struct sg_page_iter sg_iter;
+	struct page **pages;
+	void *addr;
+	int i;
+
+	pages = drm_malloc_ab(obj->base.size >> PAGE_SHIFT, sizeof(*pages));
+	if (pages == NULL)
+		return NULL;
+
+	i = 0;
+	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0)
+		pages[i++] = sg_page_iter_page(&sg_iter);
+
+	addr = vmap(pages, i, 0, PAGE_KERNEL);
+	drm_free_large(pages);
+
+	return addr;
+}
+
 int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
 				     struct intel_ringbuffer *ringbuf)
 {
@@ -2016,21 +2035,39 @@
 	struct drm_i915_gem_object *obj = ringbuf->obj;
 	int ret;
 
-	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
-	if (ret)
-		return ret;
+	if (HAS_LLC(dev_priv) && !obj->stolen) {
+		ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, 0);
+		if (ret)
+			return ret;
 
-	ret = i915_gem_object_set_to_gtt_domain(obj, true);
-	if (ret) {
-		i915_gem_object_ggtt_unpin(obj);
-		return ret;
-	}
+		ret = i915_gem_object_set_to_cpu_domain(obj, true);
+		if (ret) {
+			i915_gem_object_ggtt_unpin(obj);
+			return ret;
+		}
 
-	ringbuf->virtual_start = ioremap_wc(dev_priv->gtt.mappable_base +
-			i915_gem_obj_ggtt_offset(obj), ringbuf->size);
-	if (ringbuf->virtual_start == NULL) {
-		i915_gem_object_ggtt_unpin(obj);
-		return -EINVAL;
+		ringbuf->virtual_start = vmap_obj(obj);
+		if (ringbuf->virtual_start == NULL) {
+			i915_gem_object_ggtt_unpin(obj);
+			return -ENOMEM;
+		}
+	} else {
+		ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
+		if (ret)
+			return ret;
+
+		ret = i915_gem_object_set_to_gtt_domain(obj, true);
+		if (ret) {
+			i915_gem_object_ggtt_unpin(obj);
+			return ret;
+		}
+
+		ringbuf->virtual_start = ioremap_wc(dev_priv->gtt.mappable_base +
+						    i915_gem_obj_ggtt_offset(obj), ringbuf->size);
+		if (ringbuf->virtual_start == NULL) {
+			i915_gem_object_ggtt_unpin(obj);
+			return -EINVAL;
+		}
 	}
 
 	return 0;
@@ -2070,10 +2107,14 @@
 	int ret;
 
 	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
-	if (ring == NULL)
+	if (ring == NULL) {
+		DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
+				 engine->name);
 		return ERR_PTR(-ENOMEM);
+	}
 
 	ring->ring = engine;
+	list_add(&ring->link, &engine->buffers);
 
 	ring->size = size;
 	/* Workaround an erratum on the i830 which causes a hang if
@@ -2089,8 +2130,9 @@
 
 	ret = intel_alloc_ringbuffer_obj(engine->dev, ring);
 	if (ret) {
-		DRM_ERROR("Failed to allocate ringbuffer %s: %d\n",
-			  engine->name, ret);
+		DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s: %d\n",
+				 engine->name, ret);
+		list_del(&ring->link);
 		kfree(ring);
 		return ERR_PTR(ret);
 	}
@@ -2102,6 +2144,7 @@
 intel_ringbuffer_free(struct intel_ringbuffer *ring)
 {
 	intel_destroy_ringbuffer_obj(ring);
+	list_del(&ring->link);
 	kfree(ring);
 }
 
@@ -2117,6 +2160,7 @@
 	INIT_LIST_HEAD(&ring->active_list);
 	INIT_LIST_HEAD(&ring->request_list);
 	INIT_LIST_HEAD(&ring->execlist_queue);
+	INIT_LIST_HEAD(&ring->buffers);
 	i915_gem_batch_pool_init(dev, &ring->batch_pool);
 	memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 49fa41d..5d1eb20 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -100,6 +100,7 @@
 	void __iomem *virtual_start;
 
 	struct intel_engine_cs *ring;
+	struct list_head link;
 
 	u32 head;
 	u32 tail;
@@ -157,6 +158,7 @@
 	u32		mmio_base;
 	struct		drm_device *dev;
 	struct intel_ringbuffer *buffer;
+	struct list_head buffers;
 
 	/*
 	 * A pool of objects to use as shadow copies of client batch buffers
@@ -247,7 +249,7 @@
 				/* our mbox written by others */
 				u32		wait[I915_NUM_RINGS];
 				/* mboxes this ring signals to */
-				u32		signal[I915_NUM_RINGS];
+				i915_reg_t	signal[I915_NUM_RINGS];
 			} mbox;
 			u64		signal_ggtt[I915_NUM_RINGS];
 		};
@@ -441,6 +443,11 @@
 	iowrite32(data, ringbuf->virtual_start + ringbuf->tail);
 	ringbuf->tail += 4;
 }
+static inline void intel_ring_emit_reg(struct intel_engine_cs *ring,
+				       i915_reg_t reg)
+{
+	intel_ring_emit(ring, i915_mmio_reg_offset(reg));
+}
 static inline void intel_ring_advance(struct intel_engine_cs *ring)
 {
 	struct intel_ringbuffer *ringbuf = ring->buffer;
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index d89c1d0..2c2151f 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -49,25 +49,88 @@
  * present for a given platform.
  */
 
-#define GEN9_ENABLE_DC5(dev) 0
-#define SKL_ENABLE_DC6(dev) IS_SKYLAKE(dev)
-
 #define for_each_power_well(i, power_well, domain_mask, power_domains)	\
 	for (i = 0;							\
 	     i < (power_domains)->power_well_count &&			\
 		 ((power_well) = &(power_domains)->power_wells[i]);	\
 	     i++)							\
-		if ((power_well)->domains & (domain_mask))
+		for_each_if ((power_well)->domains & (domain_mask))
 
 #define for_each_power_well_rev(i, power_well, domain_mask, power_domains) \
 	for (i = (power_domains)->power_well_count - 1;			 \
 	     i >= 0 && ((power_well) = &(power_domains)->power_wells[i]);\
 	     i--)							 \
-		if ((power_well)->domains & (domain_mask))
+		for_each_if ((power_well)->domains & (domain_mask))
 
 bool intel_display_power_well_is_enabled(struct drm_i915_private *dev_priv,
 				    int power_well_id);
 
+const char *
+intel_display_power_domain_str(enum intel_display_power_domain domain)
+{
+	switch (domain) {
+	case POWER_DOMAIN_PIPE_A:
+		return "PIPE_A";
+	case POWER_DOMAIN_PIPE_B:
+		return "PIPE_B";
+	case POWER_DOMAIN_PIPE_C:
+		return "PIPE_C";
+	case POWER_DOMAIN_PIPE_A_PANEL_FITTER:
+		return "PIPE_A_PANEL_FITTER";
+	case POWER_DOMAIN_PIPE_B_PANEL_FITTER:
+		return "PIPE_B_PANEL_FITTER";
+	case POWER_DOMAIN_PIPE_C_PANEL_FITTER:
+		return "PIPE_C_PANEL_FITTER";
+	case POWER_DOMAIN_TRANSCODER_A:
+		return "TRANSCODER_A";
+	case POWER_DOMAIN_TRANSCODER_B:
+		return "TRANSCODER_B";
+	case POWER_DOMAIN_TRANSCODER_C:
+		return "TRANSCODER_C";
+	case POWER_DOMAIN_TRANSCODER_EDP:
+		return "TRANSCODER_EDP";
+	case POWER_DOMAIN_PORT_DDI_A_LANES:
+		return "PORT_DDI_A_LANES";
+	case POWER_DOMAIN_PORT_DDI_B_LANES:
+		return "PORT_DDI_B_LANES";
+	case POWER_DOMAIN_PORT_DDI_C_LANES:
+		return "PORT_DDI_C_LANES";
+	case POWER_DOMAIN_PORT_DDI_D_LANES:
+		return "PORT_DDI_D_LANES";
+	case POWER_DOMAIN_PORT_DDI_E_LANES:
+		return "PORT_DDI_E_LANES";
+	case POWER_DOMAIN_PORT_DSI:
+		return "PORT_DSI";
+	case POWER_DOMAIN_PORT_CRT:
+		return "PORT_CRT";
+	case POWER_DOMAIN_PORT_OTHER:
+		return "PORT_OTHER";
+	case POWER_DOMAIN_VGA:
+		return "VGA";
+	case POWER_DOMAIN_AUDIO:
+		return "AUDIO";
+	case POWER_DOMAIN_PLLS:
+		return "PLLS";
+	case POWER_DOMAIN_AUX_A:
+		return "AUX_A";
+	case POWER_DOMAIN_AUX_B:
+		return "AUX_B";
+	case POWER_DOMAIN_AUX_C:
+		return "AUX_C";
+	case POWER_DOMAIN_AUX_D:
+		return "AUX_D";
+	case POWER_DOMAIN_GMBUS:
+		return "GMBUS";
+	case POWER_DOMAIN_INIT:
+		return "INIT";
+	case POWER_DOMAIN_MODESET:
+		return "MODESET";
+	default:
+		MISSING_CASE(domain);
+		return "?";
+	}
+}
+
 static void intel_power_well_enable(struct drm_i915_private *dev_priv,
 				    struct i915_power_well *power_well)
 {
@@ -244,12 +307,6 @@
 		gen8_irq_power_well_post_enable(dev_priv,
 						1 << PIPE_C | 1 << PIPE_B);
 	}
-
-	if (power_well->data == SKL_DISP_PW_1) {
-		if (!dev_priv->power_domains.initializing)
-			intel_prepare_ddi(dev);
-		gen8_irq_power_well_post_enable(dev_priv, 1 << PIPE_A);
-	}
 }
 
 static void hsw_set_power_well(struct drm_i915_private *dev_priv,
@@ -292,58 +349,38 @@
 	BIT(POWER_DOMAIN_TRANSCODER_C) |		\
 	BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) |		\
 	BIT(POWER_DOMAIN_PIPE_C_PANEL_FITTER) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_C_2_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_D_2_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_E_2_LANES) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_B_LANES) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_C_LANES) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_D_LANES) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_E_LANES) |		\
 	BIT(POWER_DOMAIN_AUX_B) |                       \
 	BIT(POWER_DOMAIN_AUX_C) |			\
 	BIT(POWER_DOMAIN_AUX_D) |			\
 	BIT(POWER_DOMAIN_AUDIO) |			\
 	BIT(POWER_DOMAIN_VGA) |				\
 	BIT(POWER_DOMAIN_INIT))
-#define SKL_DISPLAY_POWERWELL_1_POWER_DOMAINS (		\
-	SKL_DISPLAY_POWERWELL_2_POWER_DOMAINS |		\
-	BIT(POWER_DOMAIN_PLLS) |			\
-	BIT(POWER_DOMAIN_PIPE_A) |			\
-	BIT(POWER_DOMAIN_TRANSCODER_EDP) |		\
-	BIT(POWER_DOMAIN_PIPE_A_PANEL_FITTER) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_A_2_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_A_4_LANES) |		\
-	BIT(POWER_DOMAIN_AUX_A) |			\
-	BIT(POWER_DOMAIN_INIT))
 #define SKL_DISPLAY_DDI_A_E_POWER_DOMAINS (		\
-	BIT(POWER_DOMAIN_PORT_DDI_A_2_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_A_4_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_E_2_LANES) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_A_LANES) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_E_LANES) |		\
 	BIT(POWER_DOMAIN_INIT))
 #define SKL_DISPLAY_DDI_B_POWER_DOMAINS (		\
-	BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_B_LANES) |		\
 	BIT(POWER_DOMAIN_INIT))
 #define SKL_DISPLAY_DDI_C_POWER_DOMAINS (		\
-	BIT(POWER_DOMAIN_PORT_DDI_C_2_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_C_LANES) |		\
 	BIT(POWER_DOMAIN_INIT))
 #define SKL_DISPLAY_DDI_D_POWER_DOMAINS (		\
-	BIT(POWER_DOMAIN_PORT_DDI_D_2_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_D_LANES) |		\
 	BIT(POWER_DOMAIN_INIT))
-#define SKL_DISPLAY_MISC_IO_POWER_DOMAINS (		\
-	SKL_DISPLAY_POWERWELL_1_POWER_DOMAINS |		\
-	BIT(POWER_DOMAIN_PLLS) |			\
+#define SKL_DISPLAY_DC_OFF_POWER_DOMAINS (		\
+	SKL_DISPLAY_POWERWELL_2_POWER_DOMAINS |		\
+	BIT(POWER_DOMAIN_MODESET) |			\
+	BIT(POWER_DOMAIN_AUX_A) |			\
 	BIT(POWER_DOMAIN_INIT))
 #define SKL_DISPLAY_ALWAYS_ON_POWER_DOMAINS (		\
-	(POWER_DOMAIN_MASK & ~(SKL_DISPLAY_POWERWELL_1_POWER_DOMAINS |	\
+	(POWER_DOMAIN_MASK & ~(				\
 	SKL_DISPLAY_POWERWELL_2_POWER_DOMAINS |		\
-	SKL_DISPLAY_DDI_A_E_POWER_DOMAINS |		\
-	SKL_DISPLAY_DDI_B_POWER_DOMAINS |		\
-	SKL_DISPLAY_DDI_C_POWER_DOMAINS |		\
-	SKL_DISPLAY_DDI_D_POWER_DOMAINS |		\
-	SKL_DISPLAY_MISC_IO_POWER_DOMAINS)) |		\
+	SKL_DISPLAY_DC_OFF_POWER_DOMAINS)) |		\
 	BIT(POWER_DOMAIN_INIT))
 
 #define BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS (		\
@@ -354,25 +391,28 @@
 	BIT(POWER_DOMAIN_TRANSCODER_C) |		\
 	BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) |		\
 	BIT(POWER_DOMAIN_PIPE_C_PANEL_FITTER) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_C_2_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_B_LANES) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_C_LANES) |		\
 	BIT(POWER_DOMAIN_AUX_B) |			\
 	BIT(POWER_DOMAIN_AUX_C) |			\
 	BIT(POWER_DOMAIN_AUDIO) |			\
 	BIT(POWER_DOMAIN_VGA) |				\
+	BIT(POWER_DOMAIN_GMBUS) |			\
 	BIT(POWER_DOMAIN_INIT))
 #define BXT_DISPLAY_POWERWELL_1_POWER_DOMAINS (		\
 	BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS |		\
 	BIT(POWER_DOMAIN_PIPE_A) |			\
 	BIT(POWER_DOMAIN_TRANSCODER_EDP) |		\
 	BIT(POWER_DOMAIN_PIPE_A_PANEL_FITTER) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_A_2_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_A_4_LANES) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_A_LANES) |		\
 	BIT(POWER_DOMAIN_AUX_A) |			\
 	BIT(POWER_DOMAIN_PLLS) |			\
 	BIT(POWER_DOMAIN_INIT))
+#define BXT_DISPLAY_DC_OFF_POWER_DOMAINS (		\
+	BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS |		\
+	BIT(POWER_DOMAIN_MODESET) |			\
+	BIT(POWER_DOMAIN_AUX_A) |			\
+	BIT(POWER_DOMAIN_INIT))
 #define BXT_DISPLAY_ALWAYS_ON_POWER_DOMAINS (		\
 	(POWER_DOMAIN_MASK & ~(BXT_DISPLAY_POWERWELL_1_POWER_DOMAINS |	\
 	BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS)) |	\
@@ -416,34 +456,6 @@
 	  */
 }
 
-void bxt_enable_dc9(struct drm_i915_private *dev_priv)
-{
-	uint32_t val;
-
-	assert_can_enable_dc9(dev_priv);
-
-	DRM_DEBUG_KMS("Enabling DC9\n");
-
-	val = I915_READ(DC_STATE_EN);
-	val |= DC_STATE_EN_DC9;
-	I915_WRITE(DC_STATE_EN, val);
-	POSTING_READ(DC_STATE_EN);
-}
-
-void bxt_disable_dc9(struct drm_i915_private *dev_priv)
-{
-	uint32_t val;
-
-	assert_can_disable_dc9(dev_priv);
-
-	DRM_DEBUG_KMS("Disabling DC9\n");
-
-	val = I915_READ(DC_STATE_EN);
-	val &= ~DC_STATE_EN_DC9;
-	I915_WRITE(DC_STATE_EN, val);
-	POSTING_READ(DC_STATE_EN);
-}
-
 static void gen9_set_dc_state_debugmask_memory_up(
 			struct drm_i915_private *dev_priv)
 {
@@ -458,6 +470,62 @@
 	}
 }
 
+static void gen9_set_dc_state(struct drm_i915_private *dev_priv, uint32_t state)
+{
+	uint32_t val;
+	uint32_t mask;
+
+	mask = DC_STATE_EN_UPTO_DC5;
+	if (IS_BROXTON(dev_priv))
+		mask |= DC_STATE_EN_DC9;
+	else
+		mask |= DC_STATE_EN_UPTO_DC6;
+
+	WARN_ON_ONCE(state & ~mask);
+
+	if (i915.enable_dc == 0)
+		state = DC_STATE_DISABLE;
+	else if (i915.enable_dc == 1 && state > DC_STATE_EN_UPTO_DC5)
+		state = DC_STATE_EN_UPTO_DC5;
+
+	if (state & DC_STATE_EN_UPTO_DC5_DC6_MASK)
+		gen9_set_dc_state_debugmask_memory_up(dev_priv);
+
+	val = I915_READ(DC_STATE_EN);
+	DRM_DEBUG_KMS("Setting DC state from %02x to %02x\n",
+		      val & mask, state);
+	val &= ~mask;
+	val |= state;
+	I915_WRITE(DC_STATE_EN, val);
+	POSTING_READ(DC_STATE_EN);
+}
+
+void bxt_enable_dc9(struct drm_i915_private *dev_priv)
+{
+	assert_can_enable_dc9(dev_priv);
+
+	DRM_DEBUG_KMS("Enabling DC9\n");
+
+	gen9_set_dc_state(dev_priv, DC_STATE_EN_DC9);
+}
+
+void bxt_disable_dc9(struct drm_i915_private *dev_priv)
+{
+	assert_can_disable_dc9(dev_priv);
+
+	DRM_DEBUG_KMS("Disabling DC9\n");
+
+	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
+}
+
+static void assert_csr_loaded(struct drm_i915_private *dev_priv)
+{
+	WARN_ONCE(!I915_READ(CSR_PROGRAM(0)),
+		  "CSR program storage start is NULL\n");
+	WARN_ONCE(!I915_READ(CSR_SSP_BASE), "CSR SSP Base Not fine\n");
+	WARN_ONCE(!I915_READ(CSR_HTP_SKL), "CSR HTP Not fine\n");
+}
+
 static void assert_can_enable_dc5(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *dev = dev_priv->dev;
@@ -478,8 +546,6 @@
 
 static void assert_can_disable_dc5(struct drm_i915_private *dev_priv)
 {
-	bool pg2_enabled = intel_display_power_well_is_enabled(dev_priv,
-					SKL_DISP_PW_2);
 	/*
 	 * During initialization, the firmware may not be loaded yet.
 	 * We still want to make sure that the DC enabling flag is cleared.
@@ -487,40 +553,17 @@
 	if (dev_priv->power_domains.initializing)
 		return;
 
-	WARN_ONCE(!pg2_enabled, "PG2 not enabled to disable DC5.\n");
 	WARN_ONCE(dev_priv->pm.suspended,
 		"Disabling of DC5 while platform is runtime-suspended should never happen.\n");
 }
 
 static void gen9_enable_dc5(struct drm_i915_private *dev_priv)
 {
-	uint32_t val;
-
 	assert_can_enable_dc5(dev_priv);
 
 	DRM_DEBUG_KMS("Enabling DC5\n");
 
-	gen9_set_dc_state_debugmask_memory_up(dev_priv);
-
-	val = I915_READ(DC_STATE_EN);
-	val &= ~DC_STATE_EN_UPTO_DC5_DC6_MASK;
-	val |= DC_STATE_EN_UPTO_DC5;
-	I915_WRITE(DC_STATE_EN, val);
-	POSTING_READ(DC_STATE_EN);
-}
-
-static void gen9_disable_dc5(struct drm_i915_private *dev_priv)
-{
-	uint32_t val;
-
-	assert_can_disable_dc5(dev_priv);
-
-	DRM_DEBUG_KMS("Disabling DC5\n");
-
-	val = I915_READ(DC_STATE_EN);
-	val &= ~DC_STATE_EN_UPTO_DC5;
-	I915_WRITE(DC_STATE_EN, val);
-	POSTING_READ(DC_STATE_EN);
+	gen9_set_dc_state(dev_priv, DC_STATE_EN_UPTO_DC5);
 }
 
 static void assert_can_enable_dc6(struct drm_i915_private *dev_priv)
@@ -546,40 +589,37 @@
 	if (dev_priv->power_domains.initializing)
 		return;
 
-	assert_csr_loaded(dev_priv);
 	WARN_ONCE(!(I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC6),
 		  "DC6 already programmed to be disabled.\n");
 }
 
-static void skl_enable_dc6(struct drm_i915_private *dev_priv)
+static void gen9_disable_dc5_dc6(struct drm_i915_private *dev_priv)
 {
-	uint32_t val;
+	assert_can_disable_dc5(dev_priv);
 
+	if (IS_SKYLAKE(dev_priv) && i915.enable_dc != 0 && i915.enable_dc != 1)
+		assert_can_disable_dc6(dev_priv);
+
+	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
+}
+
+void skl_enable_dc6(struct drm_i915_private *dev_priv)
+{
 	assert_can_enable_dc6(dev_priv);
 
 	DRM_DEBUG_KMS("Enabling DC6\n");
 
-	gen9_set_dc_state_debugmask_memory_up(dev_priv);
+	gen9_set_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6);
 
-	val = I915_READ(DC_STATE_EN);
-	val &= ~DC_STATE_EN_UPTO_DC5_DC6_MASK;
-	val |= DC_STATE_EN_UPTO_DC6;
-	I915_WRITE(DC_STATE_EN, val);
-	POSTING_READ(DC_STATE_EN);
 }
 
-static void skl_disable_dc6(struct drm_i915_private *dev_priv)
+void skl_disable_dc6(struct drm_i915_private *dev_priv)
 {
-	uint32_t val;
-
 	assert_can_disable_dc6(dev_priv);
 
 	DRM_DEBUG_KMS("Disabling DC6\n");
 
-	val = I915_READ(DC_STATE_EN);
-	val &= ~DC_STATE_EN_UPTO_DC6;
-	I915_WRITE(DC_STATE_EN, val);
-	POSTING_READ(DC_STATE_EN);
+	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
 }
 
 static void skl_set_power_well(struct drm_i915_private *dev_priv,
@@ -629,20 +669,16 @@
 				!I915_READ(HSW_PWR_WELL_BIOS),
 				"Invalid for power well status to be enabled, unless done by the BIOS, \
 				when request is to disable!\n");
-			if ((GEN9_ENABLE_DC5(dev) || SKL_ENABLE_DC6(dev)) &&
-				power_well->data == SKL_DISP_PW_2) {
-				if (SKL_ENABLE_DC6(dev)) {
-					skl_disable_dc6(dev_priv);
-					/*
-					 * DDI buffer programming unnecessary during driver-load/resume
-					 * as it's already done during modeset initialization then.
-					 * It's also invalid here as encoder list is still uninitialized.
-					 */
-					if (!dev_priv->power_domains.initializing)
-						intel_prepare_ddi(dev);
-				} else {
-					gen9_disable_dc5(dev_priv);
-				}
+			if (power_well->data == SKL_DISP_PW_2) {
+				/*
+				 * DDI buffer programming unnecessary during
+				 * driver-load/resume as it's already done
+				 * during modeset initialization then. It's
+				 * also invalid here as encoder list is still
+				 * uninitialized.
+				 */
+				if (!dev_priv->power_domains.initializing)
+					intel_prepare_ddi(dev);
 			}
 			I915_WRITE(HSW_PWR_WELL_DRIVER, tmp | req_mask);
 		}
@@ -657,34 +693,9 @@
 		}
 	} else {
 		if (enable_requested) {
-			if (IS_SKYLAKE(dev) &&
-				(power_well->data == SKL_DISP_PW_1) &&
-				(intel_csr_load_status_get(dev_priv) == FW_LOADED))
-				DRM_DEBUG_KMS("Not Disabling PW1, dmc will handle\n");
-			else {
-				I915_WRITE(HSW_PWR_WELL_DRIVER,	tmp & ~req_mask);
-				POSTING_READ(HSW_PWR_WELL_DRIVER);
-				DRM_DEBUG_KMS("Disabling %s\n", power_well->name);
-			}
-
-			if ((GEN9_ENABLE_DC5(dev) || SKL_ENABLE_DC6(dev)) &&
-				power_well->data == SKL_DISP_PW_2) {
-				enum csr_state state;
-				/* TODO: wait for a completion event or
-				 * similar here instead of busy
-				 * waiting using wait_for function.
-				 */
-				wait_for((state = intel_csr_load_status_get(dev_priv)) !=
-						FW_UNINITIALIZED, 1000);
-				if (state != FW_LOADED)
-					DRM_DEBUG("CSR firmware not ready (%d)\n",
-							state);
-				else
-					if (SKL_ENABLE_DC6(dev))
-						skl_enable_dc6(dev_priv);
-					else
-						gen9_enable_dc5(dev_priv);
-			}
+			I915_WRITE(HSW_PWR_WELL_DRIVER,	tmp & ~req_mask);
+			POSTING_READ(HSW_PWR_WELL_DRIVER);
+			DRM_DEBUG_KMS("Disabling %s\n", power_well->name);
 		}
 	}
 
@@ -759,6 +770,41 @@
 	skl_set_power_well(dev_priv, power_well, false);
 }
 
+static bool gen9_dc_off_power_well_enabled(struct drm_i915_private *dev_priv,
+					   struct i915_power_well *power_well)
+{
+	return (I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC5_DC6_MASK) == 0;
+}
+
+static void gen9_dc_off_power_well_enable(struct drm_i915_private *dev_priv,
+					  struct i915_power_well *power_well)
+{
+	gen9_disable_dc5_dc6(dev_priv);
+}
+
+static void gen9_dc_off_power_well_disable(struct drm_i915_private *dev_priv,
+					   struct i915_power_well *power_well)
+{
+	if (IS_SKYLAKE(dev_priv) && i915.enable_dc != 0 && i915.enable_dc != 1)
+		skl_enable_dc6(dev_priv);
+	else
+		gen9_enable_dc5(dev_priv);
+}
+
+static void gen9_dc_off_power_well_sync_hw(struct drm_i915_private *dev_priv,
+					   struct i915_power_well *power_well)
+{
+	if (power_well->count > 0) {
+		gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
+	} else {
+		if (IS_SKYLAKE(dev_priv) && i915.enable_dc != 0 &&
+		    i915.enable_dc != 1)
+			gen9_set_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6);
+		else
+			gen9_set_dc_state(dev_priv, DC_STATE_EN_UPTO_DC5);
+	}
+}
+
 static void i9xx_always_on_power_well_noop(struct drm_i915_private *dev_priv,
 					   struct i915_power_well *power_well)
 {
@@ -973,10 +1019,12 @@
 						 int power_well_id)
 {
 	struct i915_power_domains *power_domains = &dev_priv->power_domains;
-	struct i915_power_well *power_well;
 	int i;
 
-	for_each_power_well(i, power_well, POWER_DOMAIN_MASK, power_domains) {
+	for (i = 0; i < power_domains->power_well_count; i++) {
+		struct i915_power_well *power_well;
+
+		power_well = &power_domains->power_wells[i];
 		if (power_well->data == power_well_id)
 			return power_well;
 	}
@@ -1451,13 +1499,17 @@
 
 	mutex_lock(&power_domains->lock);
 
-	WARN_ON(!power_domains->domain_use_count[domain]);
+	WARN(!power_domains->domain_use_count[domain],
+	     "Use count on domain %s is already zero\n",
+	     intel_display_power_domain_str(domain));
 	power_domains->domain_use_count[domain]--;
 
 	for_each_power_well_rev(i, power_well, BIT(domain), power_domains) {
-		WARN_ON(!power_well->count);
+		WARN(!power_well->count,
+		     "Use count on power well %s is already zero",
+		     power_well->name);
 
-		if (!--power_well->count && i915.disable_power_well)
+		if (!--power_well->count)
 			intel_power_well_disable(dev_priv, power_well);
 	}
 
@@ -1469,20 +1521,17 @@
 #define HSW_ALWAYS_ON_POWER_DOMAINS (			\
 	BIT(POWER_DOMAIN_PIPE_A) |			\
 	BIT(POWER_DOMAIN_TRANSCODER_EDP) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_A_2_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_A_4_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_C_2_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_D_2_LANES) |		\
-	BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_A_LANES) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_B_LANES) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_C_LANES) |		\
+	BIT(POWER_DOMAIN_PORT_DDI_D_LANES) |		\
 	BIT(POWER_DOMAIN_PORT_CRT) |			\
 	BIT(POWER_DOMAIN_PLLS) |			\
 	BIT(POWER_DOMAIN_AUX_A) |			\
 	BIT(POWER_DOMAIN_AUX_B) |			\
 	BIT(POWER_DOMAIN_AUX_C) |			\
 	BIT(POWER_DOMAIN_AUX_D) |			\
+	BIT(POWER_DOMAIN_GMBUS) |			\
 	BIT(POWER_DOMAIN_INIT))
 #define HSW_DISPLAY_POWER_DOMAINS (				\
 	(POWER_DOMAIN_MASK & ~HSW_ALWAYS_ON_POWER_DOMAINS) |	\
@@ -1499,49 +1548,42 @@
 #define VLV_DISPLAY_POWER_DOMAINS	POWER_DOMAIN_MASK
 
 #define VLV_DPIO_CMN_BC_POWER_DOMAINS (		\
-	BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) |	\
-	BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) |	\
-	BIT(POWER_DOMAIN_PORT_DDI_C_2_LANES) |	\
-	BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) |	\
+	BIT(POWER_DOMAIN_PORT_DDI_B_LANES) |	\
+	BIT(POWER_DOMAIN_PORT_DDI_C_LANES) |	\
 	BIT(POWER_DOMAIN_PORT_CRT) |		\
 	BIT(POWER_DOMAIN_AUX_B) |		\
 	BIT(POWER_DOMAIN_AUX_C) |		\
 	BIT(POWER_DOMAIN_INIT))
 
 #define VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS (	\
-	BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) |	\
-	BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) |	\
+	BIT(POWER_DOMAIN_PORT_DDI_B_LANES) |	\
 	BIT(POWER_DOMAIN_AUX_B) |		\
 	BIT(POWER_DOMAIN_INIT))
 
 #define VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS (	\
-	BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) |	\
+	BIT(POWER_DOMAIN_PORT_DDI_B_LANES) |	\
 	BIT(POWER_DOMAIN_AUX_B) |		\
 	BIT(POWER_DOMAIN_INIT))
 
 #define VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS (	\
-	BIT(POWER_DOMAIN_PORT_DDI_C_2_LANES) |	\
-	BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) |	\
+	BIT(POWER_DOMAIN_PORT_DDI_C_LANES) |	\
 	BIT(POWER_DOMAIN_AUX_C) |		\
 	BIT(POWER_DOMAIN_INIT))
 
 #define VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS (	\
-	BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) |	\
+	BIT(POWER_DOMAIN_PORT_DDI_C_LANES) |	\
 	BIT(POWER_DOMAIN_AUX_C) |		\
 	BIT(POWER_DOMAIN_INIT))
 
 #define CHV_DPIO_CMN_BC_POWER_DOMAINS (		\
-	BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) |	\
-	BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) |	\
-	BIT(POWER_DOMAIN_PORT_DDI_C_2_LANES) |	\
-	BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) |	\
+	BIT(POWER_DOMAIN_PORT_DDI_B_LANES) |	\
+	BIT(POWER_DOMAIN_PORT_DDI_C_LANES) |	\
 	BIT(POWER_DOMAIN_AUX_B) |		\
 	BIT(POWER_DOMAIN_AUX_C) |		\
 	BIT(POWER_DOMAIN_INIT))
 
 #define CHV_DPIO_CMN_D_POWER_DOMAINS (		\
-	BIT(POWER_DOMAIN_PORT_DDI_D_2_LANES) |	\
-	BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) |	\
+	BIT(POWER_DOMAIN_PORT_DDI_D_LANES) |	\
 	BIT(POWER_DOMAIN_AUX_D) |		\
 	BIT(POWER_DOMAIN_INIT))
 
@@ -1589,6 +1631,13 @@
 	.is_enabled = skl_power_well_enabled,
 };
 
+static const struct i915_power_well_ops gen9_dc_off_power_well_ops = {
+	.sync_hw = gen9_dc_off_power_well_sync_hw,
+	.enable = gen9_dc_off_power_well_enable,
+	.disable = gen9_dc_off_power_well_disable,
+	.is_enabled = gen9_dc_off_power_well_enabled,
+};
+
 static struct i915_power_well hsw_power_wells[] = {
 	{
 		.name = "always-on",
@@ -1644,6 +1693,7 @@
 		.always_on = 1,
 		.domains = VLV_ALWAYS_ON_POWER_DOMAINS,
 		.ops = &i9xx_always_on_power_well_ops,
+		.data = PUNIT_POWER_WELL_ALWAYS_ON,
 	},
 	{
 		.name = "display",
@@ -1745,20 +1795,29 @@
 		.always_on = 1,
 		.domains = SKL_DISPLAY_ALWAYS_ON_POWER_DOMAINS,
 		.ops = &i9xx_always_on_power_well_ops,
+		.data = SKL_DISP_PW_ALWAYS_ON,
 	},
 	{
 		.name = "power well 1",
-		.domains = SKL_DISPLAY_POWERWELL_1_POWER_DOMAINS,
+		/* Handled by the DMC firmware */
+		.domains = 0,
 		.ops = &skl_power_well_ops,
 		.data = SKL_DISP_PW_1,
 	},
 	{
 		.name = "MISC IO power well",
-		.domains = SKL_DISPLAY_MISC_IO_POWER_DOMAINS,
+		/* Handled by the DMC firmware */
+		.domains = 0,
 		.ops = &skl_power_well_ops,
 		.data = SKL_DISP_PW_MISC_IO,
 	},
 	{
+		.name = "DC off",
+		.domains = SKL_DISPLAY_DC_OFF_POWER_DOMAINS,
+		.ops = &gen9_dc_off_power_well_ops,
+		.data = SKL_DISP_PW_DC_OFF,
+	},
+	{
 		.name = "power well 2",
 		.domains = SKL_DISPLAY_POWERWELL_2_POWER_DOMAINS,
 		.ops = &skl_power_well_ops,
@@ -1790,6 +1849,34 @@
 	},
 };
 
+void skl_pw1_misc_io_init(struct drm_i915_private *dev_priv)
+{
+	struct i915_power_well *well;
+
+	if (!IS_SKYLAKE(dev_priv))
+		return;
+
+	well = lookup_power_well(dev_priv, SKL_DISP_PW_1);
+	intel_power_well_enable(dev_priv, well);
+
+	well = lookup_power_well(dev_priv, SKL_DISP_PW_MISC_IO);
+	intel_power_well_enable(dev_priv, well);
+}
+
+void skl_pw1_misc_io_fini(struct drm_i915_private *dev_priv)
+{
+	struct i915_power_well *well;
+
+	if (!IS_SKYLAKE(dev_priv))
+		return;
+
+	well = lookup_power_well(dev_priv, SKL_DISP_PW_1);
+	intel_power_well_disable(dev_priv, well);
+
+	well = lookup_power_well(dev_priv, SKL_DISP_PW_MISC_IO);
+	intel_power_well_disable(dev_priv, well);
+}
+
 static struct i915_power_well bxt_power_wells[] = {
 	{
 		.name = "always-on",
@@ -1804,11 +1891,17 @@
 		.data = SKL_DISP_PW_1,
 	},
 	{
+		.name = "DC off",
+		.domains = BXT_DISPLAY_DC_OFF_POWER_DOMAINS,
+		.ops = &gen9_dc_off_power_well_ops,
+		.data = SKL_DISP_PW_DC_OFF,
+	},
+	{
 		.name = "power well 2",
 		.domains = BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS,
 		.ops = &skl_power_well_ops,
 		.data = SKL_DISP_PW_2,
-	}
+	},
 };
 
 static int
@@ -1818,7 +1911,7 @@
 	if (disable_power_well >= 0)
 		return !!disable_power_well;
 
-	if (IS_SKYLAKE(dev_priv)) {
+	if (IS_BROXTON(dev_priv)) {
 		DRM_DEBUG_KMS("Disabling display power well support\n");
 		return 0;
 	}
@@ -1845,6 +1938,8 @@
 	i915.disable_power_well = sanitize_disable_power_well_option(dev_priv,
 						     i915.disable_power_well);
 
+	BUILD_BUG_ON(POWER_DOMAIN_NUM > 31);
+
 	mutex_init(&power_domains->lock);
 
 	/*
@@ -1855,7 +1950,7 @@
 		set_power_wells(power_domains, hsw_power_wells);
 	} else if (IS_BROADWELL(dev_priv->dev)) {
 		set_power_wells(power_domains, bdw_power_wells);
-	} else if (IS_SKYLAKE(dev_priv->dev)) {
+	} else if (IS_SKYLAKE(dev_priv->dev) || IS_KABYLAKE(dev_priv->dev)) {
 		set_power_wells(power_domains, skl_power_wells);
 	} else if (IS_BROXTON(dev_priv->dev)) {
 		set_power_wells(power_domains, bxt_power_wells);
@@ -1870,21 +1965,6 @@
 	return 0;
 }
 
-static void intel_runtime_pm_disable(struct drm_i915_private *dev_priv)
-{
-	struct drm_device *dev = dev_priv->dev;
-	struct device *device = &dev->pdev->dev;
-
-	if (!HAS_RUNTIME_PM(dev))
-		return;
-
-	if (!intel_enable_rc6(dev))
-		return;
-
-	/* Make sure we're not suspended first. */
-	pm_runtime_get_sync(device);
-}
-
 /**
  * intel_power_domains_fini - finalizes the power domain structures
  * @dev_priv: i915 device instance
@@ -1895,15 +1975,17 @@
  */
 void intel_power_domains_fini(struct drm_i915_private *dev_priv)
 {
-	intel_runtime_pm_disable(dev_priv);
-
 	/* The i915.ko module is still not prepared to be loaded when
 	 * the power well is not enabled, so just enable it in case
 	 * we're going to unload/reload. */
 	intel_display_set_init_power(dev_priv, true);
+
+	/* Remove the refcount we took to keep power well support disabled. */
+	if (!i915.disable_power_well)
+		intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
 }
 
-static void intel_power_domains_resume(struct drm_i915_private *dev_priv)
+static void intel_power_domains_sync_hw(struct drm_i915_private *dev_priv)
 {
 	struct i915_power_domains *power_domains = &dev_priv->power_domains;
 	struct i915_power_well *power_well;
@@ -1918,6 +2000,47 @@
 	mutex_unlock(&power_domains->lock);
 }
 
+static void skl_display_core_init(struct drm_i915_private *dev_priv,
+				  bool resume)
+{
+	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+	uint32_t val;
+
+	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
+
+	/* enable PCH reset handshake */
+	val = I915_READ(HSW_NDE_RSTWRN_OPT);
+	I915_WRITE(HSW_NDE_RSTWRN_OPT, val | RESET_PCH_HANDSHAKE_ENABLE);
+
+	/* enable PG1 and Misc I/O */
+	mutex_lock(&power_domains->lock);
+	skl_pw1_misc_io_init(dev_priv);
+	mutex_unlock(&power_domains->lock);
+
+	if (!resume)
+		return;
+
+	skl_init_cdclk(dev_priv);
+
+	if (dev_priv->csr.dmc_payload)
+		intel_csr_load_program(dev_priv);
+}
+
+static void skl_display_core_uninit(struct drm_i915_private *dev_priv)
+{
+	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+
+	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
+
+	skl_uninit_cdclk(dev_priv);
+
+	/* The spec doesn't call for removing the reset handshake flag */
+	/* disable PG1 and Misc I/O */
+	mutex_lock(&power_domains->lock);
+	skl_pw1_misc_io_fini(dev_priv);
+	mutex_unlock(&power_domains->lock);
+}
+
 static void chv_phy_control_init(struct drm_i915_private *dev_priv)
 {
 	struct i915_power_well *cmn_bc =
@@ -2040,14 +2163,16 @@
  * This function initializes the hardware power domain state and enables all
  * power domains using intel_display_set_init_power().
  */
-void intel_power_domains_init_hw(struct drm_i915_private *dev_priv)
+void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume)
 {
 	struct drm_device *dev = dev_priv->dev;
 	struct i915_power_domains *power_domains = &dev_priv->power_domains;
 
 	power_domains->initializing = true;
 
-	if (IS_CHERRYVIEW(dev)) {
+	if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
+		skl_display_core_init(dev_priv, resume);
+	} else if (IS_CHERRYVIEW(dev)) {
 		mutex_lock(&power_domains->lock);
 		chv_phy_control_init(dev_priv);
 		mutex_unlock(&power_domains->lock);
@@ -2059,38 +2184,31 @@
 
 	/* For now, we need the power well to be always enabled. */
 	intel_display_set_init_power(dev_priv, true);
-	intel_power_domains_resume(dev_priv);
+	/* Disable power support if the user asked so. */
+	if (!i915.disable_power_well)
+		intel_display_power_get(dev_priv, POWER_DOMAIN_INIT);
+	intel_power_domains_sync_hw(dev_priv);
 	power_domains->initializing = false;
 }
 
 /**
- * intel_aux_display_runtime_get - grab an auxiliary power domain reference
+ * intel_power_domains_suspend - suspend power domain state
  * @dev_priv: i915 device instance
  *
- * This function grabs a power domain reference for the auxiliary power domain
- * (for access to the GMBUS and DP AUX blocks) and ensures that it and all its
- * parents are powered up. Therefore users should only grab a reference to the
- * innermost power domain they need.
- *
- * Any power domain reference obtained by this function must have a symmetric
- * call to intel_aux_display_runtime_put() to release the reference again.
+ * This function prepares the hardware power domain state before entering
+ * system suspend. It must be paired with intel_power_domains_init_hw().
  */
-void intel_aux_display_runtime_get(struct drm_i915_private *dev_priv)
+void intel_power_domains_suspend(struct drm_i915_private *dev_priv)
 {
-	intel_runtime_pm_get(dev_priv);
-}
+	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
+		skl_display_core_uninit(dev_priv);
 
-/**
- * intel_aux_display_runtime_put - release an auxiliary power domain reference
- * @dev_priv: i915 device instance
- *
- * This function drops the auxiliary power domain reference obtained by
- * intel_aux_display_runtime_get() and might power down the corresponding
- * hardware block right away if this is the last reference.
- */
-void intel_aux_display_runtime_put(struct drm_i915_private *dev_priv)
-{
-	intel_runtime_pm_put(dev_priv);
+	/*
+	 * Even if power well support was disabled we still want to disable
+	 * power wells while we are system suspended.
+	 */
+	if (!i915.disable_power_well)
+		intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index c42b636..2e1da06 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -74,7 +74,7 @@
 	struct i2c_adapter ddc;
 
 	/* Register for the SDVO device: SDVOB or SDVOC */
-	uint32_t sdvo_reg;
+	i915_reg_t sdvo_reg;
 
 	/* Active outputs controlled by this SDVO output */
 	uint16_t controlled_output;
@@ -120,8 +120,7 @@
 	 */
 	bool is_tv;
 
-	/* On different gens SDVOB is at different places. */
-	bool is_sdvob;
+	enum port port;
 
 	/* This is for current tv format name */
 	int tv_format_index;
@@ -245,7 +244,7 @@
 	u32 bval = val, cval = val;
 	int i;
 
-	if (intel_sdvo->sdvo_reg == PCH_SDVOB) {
+	if (HAS_PCH_SPLIT(dev_priv)) {
 		I915_WRITE(intel_sdvo->sdvo_reg, val);
 		POSTING_READ(intel_sdvo->sdvo_reg);
 		/*
@@ -259,7 +258,7 @@
 		return;
 	}
 
-	if (intel_sdvo->sdvo_reg == GEN3_SDVOB)
+	if (intel_sdvo->port == PORT_B)
 		cval = I915_READ(GEN3_SDVOC);
 	else
 		bval = I915_READ(GEN3_SDVOB);
@@ -422,7 +421,7 @@
 	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HBUF_DATA),
 };
 
-#define SDVO_NAME(svdo) ((svdo)->is_sdvob ? "SDVOB" : "SDVOC")
+#define SDVO_NAME(svdo) ((svdo)->port == PORT_B ? "SDVOB" : "SDVOC")
 
 static void intel_sdvo_debug_write(struct intel_sdvo *intel_sdvo, u8 cmd,
 				   const void *args, int args_len)
@@ -1282,14 +1281,10 @@
 			sdvox |= SDVO_BORDER_ENABLE;
 	} else {
 		sdvox = I915_READ(intel_sdvo->sdvo_reg);
-		switch (intel_sdvo->sdvo_reg) {
-		case GEN3_SDVOB:
+		if (intel_sdvo->port == PORT_B)
 			sdvox &= SDVOB_PRESERVE_MASK;
-			break;
-		case GEN3_SDVOC:
+		else
 			sdvox &= SDVOC_PRESERVE_MASK;
-			break;
-		}
 		sdvox |= (9 << 19) | SDVO_BORDER_ENABLE;
 	}
 
@@ -1464,12 +1459,23 @@
 	 * matching DP port to be enabled on transcoder A.
 	 */
 	if (HAS_PCH_IBX(dev_priv) && crtc->pipe == PIPE_B) {
+		/*
+		 * We get CPU/PCH FIFO underruns on the other pipe when
+		 * doing the workaround. Sweep them under the rug.
+		 */
+		intel_set_cpu_fifo_underrun_reporting(dev_priv, PIPE_A, false);
+		intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, false);
+
 		temp &= ~SDVO_PIPE_B_SELECT;
 		temp |= SDVO_ENABLE;
 		intel_sdvo_write_sdvox(intel_sdvo, temp);
 
 		temp &= ~SDVO_ENABLE;
 		intel_sdvo_write_sdvox(intel_sdvo, temp);
+
+		intel_wait_for_vblank_if_active(dev_priv->dev, PIPE_A);
+		intel_set_cpu_fifo_underrun_reporting(dev_priv, PIPE_A, true);
+		intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, true);
 	}
 }
 
@@ -2251,7 +2257,7 @@
 {
 	struct sdvo_device_mapping *mapping;
 
-	if (sdvo->is_sdvob)
+	if (sdvo->port == PORT_B)
 		mapping = &(dev_priv->sdvo_mappings[0]);
 	else
 		mapping = &(dev_priv->sdvo_mappings[1]);
@@ -2269,7 +2275,7 @@
 	struct sdvo_device_mapping *mapping;
 	u8 pin;
 
-	if (sdvo->is_sdvob)
+	if (sdvo->port == PORT_B)
 		mapping = &dev_priv->sdvo_mappings[0];
 	else
 		mapping = &dev_priv->sdvo_mappings[1];
@@ -2307,7 +2313,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct sdvo_device_mapping *my_mapping, *other_mapping;
 
-	if (sdvo->is_sdvob) {
+	if (sdvo->port == PORT_B) {
 		my_mapping = &dev_priv->sdvo_mappings[0];
 		other_mapping = &dev_priv->sdvo_mappings[1];
 	} else {
@@ -2332,7 +2338,7 @@
 	/* No SDVO device info is found for another DVO port,
 	 * so use mapping assumption we had before BIOS parsing.
 	 */
-	if (sdvo->is_sdvob)
+	if (sdvo->port == PORT_B)
 		return 0x70;
 	else
 		return 0x72;
@@ -2939,18 +2945,31 @@
 	return i2c_add_adapter(&sdvo->ddc) == 0;
 }
 
-bool intel_sdvo_init(struct drm_device *dev, uint32_t sdvo_reg, bool is_sdvob)
+static void assert_sdvo_port_valid(const struct drm_i915_private *dev_priv,
+				   enum port port)
+{
+	if (HAS_PCH_SPLIT(dev_priv))
+		WARN_ON(port != PORT_B);
+	else
+		WARN_ON(port != PORT_B && port != PORT_C);
+}
+
+bool intel_sdvo_init(struct drm_device *dev,
+		     i915_reg_t sdvo_reg, enum port port)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_encoder *intel_encoder;
 	struct intel_sdvo *intel_sdvo;
 	int i;
+
+	assert_sdvo_port_valid(dev_priv, port);
+
 	intel_sdvo = kzalloc(sizeof(*intel_sdvo), GFP_KERNEL);
 	if (!intel_sdvo)
 		return false;
 
 	intel_sdvo->sdvo_reg = sdvo_reg;
-	intel_sdvo->is_sdvob = is_sdvob;
+	intel_sdvo->port = port;
 	intel_sdvo->slave_addr = intel_sdvo_get_slave_addr(dev, intel_sdvo) >> 1;
 	intel_sdvo_select_i2c_bus(dev_priv, intel_sdvo);
 	if (!intel_sdvo_init_ddc_proxy(intel_sdvo, dev))
@@ -2959,7 +2978,8 @@
 	/* encoder type will be decided later */
 	intel_encoder = &intel_sdvo->base;
 	intel_encoder->type = INTEL_OUTPUT_SDVO;
-	drm_encoder_init(dev, &intel_encoder->base, &intel_sdvo_enc_funcs, 0);
+	drm_encoder_init(dev, &intel_encoder->base, &intel_sdvo_enc_funcs, 0,
+			 NULL);
 
 	/* Read the regs to test if we can talk to the device */
 	for (i = 0; i < 0x40; i++) {
@@ -3000,8 +3020,10 @@
 	 * hotplug lines.
 	 */
 	if (intel_sdvo->hotplug_active) {
-		intel_encoder->hpd_pin =
-			intel_sdvo->is_sdvob ?  HPD_SDVO_B : HPD_SDVO_C;
+		if (intel_sdvo->port == PORT_B)
+			intel_encoder->hpd_pin = HPD_SDVO_B;
+		else
+			intel_encoder->hpd_pin = HPD_SDVO_C;
 	}
 
 	/*
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index 56dc132..dbf4213 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -192,10 +192,9 @@
 	const int pipe = intel_plane->pipe;
 	const int plane = intel_plane->plane + 1;
 	u32 plane_ctl, stride_div, stride;
-	int pixel_size = drm_format_plane_cpp(fb->pixel_format, 0);
 	const struct drm_intel_sprite_colorkey *key =
 		&to_intel_plane_state(drm_plane->state)->ckey;
-	unsigned long surf_addr;
+	u32 surf_addr;
 	u32 tile_height, plane_offset, plane_size;
 	unsigned int rotation;
 	int x_offset, y_offset;
@@ -212,10 +211,6 @@
 	rotation = drm_plane->state->rotation;
 	plane_ctl |= skl_plane_ctl_rotation(rotation);
 
-	intel_update_sprite_watermarks(drm_plane, crtc, src_w, src_h,
-				       pixel_size, true,
-				       src_w != crtc_w || src_h != crtc_h);
-
 	stride_div = intel_fb_stride_alignment(dev, fb->modifier[0],
 					       fb->pixel_format);
 
@@ -297,8 +292,6 @@
 
 	I915_WRITE(PLANE_SURF(pipe, plane), 0);
 	POSTING_READ(PLANE_SURF(pipe, plane));
-
-	intel_update_sprite_watermarks(dplane, crtc, 0, 0, 0, false, false);
 }
 
 static void
@@ -541,10 +534,6 @@
 	if (IS_HASWELL(dev) || IS_BROADWELL(dev))
 		sprctl |= SPRITE_PIPE_CSC_ENABLE;
 
-	intel_update_sprite_watermarks(plane, crtc, src_w, src_h, pixel_size,
-				       true,
-				       src_w != crtc_w || src_h != crtc_h);
-
 	/* Sizes are 0 based */
 	src_w--;
 	src_h--;
@@ -678,10 +667,6 @@
 	if (IS_GEN6(dev))
 		dvscntr |= DVS_TRICKLE_FEED_DISABLE; /* must disable */
 
-	intel_update_sprite_watermarks(plane, crtc, src_w, src_h,
-				       pixel_size, true,
-				       src_w != crtc_w || src_h != crtc_h);
-
 	/* Sizes are 0 based */
 	src_w--;
 	src_h--;
@@ -832,8 +817,8 @@
 		hscale = drm_rect_calc_hscale(src, dst, min_scale, max_scale);
 		if (hscale < 0) {
 			DRM_DEBUG_KMS("Horizontal scaling factor out of limits\n");
-			drm_rect_debug_print(src, true);
-			drm_rect_debug_print(dst, false);
+			drm_rect_debug_print("src: ", src, true);
+			drm_rect_debug_print("dst: ", dst, false);
 
 			return hscale;
 		}
@@ -841,8 +826,8 @@
 		vscale = drm_rect_calc_vscale(src, dst, min_scale, max_scale);
 		if (vscale < 0) {
 			DRM_DEBUG_KMS("Vertical scaling factor out of limits\n");
-			drm_rect_debug_print(src, true);
-			drm_rect_debug_print(dst, false);
+			drm_rect_debug_print("src: ", src, true);
+			drm_rect_debug_print("dst: ", dst, false);
 
 			return vscale;
 		}
@@ -938,9 +923,6 @@
 
 	crtc = crtc ? crtc : plane->crtc;
 
-	if (!crtc->state->active)
-		return;
-
 	if (state->visible) {
 		intel_plane->update_plane(plane, crtc, fb,
 					  state->dst.x1, state->dst.y1,
@@ -1141,7 +1123,7 @@
 	ret = drm_universal_plane_init(dev, &intel_plane->base, possible_crtcs,
 				       &intel_plane_funcs,
 				       plane_formats, num_plane_formats,
-				       DRM_PLANE_TYPE_OVERLAY);
+				       DRM_PLANE_TYPE_OVERLAY, NULL);
 	if (ret) {
 		kfree(intel_plane);
 		goto out;
diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c
index 6bea789..948cbff 100644
--- a/drivers/gpu/drm/i915/intel_tv.c
+++ b/drivers/gpu/drm/i915/intel_tv.c
@@ -1645,7 +1645,7 @@
 			   DRM_MODE_CONNECTOR_SVIDEO);
 
 	drm_encoder_init(dev, &intel_encoder->base, &intel_tv_enc_funcs,
-			 DRM_MODE_ENCODER_TVDAC);
+			 DRM_MODE_ENCODER_TVDAC, NULL);
 
 	intel_encoder->compute_config = intel_tv_compute_config;
 	intel_encoder->get_config = intel_tv_get_config;
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 43cba12..c2358ba 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -29,19 +29,7 @@
 
 #define FORCEWAKE_ACK_TIMEOUT_MS 50
 
-#define __raw_i915_read8(dev_priv__, reg__) readb((dev_priv__)->regs + (reg__))
-#define __raw_i915_write8(dev_priv__, reg__, val__) writeb(val__, (dev_priv__)->regs + (reg__))
-
-#define __raw_i915_read16(dev_priv__, reg__) readw((dev_priv__)->regs + (reg__))
-#define __raw_i915_write16(dev_priv__, reg__, val__) writew(val__, (dev_priv__)->regs + (reg__))
-
-#define __raw_i915_read32(dev_priv__, reg__) readl((dev_priv__)->regs + (reg__))
-#define __raw_i915_write32(dev_priv__, reg__, val__) writel(val__, (dev_priv__)->regs + (reg__))
-
-#define __raw_i915_read64(dev_priv__, reg__) readq((dev_priv__)->regs + (reg__))
-#define __raw_i915_write64(dev_priv__, reg__, val__) writeq(val__, (dev_priv__)->regs + (reg__))
-
-#define __raw_posting_read(dev_priv__, reg__) (void)__raw_i915_read32(dev_priv__, reg__)
+#define __raw_posting_read(dev_priv__, reg__) (void)__raw_i915_read32((dev_priv__), (reg__))
 
 static const char * const forcewake_domain_names[] = {
 	"render",
@@ -72,7 +60,7 @@
 static inline void
 fw_domain_reset(const struct intel_uncore_forcewake_domain *d)
 {
-	WARN_ON(d->reg_set == 0);
+	WARN_ON(!i915_mmio_reg_valid(d->reg_set));
 	__raw_i915_write32(d->i915, d->reg_set, d->val_reset);
 }
 
@@ -118,7 +106,7 @@
 fw_domain_posting_read(const struct intel_uncore_forcewake_domain *d)
 {
 	/* something from same cacheline, but not from the set register */
-	if (d->reg_post)
+	if (i915_mmio_reg_valid(d->reg_post))
 		__raw_posting_read(d->i915, d->reg_post);
 }
 
@@ -525,8 +513,7 @@
 }
 
 /* We give fast paths for the really cool registers */
-#define NEEDS_FORCE_WAKE(reg) \
-	 ((reg) < 0x40000 && (reg) != FORCEWAKE)
+#define NEEDS_FORCE_WAKE(reg) ((reg) < 0x40000)
 
 #define REG_RANGE(reg, start, end) ((reg) >= (start) && (reg) < (end))
 
@@ -589,7 +576,7 @@
 	REG_RANGE((reg), 0x9400, 0x9800)
 
 #define FORCEWAKE_GEN9_BLITTER_RANGE_OFFSET(reg) \
-	((reg) < 0x40000 &&\
+	((reg) < 0x40000 && \
 	 !FORCEWAKE_GEN9_UNCORE_RANGE_OFFSET(reg) && \
 	 !FORCEWAKE_GEN9_RENDER_RANGE_OFFSET(reg) && \
 	 !FORCEWAKE_GEN9_MEDIA_RANGE_OFFSET(reg) && \
@@ -605,8 +592,8 @@
 }
 
 static void
-hsw_unclaimed_reg_debug(struct drm_i915_private *dev_priv, u32 reg, bool read,
-			bool before)
+hsw_unclaimed_reg_debug(struct drm_i915_private *dev_priv,
+			i915_reg_t reg, bool read, bool before)
 {
 	const char *op = read ? "reading" : "writing to";
 	const char *when = before ? "before" : "after";
@@ -616,7 +603,7 @@
 
 	if (__raw_i915_read32(dev_priv, FPGA_DBG) & FPGA_DBG_RM_NOCLAIM) {
 		WARN(1, "Unclaimed register detected %s %s register 0x%x\n",
-		     when, op, reg);
+		     when, op, i915_mmio_reg_offset(reg));
 		__raw_i915_write32(dev_priv, FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
 		i915.mmio_debug--; /* Only report the first N failures */
 	}
@@ -649,7 +636,7 @@
 
 #define __gen2_read(x) \
 static u##x \
-gen2_read##x(struct drm_i915_private *dev_priv, off_t reg, bool trace) { \
+gen2_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
 	GEN2_READ_HEADER(x); \
 	val = __raw_i915_read##x(dev_priv, reg); \
 	GEN2_READ_FOOTER; \
@@ -657,7 +644,7 @@
 
 #define __gen5_read(x) \
 static u##x \
-gen5_read##x(struct drm_i915_private *dev_priv, off_t reg, bool trace) { \
+gen5_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
 	GEN2_READ_HEADER(x); \
 	ilk_dummy_write(dev_priv); \
 	val = __raw_i915_read##x(dev_priv, reg); \
@@ -680,6 +667,7 @@
 #undef GEN2_READ_HEADER
 
 #define GEN6_READ_HEADER(x) \
+	u32 offset = i915_mmio_reg_offset(reg); \
 	unsigned long irqflags; \
 	u##x val = 0; \
 	assert_device_not_suspended(dev_priv); \
@@ -714,20 +702,12 @@
 		dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains);
 }
 
-#define __vgpu_read(x) \
-static u##x \
-vgpu_read##x(struct drm_i915_private *dev_priv, off_t reg, bool trace) { \
-	GEN6_READ_HEADER(x); \
-	val = __raw_i915_read##x(dev_priv, reg); \
-	GEN6_READ_FOOTER; \
-}
-
 #define __gen6_read(x) \
 static u##x \
-gen6_read##x(struct drm_i915_private *dev_priv, off_t reg, bool trace) { \
+gen6_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
 	GEN6_READ_HEADER(x); \
 	hsw_unclaimed_reg_debug(dev_priv, reg, true, true); \
-	if (NEEDS_FORCE_WAKE(reg)) \
+	if (NEEDS_FORCE_WAKE(offset)) \
 		__force_wake_get(dev_priv, FORCEWAKE_RENDER); \
 	val = __raw_i915_read##x(dev_priv, reg); \
 	hsw_unclaimed_reg_debug(dev_priv, reg, true, false); \
@@ -736,47 +716,56 @@
 
 #define __vlv_read(x) \
 static u##x \
-vlv_read##x(struct drm_i915_private *dev_priv, off_t reg, bool trace) { \
+vlv_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
+	enum forcewake_domains fw_engine = 0; \
 	GEN6_READ_HEADER(x); \
-	if (FORCEWAKE_VLV_RENDER_RANGE_OFFSET(reg)) \
-		__force_wake_get(dev_priv, FORCEWAKE_RENDER); \
-	else if (FORCEWAKE_VLV_MEDIA_RANGE_OFFSET(reg)) \
-		__force_wake_get(dev_priv, FORCEWAKE_MEDIA); \
+	if (!NEEDS_FORCE_WAKE(offset)) \
+		fw_engine = 0; \
+	else if (FORCEWAKE_VLV_RENDER_RANGE_OFFSET(offset)) \
+		fw_engine = FORCEWAKE_RENDER; \
+	else if (FORCEWAKE_VLV_MEDIA_RANGE_OFFSET(offset)) \
+		fw_engine = FORCEWAKE_MEDIA; \
+	if (fw_engine) \
+		__force_wake_get(dev_priv, fw_engine); \
 	val = __raw_i915_read##x(dev_priv, reg); \
 	GEN6_READ_FOOTER; \
 }
 
 #define __chv_read(x) \
 static u##x \
-chv_read##x(struct drm_i915_private *dev_priv, off_t reg, bool trace) { \
+chv_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
+	enum forcewake_domains fw_engine = 0; \
 	GEN6_READ_HEADER(x); \
-	if (FORCEWAKE_CHV_RENDER_RANGE_OFFSET(reg)) \
-		__force_wake_get(dev_priv, FORCEWAKE_RENDER); \
-	else if (FORCEWAKE_CHV_MEDIA_RANGE_OFFSET(reg)) \
-		__force_wake_get(dev_priv, FORCEWAKE_MEDIA); \
-	else if (FORCEWAKE_CHV_COMMON_RANGE_OFFSET(reg)) \
-		__force_wake_get(dev_priv, \
-				 FORCEWAKE_RENDER | FORCEWAKE_MEDIA); \
+	if (!NEEDS_FORCE_WAKE(offset)) \
+		fw_engine = 0; \
+	else if (FORCEWAKE_CHV_RENDER_RANGE_OFFSET(offset)) \
+		fw_engine = FORCEWAKE_RENDER; \
+	else if (FORCEWAKE_CHV_MEDIA_RANGE_OFFSET(offset)) \
+		fw_engine = FORCEWAKE_MEDIA; \
+	else if (FORCEWAKE_CHV_COMMON_RANGE_OFFSET(offset)) \
+		fw_engine = FORCEWAKE_RENDER | FORCEWAKE_MEDIA; \
+	if (fw_engine) \
+		__force_wake_get(dev_priv, fw_engine); \
 	val = __raw_i915_read##x(dev_priv, reg); \
 	GEN6_READ_FOOTER; \
 }
 
 #define SKL_NEEDS_FORCE_WAKE(reg) \
-	 ((reg) < 0x40000 && !FORCEWAKE_GEN9_UNCORE_RANGE_OFFSET(reg))
+	((reg) < 0x40000 && !FORCEWAKE_GEN9_UNCORE_RANGE_OFFSET(reg))
 
 #define __gen9_read(x) \
 static u##x \
-gen9_read##x(struct drm_i915_private *dev_priv, off_t reg, bool trace) { \
+gen9_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
 	enum forcewake_domains fw_engine; \
 	GEN6_READ_HEADER(x); \
 	hsw_unclaimed_reg_debug(dev_priv, reg, true, true); \
-	if (!SKL_NEEDS_FORCE_WAKE(reg)) \
+	if (!SKL_NEEDS_FORCE_WAKE(offset)) \
 		fw_engine = 0; \
-	else if (FORCEWAKE_GEN9_RENDER_RANGE_OFFSET(reg)) \
+	else if (FORCEWAKE_GEN9_RENDER_RANGE_OFFSET(offset)) \
 		fw_engine = FORCEWAKE_RENDER; \
-	else if (FORCEWAKE_GEN9_MEDIA_RANGE_OFFSET(reg)) \
+	else if (FORCEWAKE_GEN9_MEDIA_RANGE_OFFSET(offset)) \
 		fw_engine = FORCEWAKE_MEDIA; \
-	else if (FORCEWAKE_GEN9_COMMON_RANGE_OFFSET(reg)) \
+	else if (FORCEWAKE_GEN9_COMMON_RANGE_OFFSET(offset)) \
 		fw_engine = FORCEWAKE_RENDER | FORCEWAKE_MEDIA; \
 	else \
 		fw_engine = FORCEWAKE_BLITTER; \
@@ -787,10 +776,6 @@
 	GEN6_READ_FOOTER; \
 }
 
-__vgpu_read(8)
-__vgpu_read(16)
-__vgpu_read(32)
-__vgpu_read(64)
 __gen9_read(8)
 __gen9_read(16)
 __gen9_read(32)
@@ -812,10 +797,37 @@
 #undef __chv_read
 #undef __vlv_read
 #undef __gen6_read
-#undef __vgpu_read
 #undef GEN6_READ_FOOTER
 #undef GEN6_READ_HEADER
 
+#define VGPU_READ_HEADER(x) \
+	unsigned long irqflags; \
+	u##x val = 0; \
+	assert_device_not_suspended(dev_priv); \
+	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags)
+
+#define VGPU_READ_FOOTER \
+	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); \
+	trace_i915_reg_rw(false, reg, val, sizeof(val), trace); \
+	return val
+
+#define __vgpu_read(x) \
+static u##x \
+vgpu_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
+	VGPU_READ_HEADER(x); \
+	val = __raw_i915_read##x(dev_priv, reg); \
+	VGPU_READ_FOOTER; \
+}
+
+__vgpu_read(8)
+__vgpu_read(16)
+__vgpu_read(32)
+__vgpu_read(64)
+
+#undef __vgpu_read
+#undef VGPU_READ_FOOTER
+#undef VGPU_READ_HEADER
+
 #define GEN2_WRITE_HEADER \
 	trace_i915_reg_rw(true, reg, val, sizeof(val), trace); \
 	assert_device_not_suspended(dev_priv); \
@@ -824,7 +836,7 @@
 
 #define __gen2_write(x) \
 static void \
-gen2_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, bool trace) { \
+gen2_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { \
 	GEN2_WRITE_HEADER; \
 	__raw_i915_write##x(dev_priv, reg, val); \
 	GEN2_WRITE_FOOTER; \
@@ -832,7 +844,7 @@
 
 #define __gen5_write(x) \
 static void \
-gen5_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, bool trace) { \
+gen5_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { \
 	GEN2_WRITE_HEADER; \
 	ilk_dummy_write(dev_priv); \
 	__raw_i915_write##x(dev_priv, reg, val); \
@@ -855,6 +867,7 @@
 #undef GEN2_WRITE_HEADER
 
 #define GEN6_WRITE_HEADER \
+	u32 offset = i915_mmio_reg_offset(reg); \
 	unsigned long irqflags; \
 	trace_i915_reg_rw(true, reg, val, sizeof(val), trace); \
 	assert_device_not_suspended(dev_priv); \
@@ -865,10 +878,10 @@
 
 #define __gen6_write(x) \
 static void \
-gen6_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, bool trace) { \
+gen6_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { \
 	u32 __fifo_ret = 0; \
 	GEN6_WRITE_HEADER; \
-	if (NEEDS_FORCE_WAKE(reg)) { \
+	if (NEEDS_FORCE_WAKE(offset)) { \
 		__fifo_ret = __gen6_gt_wait_for_fifo(dev_priv); \
 	} \
 	__raw_i915_write##x(dev_priv, reg, val); \
@@ -880,10 +893,10 @@
 
 #define __hsw_write(x) \
 static void \
-hsw_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, bool trace) { \
+hsw_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { \
 	u32 __fifo_ret = 0; \
 	GEN6_WRITE_HEADER; \
-	if (NEEDS_FORCE_WAKE(reg)) { \
+	if (NEEDS_FORCE_WAKE(offset)) { \
 		__fifo_ret = __gen6_gt_wait_for_fifo(dev_priv); \
 	} \
 	hsw_unclaimed_reg_debug(dev_priv, reg, false, true); \
@@ -896,15 +909,7 @@
 	GEN6_WRITE_FOOTER; \
 }
 
-#define __vgpu_write(x) \
-static void vgpu_write##x(struct drm_i915_private *dev_priv, \
-			  off_t reg, u##x val, bool trace) { \
-	GEN6_WRITE_HEADER; \
-	__raw_i915_write##x(dev_priv, reg, val); \
-	GEN6_WRITE_FOOTER; \
-}
-
-static const u32 gen8_shadowed_regs[] = {
+static const i915_reg_t gen8_shadowed_regs[] = {
 	FORCEWAKE_MT,
 	GEN6_RPNSWREQ,
 	GEN6_RC_VIDEO_FREQ,
@@ -915,11 +920,12 @@
 	/* TODO: Other registers are not yet used */
 };
 
-static bool is_gen8_shadowed(struct drm_i915_private *dev_priv, u32 reg)
+static bool is_gen8_shadowed(struct drm_i915_private *dev_priv,
+			     i915_reg_t reg)
 {
 	int i;
 	for (i = 0; i < ARRAY_SIZE(gen8_shadowed_regs); i++)
-		if (reg == gen8_shadowed_regs[i])
+		if (i915_mmio_reg_equal(reg, gen8_shadowed_regs[i]))
 			return true;
 
 	return false;
@@ -927,10 +933,10 @@
 
 #define __gen8_write(x) \
 static void \
-gen8_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, bool trace) { \
+gen8_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { \
 	GEN6_WRITE_HEADER; \
 	hsw_unclaimed_reg_debug(dev_priv, reg, false, true); \
-	if (reg < 0x40000 && !is_gen8_shadowed(dev_priv, reg)) \
+	if (NEEDS_FORCE_WAKE(offset) && !is_gen8_shadowed(dev_priv, reg)) \
 		__force_wake_get(dev_priv, FORCEWAKE_RENDER); \
 	__raw_i915_write##x(dev_priv, reg, val); \
 	hsw_unclaimed_reg_debug(dev_priv, reg, false, false); \
@@ -940,22 +946,25 @@
 
 #define __chv_write(x) \
 static void \
-chv_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, bool trace) { \
-	bool shadowed = is_gen8_shadowed(dev_priv, reg); \
+chv_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { \
+	enum forcewake_domains fw_engine = 0; \
 	GEN6_WRITE_HEADER; \
-	if (!shadowed) { \
-		if (FORCEWAKE_CHV_RENDER_RANGE_OFFSET(reg)) \
-			__force_wake_get(dev_priv, FORCEWAKE_RENDER); \
-		else if (FORCEWAKE_CHV_MEDIA_RANGE_OFFSET(reg)) \
-			__force_wake_get(dev_priv, FORCEWAKE_MEDIA); \
-		else if (FORCEWAKE_CHV_COMMON_RANGE_OFFSET(reg)) \
-			__force_wake_get(dev_priv, FORCEWAKE_RENDER | FORCEWAKE_MEDIA); \
-	} \
+	if (!NEEDS_FORCE_WAKE(offset) || \
+	    is_gen8_shadowed(dev_priv, reg)) \
+		fw_engine = 0; \
+	else if (FORCEWAKE_CHV_RENDER_RANGE_OFFSET(offset)) \
+		fw_engine = FORCEWAKE_RENDER; \
+	else if (FORCEWAKE_CHV_MEDIA_RANGE_OFFSET(offset)) \
+		fw_engine = FORCEWAKE_MEDIA; \
+	else if (FORCEWAKE_CHV_COMMON_RANGE_OFFSET(offset)) \
+		fw_engine = FORCEWAKE_RENDER | FORCEWAKE_MEDIA; \
+	if (fw_engine) \
+		__force_wake_get(dev_priv, fw_engine); \
 	__raw_i915_write##x(dev_priv, reg, val); \
 	GEN6_WRITE_FOOTER; \
 }
 
-static const u32 gen9_shadowed_regs[] = {
+static const i915_reg_t gen9_shadowed_regs[] = {
 	RING_TAIL(RENDER_RING_BASE),
 	RING_TAIL(GEN6_BSD_RING_BASE),
 	RING_TAIL(VEBOX_RING_BASE),
@@ -968,11 +977,12 @@
 	/* TODO: Other registers are not yet used */
 };
 
-static bool is_gen9_shadowed(struct drm_i915_private *dev_priv, u32 reg)
+static bool is_gen9_shadowed(struct drm_i915_private *dev_priv,
+			     i915_reg_t reg)
 {
 	int i;
 	for (i = 0; i < ARRAY_SIZE(gen9_shadowed_regs); i++)
-		if (reg == gen9_shadowed_regs[i])
+		if (i915_mmio_reg_equal(reg, gen9_shadowed_regs[i]))
 			return true;
 
 	return false;
@@ -980,19 +990,19 @@
 
 #define __gen9_write(x) \
 static void \
-gen9_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, \
+gen9_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, \
 		bool trace) { \
 	enum forcewake_domains fw_engine; \
 	GEN6_WRITE_HEADER; \
 	hsw_unclaimed_reg_debug(dev_priv, reg, false, true); \
-	if (!SKL_NEEDS_FORCE_WAKE(reg) || \
+	if (!SKL_NEEDS_FORCE_WAKE(offset) || \
 	    is_gen9_shadowed(dev_priv, reg)) \
 		fw_engine = 0; \
-	else if (FORCEWAKE_GEN9_RENDER_RANGE_OFFSET(reg)) \
+	else if (FORCEWAKE_GEN9_RENDER_RANGE_OFFSET(offset)) \
 		fw_engine = FORCEWAKE_RENDER; \
-	else if (FORCEWAKE_GEN9_MEDIA_RANGE_OFFSET(reg)) \
+	else if (FORCEWAKE_GEN9_MEDIA_RANGE_OFFSET(offset)) \
 		fw_engine = FORCEWAKE_MEDIA; \
-	else if (FORCEWAKE_GEN9_COMMON_RANGE_OFFSET(reg)) \
+	else if (FORCEWAKE_GEN9_COMMON_RANGE_OFFSET(offset)) \
 		fw_engine = FORCEWAKE_RENDER | FORCEWAKE_MEDIA; \
 	else \
 		fw_engine = FORCEWAKE_BLITTER; \
@@ -1024,20 +1034,41 @@
 __gen6_write(16)
 __gen6_write(32)
 __gen6_write(64)
-__vgpu_write(8)
-__vgpu_write(16)
-__vgpu_write(32)
-__vgpu_write(64)
 
 #undef __gen9_write
 #undef __chv_write
 #undef __gen8_write
 #undef __hsw_write
 #undef __gen6_write
-#undef __vgpu_write
 #undef GEN6_WRITE_FOOTER
 #undef GEN6_WRITE_HEADER
 
+#define VGPU_WRITE_HEADER \
+	unsigned long irqflags; \
+	trace_i915_reg_rw(true, reg, val, sizeof(val), trace); \
+	assert_device_not_suspended(dev_priv); \
+	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags)
+
+#define VGPU_WRITE_FOOTER \
+	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags)
+
+#define __vgpu_write(x) \
+static void vgpu_write##x(struct drm_i915_private *dev_priv, \
+			  i915_reg_t reg, u##x val, bool trace) { \
+	VGPU_WRITE_HEADER; \
+	__raw_i915_write##x(dev_priv, reg, val); \
+	VGPU_WRITE_FOOTER; \
+}
+
+__vgpu_write(8)
+__vgpu_write(16)
+__vgpu_write(32)
+__vgpu_write(64)
+
+#undef __vgpu_write
+#undef VGPU_WRITE_FOOTER
+#undef VGPU_WRITE_HEADER
+
 #define ASSIGN_WRITE_MMIO_VFUNCS(x) \
 do { \
 	dev_priv->uncore.funcs.mmio_writeb = x##_write8; \
@@ -1057,7 +1088,8 @@
 
 static void fw_domain_init(struct drm_i915_private *dev_priv,
 			   enum forcewake_domain_id domain_id,
-			   u32 reg_set, u32 reg_ack)
+			   i915_reg_t reg_set,
+			   i915_reg_t reg_ack)
 {
 	struct intel_uncore_forcewake_domain *d;
 
@@ -1087,8 +1119,6 @@
 		d->reg_post = FORCEWAKE_ACK_VLV;
 	else if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv) || IS_GEN8(dev_priv))
 		d->reg_post = ECOBUS;
-	else
-		d->reg_post = 0;
 
 	d->i915 = dev_priv;
 	d->id = domain_id;
@@ -1262,12 +1292,14 @@
 #define GEN_RANGE(l, h) GENMASK(h, l)
 
 static const struct register_whitelist {
-	uint64_t offset;
+	i915_reg_t offset_ldw, offset_udw;
 	uint32_t size;
 	/* supported gens, 0x10 for 4, 0x30 for 4 and 5, etc. */
 	uint32_t gen_bitmask;
 } whitelist[] = {
-	{ RING_TIMESTAMP(RENDER_RING_BASE), 8, GEN_RANGE(4, 9) },
+	{ .offset_ldw = RING_TIMESTAMP(RENDER_RING_BASE),
+	  .offset_udw = RING_TIMESTAMP_UDW(RENDER_RING_BASE),
+	  .size = 8, .gen_bitmask = GEN_RANGE(4, 9) },
 };
 
 int i915_reg_read_ioctl(struct drm_device *dev,
@@ -1277,11 +1309,11 @@
 	struct drm_i915_reg_read *reg = data;
 	struct register_whitelist const *entry = whitelist;
 	unsigned size;
-	u64 offset;
+	i915_reg_t offset_ldw, offset_udw;
 	int i, ret = 0;
 
 	for (i = 0; i < ARRAY_SIZE(whitelist); i++, entry++) {
-		if (entry->offset == (reg->offset & -entry->size) &&
+		if (i915_mmio_reg_offset(entry->offset_ldw) == (reg->offset & -entry->size) &&
 		    (1 << INTEL_INFO(dev)->gen & entry->gen_bitmask))
 			break;
 	}
@@ -1293,27 +1325,28 @@
 	 * be naturally aligned (and those that are not so aligned merely
 	 * limit the available flags for that register).
 	 */
-	offset = entry->offset;
+	offset_ldw = entry->offset_ldw;
+	offset_udw = entry->offset_udw;
 	size = entry->size;
-	size |= reg->offset ^ offset;
+	size |= reg->offset ^ i915_mmio_reg_offset(offset_ldw);
 
 	intel_runtime_pm_get(dev_priv);
 
 	switch (size) {
 	case 8 | 1:
-		reg->val = I915_READ64_2x32(offset, offset+4);
+		reg->val = I915_READ64_2x32(offset_ldw, offset_udw);
 		break;
 	case 8:
-		reg->val = I915_READ64(offset);
+		reg->val = I915_READ64(offset_ldw);
 		break;
 	case 4:
-		reg->val = I915_READ(offset);
+		reg->val = I915_READ(offset_ldw);
 		break;
 	case 2:
-		reg->val = I915_READ16(offset);
+		reg->val = I915_READ16(offset_ldw);
 		break;
 	case 1:
-		reg->val = I915_READ8(offset);
+		reg->val = I915_READ8(offset_ldw);
 		break;
 	default:
 		ret = -EINVAL;
@@ -1470,7 +1503,7 @@
 }
 
 static int wait_for_register(struct drm_i915_private *dev_priv,
-			     const u32 reg,
+			     i915_reg_t reg,
 			     const u32 mask,
 			     const u32 value,
 			     const unsigned long timeout_ms)
diff --git a/drivers/gpu/drm/imx/Kconfig b/drivers/gpu/drm/imx/Kconfig
index 2b81a41..35ca4f0 100644
--- a/drivers/gpu/drm/imx/Kconfig
+++ b/drivers/gpu/drm/imx/Kconfig
@@ -10,15 +10,6 @@
 	help
 	  enable i.MX graphics support
 
-config DRM_IMX_FB_HELPER
-	tristate "provide legacy framebuffer /dev/fb0"
-	select DRM_KMS_CMA_HELPER
-	depends on DRM_IMX
-	help
-	  The DRM framework can provide a legacy /dev/fb0 framebuffer
-	  for your device. This is necessary to get a framebuffer console
-	  and also for applications using the legacy framebuffer API
-
 config DRM_IMX_PARALLEL_DISPLAY
 	tristate "Support for parallel displays"
 	select DRM_PANEL
diff --git a/drivers/gpu/drm/imx/dw_hdmi-imx.c b/drivers/gpu/drm/imx/dw_hdmi-imx.c
index 98605ea..35fcf6b 100644
--- a/drivers/gpu/drm/imx/dw_hdmi-imx.c
+++ b/drivers/gpu/drm/imx/dw_hdmi-imx.c
@@ -251,7 +251,7 @@
 
 	drm_encoder_helper_add(encoder, &dw_hdmi_imx_encoder_helper_funcs);
 	drm_encoder_init(drm, encoder, &dw_hdmi_imx_encoder_funcs,
-			 DRM_MODE_ENCODER_TMDS);
+			 DRM_MODE_ENCODER_TMDS, NULL);
 
 	return dw_hdmi_bind(dev, master, data, encoder, iores, irq, plat_data);
 }
diff --git a/drivers/gpu/drm/imx/imx-drm-core.c b/drivers/gpu/drm/imx/imx-drm-core.c
index 64f16ea..09e20ea 100644
--- a/drivers/gpu/drm/imx/imx-drm-core.c
+++ b/drivers/gpu/drm/imx/imx-drm-core.c
@@ -49,8 +49,10 @@
 	struct imx_drm_crtc_helper_funcs	imx_drm_helper_funcs;
 };
 
+#if IS_ENABLED(CONFIG_DRM_FBDEV_EMULATION)
 static int legacyfb_depth = 16;
 module_param(legacyfb_depth, int, 0444);
+#endif
 
 int imx_drm_crtc_id(struct imx_drm_crtc *crtc)
 {
@@ -60,26 +62,19 @@
 
 static void imx_drm_driver_lastclose(struct drm_device *drm)
 {
-#if IS_ENABLED(CONFIG_DRM_IMX_FB_HELPER)
 	struct imx_drm_device *imxdrm = drm->dev_private;
 
-	if (imxdrm->fbhelper)
-		drm_fbdev_cma_restore_mode(imxdrm->fbhelper);
-#endif
+	drm_fbdev_cma_restore_mode(imxdrm->fbhelper);
 }
 
 static int imx_drm_driver_unload(struct drm_device *drm)
 {
-#if IS_ENABLED(CONFIG_DRM_IMX_FB_HELPER)
 	struct imx_drm_device *imxdrm = drm->dev_private;
-#endif
 
 	drm_kms_helper_poll_fini(drm);
 
-#if IS_ENABLED(CONFIG_DRM_IMX_FB_HELPER)
 	if (imxdrm->fbhelper)
 		drm_fbdev_cma_fini(imxdrm->fbhelper);
-#endif
 
 	component_unbind_all(drm->dev, drm);
 
@@ -215,11 +210,9 @@
 
 static void imx_drm_output_poll_changed(struct drm_device *drm)
 {
-#if IS_ENABLED(CONFIG_DRM_IMX_FB_HELPER)
 	struct imx_drm_device *imxdrm = drm->dev_private;
 
 	drm_fbdev_cma_hotplug_event(imxdrm->fbhelper);
-#endif
 }
 
 static struct drm_mode_config_funcs imx_drm_mode_config_funcs = {
@@ -308,7 +301,7 @@
 	 * The fb helper takes copies of key hardware information, so the
 	 * crtcs/connectors/encoders must not change after this point.
 	 */
-#if IS_ENABLED(CONFIG_DRM_IMX_FB_HELPER)
+#if IS_ENABLED(CONFIG_DRM_FBDEV_EMULATION)
 	if (legacyfb_depth != 16 && legacyfb_depth != 32) {
 		dev_warn(drm->dev, "Invalid legacyfb_depth.  Defaulting to 16bpp\n");
 		legacyfb_depth = 16;
@@ -340,7 +333,7 @@
  * imx_drm_add_crtc - add a new crtc
  */
 int imx_drm_add_crtc(struct drm_device *drm, struct drm_crtc *crtc,
-		struct imx_drm_crtc **new_crtc,
+		struct imx_drm_crtc **new_crtc, struct drm_plane *primary_plane,
 		const struct imx_drm_crtc_helper_funcs *imx_drm_helper_funcs,
 		struct device_node *port)
 {
@@ -379,8 +372,8 @@
 	drm_crtc_helper_add(crtc,
 			imx_drm_crtc->imx_drm_helper_funcs.crtc_helper_funcs);
 
-	drm_crtc_init(drm, crtc,
-			imx_drm_crtc->imx_drm_helper_funcs.crtc_funcs);
+	drm_crtc_init_with_planes(drm, crtc, primary_plane, NULL,
+			imx_drm_crtc->imx_drm_helper_funcs.crtc_funcs, NULL);
 
 	return 0;
 
diff --git a/drivers/gpu/drm/imx/imx-drm.h b/drivers/gpu/drm/imx/imx-drm.h
index 28e776d..83284b4 100644
--- a/drivers/gpu/drm/imx/imx-drm.h
+++ b/drivers/gpu/drm/imx/imx-drm.h
@@ -9,6 +9,7 @@
 struct drm_encoder;
 struct drm_fbdev_cma;
 struct drm_framebuffer;
+struct drm_plane;
 struct imx_drm_crtc;
 struct platform_device;
 
@@ -24,7 +25,7 @@
 };
 
 int imx_drm_add_crtc(struct drm_device *drm, struct drm_crtc *crtc,
-		struct imx_drm_crtc **new_crtc,
+		struct imx_drm_crtc **new_crtc, struct drm_plane *primary_plane,
 		const struct imx_drm_crtc_helper_funcs *imx_helper_funcs,
 		struct device_node *port);
 int imx_drm_remove_crtc(struct imx_drm_crtc *);
diff --git a/drivers/gpu/drm/imx/imx-ldb.c b/drivers/gpu/drm/imx/imx-ldb.c
index abacc8f..c79a61b 100644
--- a/drivers/gpu/drm/imx/imx-ldb.c
+++ b/drivers/gpu/drm/imx/imx-ldb.c
@@ -422,7 +422,7 @@
 	drm_encoder_helper_add(&imx_ldb_ch->encoder,
 			&imx_ldb_encoder_helper_funcs);
 	drm_encoder_init(drm, &imx_ldb_ch->encoder, &imx_ldb_encoder_funcs,
-			 DRM_MODE_ENCODER_LVDS);
+			 DRM_MODE_ENCODER_LVDS, NULL);
 
 	drm_connector_helper_add(&imx_ldb_ch->connector,
 			&imx_ldb_connector_helper_funcs);
diff --git a/drivers/gpu/drm/imx/imx-tve.c b/drivers/gpu/drm/imx/imx-tve.c
index e671ad3..e61a8fc 100644
--- a/drivers/gpu/drm/imx/imx-tve.c
+++ b/drivers/gpu/drm/imx/imx-tve.c
@@ -508,7 +508,7 @@
 
 	drm_encoder_helper_add(&tve->encoder, &imx_tve_encoder_helper_funcs);
 	drm_encoder_init(drm, &tve->encoder, &imx_tve_encoder_funcs,
-			 encoder_type);
+			 encoder_type, NULL);
 
 	drm_connector_helper_add(&tve->connector,
 			&imx_tve_connector_helper_funcs);
@@ -721,6 +721,7 @@
 	{ .compatible = "fsl,imx53-tve", },
 	{ /* sentinel */ }
 };
+MODULE_DEVICE_TABLE(of, imx_tve_dt_ids);
 
 static struct platform_driver imx_tve_driver = {
 	.probe		= imx_tve_probe,
diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c
index 7bc8301..4ab841e 100644
--- a/drivers/gpu/drm/imx/ipuv3-crtc.c
+++ b/drivers/gpu/drm/imx/ipuv3-crtc.c
@@ -212,7 +212,8 @@
 
 	spin_lock_irqsave(&drm->event_lock, flags);
 	if (ipu_crtc->page_flip_event)
-		drm_send_vblank_event(drm, -1, ipu_crtc->page_flip_event);
+		drm_crtc_send_vblank_event(&ipu_crtc->base,
+					   ipu_crtc->page_flip_event);
 	ipu_crtc->page_flip_event = NULL;
 	imx_drm_crtc_vblank_put(ipu_crtc->imx_crtc);
 	spin_unlock_irqrestore(&drm->event_lock, flags);
@@ -349,7 +350,6 @@
 	struct ipu_soc *ipu = dev_get_drvdata(ipu_crtc->dev->parent);
 	int dp = -EINVAL;
 	int ret;
-	int id;
 
 	ret = ipu_get_resources(ipu_crtc, pdata);
 	if (ret) {
@@ -358,18 +358,23 @@
 		return ret;
 	}
 
+	if (pdata->dp >= 0)
+		dp = IPU_DP_FLOW_SYNC_BG;
+	ipu_crtc->plane[0] = ipu_plane_init(drm, ipu, pdata->dma[0], dp, 0,
+					    DRM_PLANE_TYPE_PRIMARY);
+	if (IS_ERR(ipu_crtc->plane[0])) {
+		ret = PTR_ERR(ipu_crtc->plane[0]);
+		goto err_put_resources;
+	}
+
 	ret = imx_drm_add_crtc(drm, &ipu_crtc->base, &ipu_crtc->imx_crtc,
-			&ipu_crtc_helper_funcs, ipu_crtc->dev->of_node);
+			&ipu_crtc->plane[0]->base, &ipu_crtc_helper_funcs,
+			ipu_crtc->dev->of_node);
 	if (ret) {
 		dev_err(ipu_crtc->dev, "adding crtc failed with %d.\n", ret);
 		goto err_put_resources;
 	}
 
-	if (pdata->dp >= 0)
-		dp = IPU_DP_FLOW_SYNC_BG;
-	id = imx_drm_crtc_id(ipu_crtc->imx_crtc);
-	ipu_crtc->plane[0] = ipu_plane_init(ipu_crtc->base.dev, ipu,
-					    pdata->dma[0], dp, BIT(id), true);
 	ret = ipu_plane_get_resources(ipu_crtc->plane[0]);
 	if (ret) {
 		dev_err(ipu_crtc->dev, "getting plane 0 resources failed with %d.\n",
@@ -379,10 +384,10 @@
 
 	/* If this crtc is using the DP, add an overlay plane */
 	if (pdata->dp >= 0 && pdata->dma[1] > 0) {
-		ipu_crtc->plane[1] = ipu_plane_init(ipu_crtc->base.dev, ipu,
-						    pdata->dma[1],
-						    IPU_DP_FLOW_SYNC_FG,
-						    BIT(id), false);
+		ipu_crtc->plane[1] = ipu_plane_init(drm, ipu, pdata->dma[1],
+						IPU_DP_FLOW_SYNC_FG,
+						drm_crtc_mask(&ipu_crtc->base),
+						DRM_PLANE_TYPE_OVERLAY);
 		if (IS_ERR(ipu_crtc->plane[1]))
 			ipu_crtc->plane[1] = NULL;
 	}
@@ -407,28 +412,6 @@
 	return ret;
 }
 
-static struct device_node *ipu_drm_get_port_by_id(struct device_node *parent,
-						  int port_id)
-{
-	struct device_node *port;
-	int id, ret;
-
-	port = of_get_child_by_name(parent, "port");
-	while (port) {
-		ret = of_property_read_u32(port, "reg", &id);
-		if (!ret && id == port_id)
-			return port;
-
-		do {
-			port = of_get_next_child(parent, port);
-			if (!port)
-				return NULL;
-		} while (of_node_cmp(port->name, "port"));
-	}
-
-	return NULL;
-}
-
 static int ipu_drm_bind(struct device *dev, struct device *master, void *data)
 {
 	struct ipu_client_platformdata *pdata = dev->platform_data;
@@ -470,23 +453,11 @@
 static int ipu_drm_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct ipu_client_platformdata *pdata = dev->platform_data;
 	int ret;
 
 	if (!dev->platform_data)
 		return -EINVAL;
 
-	if (!dev->of_node) {
-		/* Associate crtc device with the corresponding DI port node */
-		dev->of_node = ipu_drm_get_port_by_id(dev->parent->of_node,
-						      pdata->di + 2);
-		if (!dev->of_node) {
-			dev_err(dev, "missing port@%d node in %s\n",
-				pdata->di + 2, dev->parent->of_node->full_name);
-			return -ENODEV;
-		}
-	}
-
 	ret = dma_set_coherent_mask(dev, DMA_BIT_MASK(32));
 	if (ret)
 		return ret;
diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c
index 575f4c8..591ba2f 100644
--- a/drivers/gpu/drm/imx/ipuv3-plane.c
+++ b/drivers/gpu/drm/imx/ipuv3-plane.c
@@ -381,7 +381,7 @@
 
 struct ipu_plane *ipu_plane_init(struct drm_device *dev, struct ipu_soc *ipu,
 				 int dma, int dp, unsigned int possible_crtcs,
-				 bool priv)
+				 enum drm_plane_type type)
 {
 	struct ipu_plane *ipu_plane;
 	int ret;
@@ -399,10 +399,10 @@
 	ipu_plane->dma = dma;
 	ipu_plane->dp_flow = dp;
 
-	ret = drm_plane_init(dev, &ipu_plane->base, possible_crtcs,
-			     &ipu_plane_funcs, ipu_plane_formats,
-			     ARRAY_SIZE(ipu_plane_formats),
-			     priv);
+	ret = drm_universal_plane_init(dev, &ipu_plane->base, possible_crtcs,
+				       &ipu_plane_funcs, ipu_plane_formats,
+				       ARRAY_SIZE(ipu_plane_formats), type,
+				       NULL);
 	if (ret) {
 		DRM_ERROR("failed to initialize plane\n");
 		kfree(ipu_plane);
diff --git a/drivers/gpu/drm/imx/ipuv3-plane.h b/drivers/gpu/drm/imx/ipuv3-plane.h
index 9b5eff1..3a443b4 100644
--- a/drivers/gpu/drm/imx/ipuv3-plane.h
+++ b/drivers/gpu/drm/imx/ipuv3-plane.h
@@ -34,7 +34,7 @@
 
 struct ipu_plane *ipu_plane_init(struct drm_device *dev, struct ipu_soc *ipu,
 				 int dma, int dp, unsigned int possible_crtcs,
-				 bool priv);
+				 enum drm_plane_type type);
 
 /* Init IDMAC, DMFC, DP */
 int ipu_plane_mode_set(struct ipu_plane *plane, struct drm_crtc *crtc,
diff --git a/drivers/gpu/drm/imx/parallel-display.c b/drivers/gpu/drm/imx/parallel-display.c
index b4deb9c..fcbe4d2 100644
--- a/drivers/gpu/drm/imx/parallel-display.c
+++ b/drivers/gpu/drm/imx/parallel-display.c
@@ -54,7 +54,11 @@
 
 	if (imxpd->panel && imxpd->panel->funcs &&
 	    imxpd->panel->funcs->get_modes) {
+		struct drm_display_info *di = &connector->display_info;
+
 		num_modes = imxpd->panel->funcs->get_modes(imxpd->panel);
+		if (!imxpd->bus_format && di->num_bus_formats)
+			imxpd->bus_format = di->bus_formats[0];
 		if (num_modes > 0)
 			return num_modes;
 	}
@@ -188,7 +192,7 @@
 
 	drm_encoder_helper_add(&imxpd->encoder, &imx_pd_encoder_helper_funcs);
 	drm_encoder_init(drm, &imxpd->encoder, &imx_pd_encoder_funcs,
-			 DRM_MODE_ENCODER_NONE);
+			 DRM_MODE_ENCODER_NONE, NULL);
 
 	drm_connector_helper_add(&imxpd->connector,
 			&imx_pd_connector_helper_funcs);
diff --git a/drivers/gpu/drm/mgag200/mgag200_cursor.c b/drivers/gpu/drm/mgag200/mgag200_cursor.c
index 4f2068f..a7bf6a9 100644
--- a/drivers/gpu/drm/mgag200/mgag200_cursor.c
+++ b/drivers/gpu/drm/mgag200/mgag200_cursor.c
@@ -70,6 +70,11 @@
 	BUG_ON(pixels_2 != pixels_current && pixels_2 != pixels_prev);
 	BUG_ON(pixels_current == pixels_prev);
 
+	if (!handle || !file_priv) {
+		mga_hide_cursor(mdev);
+		return 0;
+	}
+
 	obj = drm_gem_object_lookup(dev, file_priv, handle);
 	if (!obj)
 		return -ENOENT;
@@ -88,12 +93,6 @@
 		goto out_unreserve1;
 	}
 
-	if (!handle) {
-		mga_hide_cursor(mdev);
-		ret = 0;
-		goto out1;
-	}
-
 	/* Move cursor buffers into VRAM if they aren't already */
 	if (!pixels_1->pin_count) {
 		ret = mgag200_bo_pin(pixels_1, TTM_PL_FLAG_VRAM,
diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.h b/drivers/gpu/drm/mgag200/mgag200_drv.h
index 912151c..205b280 100644
--- a/drivers/gpu/drm/mgag200/mgag200_drv.h
+++ b/drivers/gpu/drm/mgag200/mgag200_drv.h
@@ -252,7 +252,7 @@
 				/* mgag200_main.c */
 int mgag200_framebuffer_init(struct drm_device *dev,
 			     struct mga_framebuffer *mfb,
-			     struct drm_mode_fb_cmd2 *mode_cmd,
+			     const struct drm_mode_fb_cmd2 *mode_cmd,
 			     struct drm_gem_object *obj);
 
 
diff --git a/drivers/gpu/drm/mgag200/mgag200_fb.c b/drivers/gpu/drm/mgag200/mgag200_fb.c
index b35b5b2..d9b04b0 100644
--- a/drivers/gpu/drm/mgag200/mgag200_fb.c
+++ b/drivers/gpu/drm/mgag200/mgag200_fb.c
@@ -138,7 +138,7 @@
 };
 
 static int mgag200fb_create_object(struct mga_fbdev *afbdev,
-				   struct drm_mode_fb_cmd2 *mode_cmd,
+				   const struct drm_mode_fb_cmd2 *mode_cmd,
 				   struct drm_gem_object **gobj_p)
 {
 	struct drm_device *dev = afbdev->helper.dev;
diff --git a/drivers/gpu/drm/mgag200/mgag200_main.c b/drivers/gpu/drm/mgag200/mgag200_main.c
index b1a0f56..9147444 100644
--- a/drivers/gpu/drm/mgag200/mgag200_main.c
+++ b/drivers/gpu/drm/mgag200/mgag200_main.c
@@ -29,7 +29,7 @@
 
 int mgag200_framebuffer_init(struct drm_device *dev,
 			     struct mga_framebuffer *gfb,
-			     struct drm_mode_fb_cmd2 *mode_cmd,
+			     const struct drm_mode_fb_cmd2 *mode_cmd,
 			     struct drm_gem_object *obj)
 {
 	int ret;
@@ -47,7 +47,7 @@
 static struct drm_framebuffer *
 mgag200_user_framebuffer_create(struct drm_device *dev,
 				struct drm_file *filp,
-				struct drm_mode_fb_cmd2 *mode_cmd)
+				const struct drm_mode_fb_cmd2 *mode_cmd)
 {
 	struct drm_gem_object *obj;
 	struct mga_framebuffer *mga_fb;
diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c
index c99d3fe..3180212 100644
--- a/drivers/gpu/drm/mgag200/mgag200_mode.c
+++ b/drivers/gpu/drm/mgag200/mgag200_mode.c
@@ -1538,7 +1538,7 @@
 	encoder->possible_crtcs = 0x1;
 
 	drm_encoder_init(dev, encoder, &mga_encoder_encoder_funcs,
-			 DRM_MODE_ENCODER_DAC);
+			 DRM_MODE_ENCODER_DAC, NULL);
 	drm_encoder_helper_add(encoder, &mga_encoder_helper_funcs);
 
 	return encoder;
diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig
index 84d3ec9..215495c 100644
--- a/drivers/gpu/drm/msm/Kconfig
+++ b/drivers/gpu/drm/msm/Kconfig
@@ -54,3 +54,11 @@
 	default y
 	help
 	  Choose this option if the 20nm DSI PHY is used on the platform.
+
+config DRM_MSM_DSI_28NM_8960_PHY
+	bool "Enable DSI 28nm 8960 PHY driver in MSM DRM"
+	depends on DRM_MSM_DSI
+	default y
+	help
+	  Choose this option if the 28nm DSI PHY 8960 variant is used on the
+	  platform.
diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
index 1c90290..065ad41 100644
--- a/drivers/gpu/drm/msm/Makefile
+++ b/drivers/gpu/drm/msm/Makefile
@@ -54,6 +54,7 @@
 msm-$(CONFIG_COMMON_CLK) += mdp/mdp4/mdp4_lvds_pll.o
 
 msm-$(CONFIG_DRM_MSM_DSI) += dsi/dsi.o \
+			mdp/mdp4/mdp4_dsi_encoder.o \
 			dsi/dsi_cfg.o \
 			dsi/dsi_host.o \
 			dsi/dsi_manager.o \
@@ -62,10 +63,12 @@
 
 msm-$(CONFIG_DRM_MSM_DSI_28NM_PHY) += dsi/phy/dsi_phy_28nm.o
 msm-$(CONFIG_DRM_MSM_DSI_20NM_PHY) += dsi/phy/dsi_phy_20nm.o
+msm-$(CONFIG_DRM_MSM_DSI_28NM_8960_PHY) += dsi/phy/dsi_phy_28nm_8960.o
 
 ifeq ($(CONFIG_DRM_MSM_DSI_PLL),y)
 msm-y += dsi/pll/dsi_pll.o
 msm-$(CONFIG_DRM_MSM_DSI_28NM_PHY) += dsi/pll/dsi_pll_28nm.o
+msm-$(CONFIG_DRM_MSM_DSI_28NM_8960_PHY) += dsi/pll/dsi_pll_28nm_8960.o
 endif
 
 obj-$(CONFIG_DRM_MSM)	+= msm.o
diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c
index 1ea2df5..950d27d 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_device.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
@@ -19,10 +19,6 @@
 
 #include "adreno_gpu.h"
 
-#if defined(DOWNSTREAM_CONFIG_MSM_BUS_SCALING) && !defined(CONFIG_OF)
-#  include <mach/kgsl.h>
-#endif
-
 #define ANY_ID 0xff
 
 bool hang_debug = false;
@@ -168,7 +164,6 @@
 static int adreno_bind(struct device *dev, struct device *master, void *data)
 {
 	static struct adreno_platform_config config = {};
-#ifdef CONFIG_OF
 	struct device_node *child, *node = dev->of_node;
 	u32 val;
 	int ret;
@@ -205,53 +200,6 @@
 		return -ENXIO;
 	}
 
-#else
-	struct kgsl_device_platform_data *pdata = dev->platform_data;
-	uint32_t version = socinfo_get_version();
-	if (cpu_is_apq8064ab()) {
-		config.fast_rate = 450000000;
-		config.slow_rate = 27000000;
-		config.bus_freq  = 4;
-		config.rev = ADRENO_REV(3, 2, 1, 0);
-	} else if (cpu_is_apq8064()) {
-		config.fast_rate = 400000000;
-		config.slow_rate = 27000000;
-		config.bus_freq  = 4;
-
-		if (SOCINFO_VERSION_MAJOR(version) == 2)
-			config.rev = ADRENO_REV(3, 2, 0, 2);
-		else if ((SOCINFO_VERSION_MAJOR(version) == 1) &&
-				(SOCINFO_VERSION_MINOR(version) == 1))
-			config.rev = ADRENO_REV(3, 2, 0, 1);
-		else
-			config.rev = ADRENO_REV(3, 2, 0, 0);
-
-	} else if (cpu_is_msm8960ab()) {
-		config.fast_rate = 400000000;
-		config.slow_rate = 320000000;
-		config.bus_freq  = 4;
-
-		if (SOCINFO_VERSION_MINOR(version) == 0)
-			config.rev = ADRENO_REV(3, 2, 1, 0);
-		else
-			config.rev = ADRENO_REV(3, 2, 1, 1);
-
-	} else if (cpu_is_msm8930()) {
-		config.fast_rate = 400000000;
-		config.slow_rate = 27000000;
-		config.bus_freq  = 3;
-
-		if ((SOCINFO_VERSION_MAJOR(version) == 1) &&
-			(SOCINFO_VERSION_MINOR(version) == 2))
-			config.rev = ADRENO_REV(3, 0, 5, 2);
-		else
-			config.rev = ADRENO_REV(3, 0, 5, 0);
-
-	}
-#  ifdef DOWNSTREAM_CONFIG_MSM_BUS_SCALING
-	config.bus_scale_table = pdata->bus_scale_table;
-#  endif
-#endif
 	dev->platform_data = &config;
 	set_gpu_pdev(dev_get_drvdata(master), to_platform_device(dev));
 	return 0;
diff --git a/drivers/gpu/drm/msm/dsi/dsi.h b/drivers/gpu/drm/msm/dsi/dsi.h
index 5f5a373..749fbb2 100644
--- a/drivers/gpu/drm/msm/dsi/dsi.h
+++ b/drivers/gpu/drm/msm/dsi/dsi.h
@@ -31,10 +31,12 @@
 	MSM_DSI_PHY_28NM_HPM,
 	MSM_DSI_PHY_28NM_LP,
 	MSM_DSI_PHY_20NM,
+	MSM_DSI_PHY_28NM_8960,
 	MSM_DSI_PHY_MAX
 };
 
 #define DSI_DEV_REGULATOR_MAX	8
+#define DSI_BUS_CLK_MAX		4
 
 /* Regulators for DSI devices */
 struct dsi_reg_entry {
@@ -89,7 +91,7 @@
 		u32 *clk_pre, u32 *clk_post);
 void msm_dsi_manager_phy_disable(int id);
 int msm_dsi_manager_cmd_xfer(int id, const struct mipi_dsi_msg *msg);
-bool msm_dsi_manager_cmd_xfer_trigger(int id, u32 iova, u32 len);
+bool msm_dsi_manager_cmd_xfer_trigger(int id, u32 dma_base, u32 len);
 int msm_dsi_manager_register(struct msm_dsi *msm_dsi);
 void msm_dsi_manager_unregister(struct msm_dsi *msm_dsi);
 
@@ -143,7 +145,7 @@
 int msm_dsi_host_cmd_rx(struct mipi_dsi_host *host,
 					const struct mipi_dsi_msg *msg);
 void msm_dsi_host_cmd_xfer_commit(struct mipi_dsi_host *host,
-					u32 iova, u32 len);
+					u32 dma_base, u32 len);
 int msm_dsi_host_enable(struct mipi_dsi_host *host);
 int msm_dsi_host_disable(struct mipi_dsi_host *host);
 int msm_dsi_host_power_on(struct mipi_dsi_host *host);
diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.c b/drivers/gpu/drm/msm/dsi/dsi_cfg.c
index 5872d5e..2a827d8 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_cfg.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.c
@@ -13,9 +13,26 @@
 
 #include "dsi_cfg.h"
 
-/* DSI v2 has not been supported by now */
-static const struct msm_dsi_config dsi_v2_cfg = {
+static const char * const dsi_v2_bus_clk_names[] = {
+	"core_mmss_clk", "iface_clk", "bus_clk",
+};
+
+static const struct msm_dsi_config apq8064_dsi_cfg = {
 	.io_offset = 0,
+	.reg_cfg = {
+		.num = 3,
+		.regs = {
+			{"vdda", 1200000, 1200000, 100000, 100},
+			{"avdd", 3000000, 3000000, 110000, 100},
+			{"vddio", 1800000, 1800000, 100000, 100},
+		},
+	},
+	.bus_clk_names = dsi_v2_bus_clk_names,
+	.num_bus_clks = ARRAY_SIZE(dsi_v2_bus_clk_names),
+};
+
+static const char * const dsi_6g_bus_clk_names[] = {
+	"mdp_core_clk", "iface_clk", "bus_clk", "core_mmss_clk",
 };
 
 static const struct msm_dsi_config msm8974_apq8084_dsi_cfg = {
@@ -29,6 +46,12 @@
 			{"vddio", 1800000, 1800000, 100000, 100},
 		},
 	},
+	.bus_clk_names = dsi_6g_bus_clk_names,
+	.num_bus_clks = ARRAY_SIZE(dsi_6g_bus_clk_names),
+};
+
+static const char * const dsi_8916_bus_clk_names[] = {
+	"mdp_core_clk", "iface_clk", "bus_clk",
 };
 
 static const struct msm_dsi_config msm8916_dsi_cfg = {
@@ -42,6 +65,8 @@
 			{"vddio", 1800000, 1800000, 100000, 100},
 		},
 	},
+	.bus_clk_names = dsi_8916_bus_clk_names,
+	.num_bus_clks = ARRAY_SIZE(dsi_8916_bus_clk_names),
 };
 
 static const struct msm_dsi_config msm8994_dsi_cfg = {
@@ -57,11 +82,13 @@
 			{"lab_reg", -1, -1, -1, -1},
 			{"ibb_reg", -1, -1, -1, -1},
 		},
-	}
+	},
+	.bus_clk_names = dsi_6g_bus_clk_names,
+	.num_bus_clks = ARRAY_SIZE(dsi_6g_bus_clk_names),
 };
 
 static const struct msm_dsi_cfg_handler dsi_cfg_handlers[] = {
-	{MSM_DSI_VER_MAJOR_V2, U32_MAX, &dsi_v2_cfg},
+	{MSM_DSI_VER_MAJOR_V2, MSM_DSI_V2_VER_MINOR_8064, &apq8064_dsi_cfg},
 	{MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V1_0,
 						&msm8974_apq8084_dsi_cfg},
 	{MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V1_1,
diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.h b/drivers/gpu/drm/msm/dsi/dsi_cfg.h
index 4cf8872..a68c836 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_cfg.h
+++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.h
@@ -25,11 +25,15 @@
 #define MSM_DSI_6G_VER_MINOR_V1_3	0x10030000
 #define MSM_DSI_6G_VER_MINOR_V1_3_1	0x10030001
 
+#define MSM_DSI_V2_VER_MINOR_8064	0x0
+
 #define DSI_6G_REG_SHIFT	4
 
 struct msm_dsi_config {
 	u32 io_offset;
 	struct dsi_reg_config reg_cfg;
+	const char * const *bus_clk_names;
+	const int num_bus_clks;
 };
 
 struct msm_dsi_cfg_handler {
diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c
index 4c49868..48f9967 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_host.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_host.c
@@ -24,26 +24,36 @@
 #include <linux/of_graph.h>
 #include <linux/regulator/consumer.h>
 #include <linux/spinlock.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
 #include <video/mipi_display.h>
 
 #include "dsi.h"
 #include "dsi.xml.h"
+#include "sfpb.xml.h"
 #include "dsi_cfg.h"
 
 static int dsi_get_version(const void __iomem *base, u32 *major, u32 *minor)
 {
 	u32 ver;
-	u32 ver_6g;
 
 	if (!major || !minor)
 		return -EINVAL;
 
-	/* From DSI6G(v3), addition of a 6G_HW_VERSION register at offset 0
+	/*
+	 * From DSI6G(v3), addition of a 6G_HW_VERSION register at offset 0
 	 * makes all other registers 4-byte shifted down.
+	 *
+	 * In order to identify between DSI6G(v3) and beyond, and DSIv2 and
+	 * older, we read the DSI_VERSION register without any shift(offset
+	 * 0x1f0). In the case of DSIv2, this hast to be a non-zero value. In
+	 * the case of DSI6G, this has to be zero (the offset points to a
+	 * scratch register which we never touch)
 	 */
-	ver_6g = msm_readl(base + REG_DSI_6G_HW_VERSION);
-	if (ver_6g == 0) {
-		ver = msm_readl(base + REG_DSI_VERSION);
+
+	ver = msm_readl(base + REG_DSI_VERSION);
+	if (ver) {
+		/* older dsi host, there is no register shift */
 		ver = FIELD(ver, DSI_VERSION_MAJOR);
 		if (ver <= MSM_DSI_VER_MAJOR_V2) {
 			/* old versions */
@@ -54,12 +64,17 @@
 			return -EINVAL;
 		}
 	} else {
+		/*
+		 * newer host, offset 0 has 6G_HW_VERSION, the rest of the
+		 * registers are shifted down, read DSI_VERSION again with
+		 * the shifted offset
+		 */
 		ver = msm_readl(base + DSI_6G_REG_SHIFT + REG_DSI_VERSION);
 		ver = FIELD(ver, DSI_VERSION_MAJOR);
 		if (ver == MSM_DSI_VER_MAJOR_6G) {
 			/* 6G version */
 			*major = ver;
-			*minor = ver_6g;
+			*minor = msm_readl(base + REG_DSI_6G_HW_VERSION);
 			return 0;
 		} else {
 			return -EINVAL;
@@ -91,10 +106,9 @@
 
 	void __iomem *ctrl_base;
 	struct regulator_bulk_data supplies[DSI_DEV_REGULATOR_MAX];
-	struct clk *mdp_core_clk;
-	struct clk *ahb_clk;
-	struct clk *axi_clk;
-	struct clk *mmss_misc_ahb_clk;
+
+	struct clk *bus_clks[DSI_BUS_CLK_MAX];
+
 	struct clk *byte_clk;
 	struct clk *esc_clk;
 	struct clk *pixel_clk;
@@ -102,6 +116,14 @@
 	struct clk *pixel_clk_src;
 
 	u32 byte_clk_rate;
+	u32 esc_clk_rate;
+
+	/* DSI v2 specific clocks */
+	struct clk *src_clk;
+	struct clk *esc_clk_src;
+	struct clk *dsi_clk_src;
+
+	u32 src_clk_rate;
 
 	struct gpio_desc *disp_en_gpio;
 	struct gpio_desc *te_gpio;
@@ -119,9 +141,19 @@
 	struct work_struct err_work;
 	struct workqueue_struct *workqueue;
 
+	/* DSI 6G TX buffer*/
 	struct drm_gem_object *tx_gem_obj;
+
+	/* DSI v2 TX buffer */
+	void *tx_buf;
+	dma_addr_t tx_buf_paddr;
+
+	int tx_size;
+
 	u8 *rx_buf;
 
+	struct regmap *sfpb;
+
 	struct drm_display_mode *mode;
 
 	/* connected device info */
@@ -165,21 +197,31 @@
 						struct msm_dsi_host *msm_host)
 {
 	const struct msm_dsi_cfg_handler *cfg_hnd = NULL;
+	struct device *dev = &msm_host->pdev->dev;
 	struct regulator *gdsc_reg;
+	struct clk *ahb_clk;
 	int ret;
 	u32 major = 0, minor = 0;
 
-	gdsc_reg = regulator_get(&msm_host->pdev->dev, "gdsc");
+	gdsc_reg = regulator_get(dev, "gdsc");
 	if (IS_ERR(gdsc_reg)) {
 		pr_err("%s: cannot get gdsc\n", __func__);
 		goto exit;
 	}
+
+	ahb_clk = clk_get(dev, "iface_clk");
+	if (IS_ERR(ahb_clk)) {
+		pr_err("%s: cannot get interface clock\n", __func__);
+		goto put_gdsc;
+	}
+
 	ret = regulator_enable(gdsc_reg);
 	if (ret) {
 		pr_err("%s: unable to enable gdsc\n", __func__);
-		goto put_gdsc;
+		goto put_clk;
 	}
-	ret = clk_prepare_enable(msm_host->ahb_clk);
+
+	ret = clk_prepare_enable(ahb_clk);
 	if (ret) {
 		pr_err("%s: unable to enable ahb_clk\n", __func__);
 		goto disable_gdsc;
@@ -196,9 +238,11 @@
 	DBG("%s: Version %x:%x\n", __func__, major, minor);
 
 disable_clks:
-	clk_disable_unprepare(msm_host->ahb_clk);
+	clk_disable_unprepare(ahb_clk);
 disable_gdsc:
 	regulator_disable(gdsc_reg);
+put_clk:
+	clk_put(ahb_clk);
 put_gdsc:
 	regulator_put(gdsc_reg);
 exit:
@@ -295,40 +339,23 @@
 static int dsi_clk_init(struct msm_dsi_host *msm_host)
 {
 	struct device *dev = &msm_host->pdev->dev;
-	int ret = 0;
+	const struct msm_dsi_cfg_handler *cfg_hnd = msm_host->cfg_hnd;
+	const struct msm_dsi_config *cfg = cfg_hnd->cfg;
+	int i, ret = 0;
 
-	msm_host->mdp_core_clk = devm_clk_get(dev, "mdp_core_clk");
-	if (IS_ERR(msm_host->mdp_core_clk)) {
-		ret = PTR_ERR(msm_host->mdp_core_clk);
-		pr_err("%s: Unable to get mdp core clk. ret=%d\n",
-			__func__, ret);
-		goto exit;
+	/* get bus clocks */
+	for (i = 0; i < cfg->num_bus_clks; i++) {
+		msm_host->bus_clks[i] = devm_clk_get(dev,
+						cfg->bus_clk_names[i]);
+		if (IS_ERR(msm_host->bus_clks[i])) {
+			ret = PTR_ERR(msm_host->bus_clks[i]);
+			pr_err("%s: Unable to get %s, ret = %d\n",
+				__func__, cfg->bus_clk_names[i], ret);
+			goto exit;
+		}
 	}
 
-	msm_host->ahb_clk = devm_clk_get(dev, "iface_clk");
-	if (IS_ERR(msm_host->ahb_clk)) {
-		ret = PTR_ERR(msm_host->ahb_clk);
-		pr_err("%s: Unable to get mdss ahb clk. ret=%d\n",
-			__func__, ret);
-		goto exit;
-	}
-
-	msm_host->axi_clk = devm_clk_get(dev, "bus_clk");
-	if (IS_ERR(msm_host->axi_clk)) {
-		ret = PTR_ERR(msm_host->axi_clk);
-		pr_err("%s: Unable to get axi bus clk. ret=%d\n",
-			__func__, ret);
-		goto exit;
-	}
-
-	msm_host->mmss_misc_ahb_clk = devm_clk_get(dev, "core_mmss_clk");
-	if (IS_ERR(msm_host->mmss_misc_ahb_clk)) {
-		ret = PTR_ERR(msm_host->mmss_misc_ahb_clk);
-		pr_err("%s: Unable to get mmss misc ahb clk. ret=%d\n",
-			__func__, ret);
-		goto exit;
-	}
-
+	/* get link and source clocks */
 	msm_host->byte_clk = devm_clk_get(dev, "byte_clk");
 	if (IS_ERR(msm_host->byte_clk)) {
 		ret = PTR_ERR(msm_host->byte_clk);
@@ -356,80 +383,85 @@
 		goto exit;
 	}
 
-	msm_host->byte_clk_src = devm_clk_get(dev, "byte_clk_src");
-	if (IS_ERR(msm_host->byte_clk_src)) {
-		ret = PTR_ERR(msm_host->byte_clk_src);
+	msm_host->byte_clk_src = clk_get_parent(msm_host->byte_clk);
+	if (!msm_host->byte_clk_src) {
+		ret = -ENODEV;
 		pr_err("%s: can't find byte_clk_src. ret=%d\n", __func__, ret);
-		msm_host->byte_clk_src = NULL;
 		goto exit;
 	}
 
-	msm_host->pixel_clk_src = devm_clk_get(dev, "pixel_clk_src");
-	if (IS_ERR(msm_host->pixel_clk_src)) {
-		ret = PTR_ERR(msm_host->pixel_clk_src);
+	msm_host->pixel_clk_src = clk_get_parent(msm_host->pixel_clk);
+	if (!msm_host->pixel_clk_src) {
+		ret = -ENODEV;
 		pr_err("%s: can't find pixel_clk_src. ret=%d\n", __func__, ret);
-		msm_host->pixel_clk_src = NULL;
 		goto exit;
 	}
 
+	if (cfg_hnd->major == MSM_DSI_VER_MAJOR_V2) {
+		msm_host->src_clk = devm_clk_get(dev, "src_clk");
+		if (IS_ERR(msm_host->src_clk)) {
+			ret = PTR_ERR(msm_host->src_clk);
+			pr_err("%s: can't find dsi_src_clk. ret=%d\n",
+				__func__, ret);
+			msm_host->src_clk = NULL;
+			goto exit;
+		}
+
+		msm_host->esc_clk_src = clk_get_parent(msm_host->esc_clk);
+		if (!msm_host->esc_clk_src) {
+			ret = -ENODEV;
+			pr_err("%s: can't get esc_clk_src. ret=%d\n",
+				__func__, ret);
+			goto exit;
+		}
+
+		msm_host->dsi_clk_src = clk_get_parent(msm_host->src_clk);
+		if (!msm_host->dsi_clk_src) {
+			ret = -ENODEV;
+			pr_err("%s: can't get dsi_clk_src. ret=%d\n",
+				__func__, ret);
+		}
+	}
 exit:
 	return ret;
 }
 
 static int dsi_bus_clk_enable(struct msm_dsi_host *msm_host)
 {
-	int ret;
+	const struct msm_dsi_config *cfg = msm_host->cfg_hnd->cfg;
+	int i, ret;
 
 	DBG("id=%d", msm_host->id);
 
-	ret = clk_prepare_enable(msm_host->mdp_core_clk);
-	if (ret) {
-		pr_err("%s: failed to enable mdp_core_clock, %d\n",
-							 __func__, ret);
-		goto core_clk_err;
-	}
-
-	ret = clk_prepare_enable(msm_host->ahb_clk);
-	if (ret) {
-		pr_err("%s: failed to enable ahb clock, %d\n", __func__, ret);
-		goto ahb_clk_err;
-	}
-
-	ret = clk_prepare_enable(msm_host->axi_clk);
-	if (ret) {
-		pr_err("%s: failed to enable ahb clock, %d\n", __func__, ret);
-		goto axi_clk_err;
-	}
-
-	ret = clk_prepare_enable(msm_host->mmss_misc_ahb_clk);
-	if (ret) {
-		pr_err("%s: failed to enable mmss misc ahb clk, %d\n",
-			__func__, ret);
-		goto misc_ahb_clk_err;
+	for (i = 0; i < cfg->num_bus_clks; i++) {
+		ret = clk_prepare_enable(msm_host->bus_clks[i]);
+		if (ret) {
+			pr_err("%s: failed to enable bus clock %d ret %d\n",
+				__func__, i, ret);
+			goto err;
+		}
 	}
 
 	return 0;
+err:
+	for (; i > 0; i--)
+		clk_disable_unprepare(msm_host->bus_clks[i]);
 
-misc_ahb_clk_err:
-	clk_disable_unprepare(msm_host->axi_clk);
-axi_clk_err:
-	clk_disable_unprepare(msm_host->ahb_clk);
-ahb_clk_err:
-	clk_disable_unprepare(msm_host->mdp_core_clk);
-core_clk_err:
 	return ret;
 }
 
 static void dsi_bus_clk_disable(struct msm_dsi_host *msm_host)
 {
+	const struct msm_dsi_config *cfg = msm_host->cfg_hnd->cfg;
+	int i;
+
 	DBG("");
-	clk_disable_unprepare(msm_host->mmss_misc_ahb_clk);
-	clk_disable_unprepare(msm_host->axi_clk);
-	clk_disable_unprepare(msm_host->ahb_clk);
-	clk_disable_unprepare(msm_host->mdp_core_clk);
+
+	for (i = cfg->num_bus_clks - 1; i >= 0; i--)
+		clk_disable_unprepare(msm_host->bus_clks[i]);
 }
 
-static int dsi_link_clk_enable(struct msm_dsi_host *msm_host)
+static int dsi_link_clk_enable_6g(struct msm_dsi_host *msm_host)
 {
 	int ret;
 
@@ -476,11 +508,98 @@
 	return ret;
 }
 
+static int dsi_link_clk_enable_v2(struct msm_dsi_host *msm_host)
+{
+	int ret;
+
+	DBG("Set clk rates: pclk=%d, byteclk=%d, esc_clk=%d, dsi_src_clk=%d",
+		msm_host->mode->clock, msm_host->byte_clk_rate,
+		msm_host->esc_clk_rate, msm_host->src_clk_rate);
+
+	ret = clk_set_rate(msm_host->byte_clk, msm_host->byte_clk_rate);
+	if (ret) {
+		pr_err("%s: Failed to set rate byte clk, %d\n", __func__, ret);
+		goto error;
+	}
+
+	ret = clk_set_rate(msm_host->esc_clk, msm_host->esc_clk_rate);
+	if (ret) {
+		pr_err("%s: Failed to set rate esc clk, %d\n", __func__, ret);
+		goto error;
+	}
+
+	ret = clk_set_rate(msm_host->src_clk, msm_host->src_clk_rate);
+	if (ret) {
+		pr_err("%s: Failed to set rate src clk, %d\n", __func__, ret);
+		goto error;
+	}
+
+	ret = clk_set_rate(msm_host->pixel_clk, msm_host->mode->clock * 1000);
+	if (ret) {
+		pr_err("%s: Failed to set rate pixel clk, %d\n", __func__, ret);
+		goto error;
+	}
+
+	ret = clk_prepare_enable(msm_host->byte_clk);
+	if (ret) {
+		pr_err("%s: Failed to enable dsi byte clk\n", __func__);
+		goto error;
+	}
+
+	ret = clk_prepare_enable(msm_host->esc_clk);
+	if (ret) {
+		pr_err("%s: Failed to enable dsi esc clk\n", __func__);
+		goto esc_clk_err;
+	}
+
+	ret = clk_prepare_enable(msm_host->src_clk);
+	if (ret) {
+		pr_err("%s: Failed to enable dsi src clk\n", __func__);
+		goto src_clk_err;
+	}
+
+	ret = clk_prepare_enable(msm_host->pixel_clk);
+	if (ret) {
+		pr_err("%s: Failed to enable dsi pixel clk\n", __func__);
+		goto pixel_clk_err;
+	}
+
+	return 0;
+
+pixel_clk_err:
+	clk_disable_unprepare(msm_host->src_clk);
+src_clk_err:
+	clk_disable_unprepare(msm_host->esc_clk);
+esc_clk_err:
+	clk_disable_unprepare(msm_host->byte_clk);
+error:
+	return ret;
+}
+
+static int dsi_link_clk_enable(struct msm_dsi_host *msm_host)
+{
+	const struct msm_dsi_cfg_handler *cfg_hnd = msm_host->cfg_hnd;
+
+	if (cfg_hnd->major == MSM_DSI_VER_MAJOR_6G)
+		return dsi_link_clk_enable_6g(msm_host);
+	else
+		return dsi_link_clk_enable_v2(msm_host);
+}
+
 static void dsi_link_clk_disable(struct msm_dsi_host *msm_host)
 {
-	clk_disable_unprepare(msm_host->esc_clk);
-	clk_disable_unprepare(msm_host->pixel_clk);
-	clk_disable_unprepare(msm_host->byte_clk);
+	const struct msm_dsi_cfg_handler *cfg_hnd = msm_host->cfg_hnd;
+
+	if (cfg_hnd->major == MSM_DSI_VER_MAJOR_6G) {
+		clk_disable_unprepare(msm_host->esc_clk);
+		clk_disable_unprepare(msm_host->pixel_clk);
+		clk_disable_unprepare(msm_host->byte_clk);
+	} else {
+		clk_disable_unprepare(msm_host->pixel_clk);
+		clk_disable_unprepare(msm_host->src_clk);
+		clk_disable_unprepare(msm_host->esc_clk);
+		clk_disable_unprepare(msm_host->byte_clk);
+	}
 }
 
 static int dsi_clk_ctrl(struct msm_dsi_host *msm_host, bool enable)
@@ -515,6 +634,7 @@
 static int dsi_calc_clk_rate(struct msm_dsi_host *msm_host)
 {
 	struct drm_display_mode *mode = msm_host->mode;
+	const struct msm_dsi_cfg_handler *cfg_hnd = msm_host->cfg_hnd;
 	u8 lanes = msm_host->lanes;
 	u32 bpp = dsi_get_bpp(msm_host->format);
 	u32 pclk_rate;
@@ -534,6 +654,47 @@
 
 	DBG("pclk=%d, bclk=%d", pclk_rate, msm_host->byte_clk_rate);
 
+	msm_host->esc_clk_rate = clk_get_rate(msm_host->esc_clk);
+
+	if (cfg_hnd->major == MSM_DSI_VER_MAJOR_V2) {
+		unsigned int esc_mhz, esc_div;
+		unsigned long byte_mhz;
+
+		msm_host->src_clk_rate = (pclk_rate * bpp) / 8;
+
+		/*
+		 * esc clock is byte clock followed by a 4 bit divider,
+		 * we need to find an escape clock frequency within the
+		 * mipi DSI spec range within the maximum divider limit
+		 * We iterate here between an escape clock frequencey
+		 * between 20 Mhz to 5 Mhz and pick up the first one
+		 * that can be supported by our divider
+		 */
+
+		byte_mhz = msm_host->byte_clk_rate / 1000000;
+
+		for (esc_mhz = 20; esc_mhz >= 5; esc_mhz--) {
+			esc_div = DIV_ROUND_UP(byte_mhz, esc_mhz);
+
+			/*
+			 * TODO: Ideally, we shouldn't know what sort of divider
+			 * is available in mmss_cc, we're just assuming that
+			 * it'll always be a 4 bit divider. Need to come up with
+			 * a better way here.
+			 */
+			if (esc_div >= 1 && esc_div <= 16)
+				break;
+		}
+
+		if (esc_mhz < 5)
+			return -EINVAL;
+
+		msm_host->esc_clk_rate = msm_host->byte_clk_rate / esc_div;
+
+		DBG("esc=%d, src=%d", msm_host->esc_clk_rate,
+			msm_host->src_clk_rate);
+	}
+
 	return 0;
 }
 
@@ -835,29 +996,46 @@
 static int dsi_tx_buf_alloc(struct msm_dsi_host *msm_host, int size)
 {
 	struct drm_device *dev = msm_host->dev;
+	const struct msm_dsi_cfg_handler *cfg_hnd = msm_host->cfg_hnd;
 	int ret;
 	u32 iova;
 
-	mutex_lock(&dev->struct_mutex);
-	msm_host->tx_gem_obj = msm_gem_new(dev, size, MSM_BO_UNCACHED);
-	if (IS_ERR(msm_host->tx_gem_obj)) {
-		ret = PTR_ERR(msm_host->tx_gem_obj);
-		pr_err("%s: failed to allocate gem, %d\n", __func__, ret);
-		msm_host->tx_gem_obj = NULL;
+	if (cfg_hnd->major == MSM_DSI_VER_MAJOR_6G) {
+		mutex_lock(&dev->struct_mutex);
+		msm_host->tx_gem_obj = msm_gem_new(dev, size, MSM_BO_UNCACHED);
+		if (IS_ERR(msm_host->tx_gem_obj)) {
+			ret = PTR_ERR(msm_host->tx_gem_obj);
+			pr_err("%s: failed to allocate gem, %d\n",
+				__func__, ret);
+			msm_host->tx_gem_obj = NULL;
+			mutex_unlock(&dev->struct_mutex);
+			return ret;
+		}
+
+		ret = msm_gem_get_iova_locked(msm_host->tx_gem_obj, 0, &iova);
 		mutex_unlock(&dev->struct_mutex);
-		return ret;
-	}
+		if (ret) {
+			pr_err("%s: failed to get iova, %d\n", __func__, ret);
+			return ret;
+		}
 
-	ret = msm_gem_get_iova_locked(msm_host->tx_gem_obj, 0, &iova);
-	if (ret) {
-		pr_err("%s: failed to get iova, %d\n", __func__, ret);
-		return ret;
-	}
-	mutex_unlock(&dev->struct_mutex);
+		if (iova & 0x07) {
+			pr_err("%s: buf NOT 8 bytes aligned\n", __func__);
+			return -EINVAL;
+		}
 
-	if (iova & 0x07) {
-		pr_err("%s: buf NOT 8 bytes aligned\n", __func__);
-		return -EINVAL;
+		msm_host->tx_size = msm_host->tx_gem_obj->size;
+	} else {
+		msm_host->tx_buf = dma_alloc_coherent(dev->dev, size,
+					&msm_host->tx_buf_paddr, GFP_KERNEL);
+		if (!msm_host->tx_buf) {
+			ret = -ENOMEM;
+			pr_err("%s: failed to allocate tx buf, %d\n",
+				__func__, ret);
+			return ret;
+		}
+
+		msm_host->tx_size = size;
 	}
 
 	return 0;
@@ -874,14 +1052,19 @@
 		msm_host->tx_gem_obj = NULL;
 		mutex_unlock(&dev->struct_mutex);
 	}
+
+	if (msm_host->tx_buf)
+		dma_free_coherent(dev->dev, msm_host->tx_size, msm_host->tx_buf,
+			msm_host->tx_buf_paddr);
 }
 
 /*
  * prepare cmd buffer to be txed
  */
-static int dsi_cmd_dma_add(struct drm_gem_object *tx_gem,
-			const struct mipi_dsi_msg *msg)
+static int dsi_cmd_dma_add(struct msm_dsi_host *msm_host,
+			   const struct mipi_dsi_msg *msg)
 {
+	const struct msm_dsi_cfg_handler *cfg_hnd = msm_host->cfg_hnd;
 	struct mipi_dsi_packet packet;
 	int len;
 	int ret;
@@ -894,17 +1077,20 @@
 	}
 	len = (packet.size + 3) & (~0x3);
 
-	if (len > tx_gem->size) {
+	if (len > msm_host->tx_size) {
 		pr_err("%s: packet size is too big\n", __func__);
 		return -EINVAL;
 	}
 
-	data = msm_gem_vaddr(tx_gem);
-
-	if (IS_ERR(data)) {
-		ret = PTR_ERR(data);
-		pr_err("%s: get vaddr failed, %d\n", __func__, ret);
-		return ret;
+	if (cfg_hnd->major == MSM_DSI_VER_MAJOR_6G) {
+		data = msm_gem_vaddr(msm_host->tx_gem_obj);
+		if (IS_ERR(data)) {
+			ret = PTR_ERR(data);
+			pr_err("%s: get vaddr failed, %d\n", __func__, ret);
+			return ret;
+		}
+	} else {
+		data = msm_host->tx_buf;
 	}
 
 	/* MSM specific command format in memory */
@@ -970,17 +1156,21 @@
 	return msg->rx_len;
 }
 
-
 static int dsi_cmd_dma_tx(struct msm_dsi_host *msm_host, int len)
 {
+	const struct msm_dsi_cfg_handler *cfg_hnd = msm_host->cfg_hnd;
 	int ret;
-	u32 iova;
+	u32 dma_base;
 	bool triggered;
 
-	ret = msm_gem_get_iova(msm_host->tx_gem_obj, 0, &iova);
-	if (ret) {
-		pr_err("%s: failed to get iova: %d\n", __func__, ret);
-		return ret;
+	if (cfg_hnd->major == MSM_DSI_VER_MAJOR_6G) {
+		ret = msm_gem_get_iova(msm_host->tx_gem_obj, 0, &dma_base);
+		if (ret) {
+			pr_err("%s: failed to get iova: %d\n", __func__, ret);
+			return ret;
+		}
+	} else {
+		dma_base = msm_host->tx_buf_paddr;
 	}
 
 	reinit_completion(&msm_host->dma_comp);
@@ -988,7 +1178,7 @@
 	dsi_wait4video_eng_busy(msm_host);
 
 	triggered = msm_dsi_manager_cmd_xfer_trigger(
-						msm_host->id, iova, len);
+						msm_host->id, dma_base, len);
 	if (triggered) {
 		ret = wait_for_completion_timeout(&msm_host->dma_comp,
 					msecs_to_jiffies(200));
@@ -1060,7 +1250,7 @@
 	int bllp_len = msm_host->mode->hdisplay *
 			dsi_get_bpp(msm_host->format) / 8;
 
-	len = dsi_cmd_dma_add(msm_host->tx_gem_obj, msg);
+	len = dsi_cmd_dma_add(msm_host, msg);
 	if (!len) {
 		pr_err("%s: failed to add cmd type = 0x%x\n",
 			__func__,  msg->type);
@@ -1383,6 +1573,16 @@
 
 	msm_host->device_node = device_node;
 
+	if (of_property_read_bool(np, "syscon-sfpb")) {
+		msm_host->sfpb = syscon_regmap_lookup_by_phandle(np,
+					"syscon-sfpb");
+		if (IS_ERR(msm_host->sfpb)) {
+			dev_err(dev, "%s: failed to get sfpb regmap\n",
+				__func__);
+			return PTR_ERR(msm_host->sfpb);
+		}
+	}
+
 	return 0;
 }
 
@@ -1408,12 +1608,6 @@
 		goto fail;
 	}
 
-	ret = dsi_clk_init(msm_host);
-	if (ret) {
-		pr_err("%s: unable to initialize dsi clks\n", __func__);
-		goto fail;
-	}
-
 	msm_host->ctrl_base = msm_ioremap(pdev, "dsi_ctrl", "DSI CTRL");
 	if (IS_ERR(msm_host->ctrl_base)) {
 		pr_err("%s: unable to map Dsi ctrl base\n", __func__);
@@ -1437,6 +1631,12 @@
 		goto fail;
 	}
 
+	ret = dsi_clk_init(msm_host);
+	if (ret) {
+		pr_err("%s: unable to initialize dsi clks\n", __func__);
+		goto fail;
+	}
+
 	msm_host->rx_buf = devm_kzalloc(&pdev->dev, SZ_4K, GFP_KERNEL);
 	if (!msm_host->rx_buf) {
 		pr_err("%s: alloc rx temp buf failed\n", __func__);
@@ -1750,11 +1950,12 @@
 	return ret;
 }
 
-void msm_dsi_host_cmd_xfer_commit(struct mipi_dsi_host *host, u32 iova, u32 len)
+void msm_dsi_host_cmd_xfer_commit(struct mipi_dsi_host *host, u32 dma_base,
+				  u32 len)
 {
 	struct msm_dsi_host *msm_host = to_msm_dsi_host(host);
 
-	dsi_write(msm_host, REG_DSI_DMA_BASE, iova);
+	dsi_write(msm_host, REG_DSI_DMA_BASE, dma_base);
 	dsi_write(msm_host, REG_DSI_DMA_LEN, len);
 	dsi_write(msm_host, REG_DSI_TRIG_DMA, 1);
 
@@ -1766,6 +1967,7 @@
 	struct msm_dsi_pll *src_pll)
 {
 	struct msm_dsi_host *msm_host = to_msm_dsi_host(host);
+	const struct msm_dsi_cfg_handler *cfg_hnd = msm_host->cfg_hnd;
 	struct clk *byte_clk_provider, *pixel_clk_provider;
 	int ret;
 
@@ -1791,6 +1993,22 @@
 		goto exit;
 	}
 
+	if (cfg_hnd->major == MSM_DSI_VER_MAJOR_V2) {
+		ret = clk_set_parent(msm_host->dsi_clk_src, pixel_clk_provider);
+		if (ret) {
+			pr_err("%s: can't set parent to dsi_clk_src. ret=%d\n",
+				__func__, ret);
+			goto exit;
+		}
+
+		ret = clk_set_parent(msm_host->esc_clk_src, byte_clk_provider);
+		if (ret) {
+			pr_err("%s: can't set parent to esc_clk_src. ret=%d\n",
+				__func__, ret);
+			goto exit;
+		}
+	}
+
 exit:
 	return ret;
 }
@@ -1828,6 +2046,20 @@
 	return 0;
 }
 
+static void msm_dsi_sfpb_config(struct msm_dsi_host *msm_host, bool enable)
+{
+	enum sfpb_ahb_arb_master_port_en en;
+
+	if (!msm_host->sfpb)
+		return;
+
+	en = enable ? SFPB_MASTER_PORT_ENABLE : SFPB_MASTER_PORT_DISABLE;
+
+	regmap_update_bits(msm_host->sfpb, REG_SFPB_GPREG,
+			SFPB_GPREG_MASTER_PORT_EN__MASK,
+			SFPB_GPREG_MASTER_PORT_EN(en));
+}
+
 int msm_dsi_host_power_on(struct mipi_dsi_host *host)
 {
 	struct msm_dsi_host *msm_host = to_msm_dsi_host(host);
@@ -1840,6 +2072,8 @@
 		goto unlock_ret;
 	}
 
+	msm_dsi_sfpb_config(msm_host, true);
+
 	ret = dsi_calc_clk_rate(msm_host);
 	if (ret) {
 		pr_err("%s: unable to calc clk rate, %d\n", __func__, ret);
@@ -1862,7 +2096,7 @@
 	dsi_phy_sw_reset(msm_host);
 	ret = msm_dsi_manager_phy_enable(msm_host->id,
 					msm_host->byte_clk_rate * 8,
-					clk_get_rate(msm_host->esc_clk),
+					msm_host->esc_clk_rate,
 					&clk_pre, &clk_post);
 	dsi_bus_clk_disable(msm_host);
 	if (ret) {
@@ -1927,6 +2161,8 @@
 
 	dsi_host_regulator_disable(msm_host);
 
+	msm_dsi_sfpb_config(msm_host, false);
+
 	DBG("-");
 
 	msm_host->power_on = false;
diff --git a/drivers/gpu/drm/msm/dsi/dsi_manager.c b/drivers/gpu/drm/msm/dsi/dsi_manager.c
index 0455ff7..58ba7ec 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_manager.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_manager.c
@@ -774,7 +774,7 @@
 	return ret;
 }
 
-bool msm_dsi_manager_cmd_xfer_trigger(int id, u32 iova, u32 len)
+bool msm_dsi_manager_cmd_xfer_trigger(int id, u32 dma_base, u32 len)
 {
 	struct msm_dsi *msm_dsi = dsi_mgr_get_dsi(id);
 	struct msm_dsi *msm_dsi0 = dsi_mgr_get_dsi(DSI_0);
@@ -784,9 +784,9 @@
 		return false;
 
 	if (IS_SYNC_NEEDED() && msm_dsi0)
-		msm_dsi_host_cmd_xfer_commit(msm_dsi0->host, iova, len);
+		msm_dsi_host_cmd_xfer_commit(msm_dsi0->host, dma_base, len);
 
-	msm_dsi_host_cmd_xfer_commit(host, iova, len);
+	msm_dsi_host_cmd_xfer_commit(host, dma_base, len);
 
 	return true;
 }
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
index f1f955f..91a95fb 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
@@ -277,6 +277,10 @@
 	{ .compatible = "qcom,dsi-phy-20nm",
 	  .data = &dsi_phy_20nm_cfgs },
 #endif
+#ifdef CONFIG_DRM_MSM_DSI_28NM_8960_PHY
+	{ .compatible = "qcom,dsi-phy-28nm-8960",
+	  .data = &dsi_phy_28nm_8960_cfgs },
+#endif
 	{}
 };
 
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h
index 0456b25..0d54ed0 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h
@@ -43,6 +43,7 @@
 extern const struct msm_dsi_phy_cfg dsi_phy_28nm_hpm_cfgs;
 extern const struct msm_dsi_phy_cfg dsi_phy_28nm_lp_cfgs;
 extern const struct msm_dsi_phy_cfg dsi_phy_20nm_cfgs;
+extern const struct msm_dsi_phy_cfg dsi_phy_28nm_8960_cfgs;
 
 struct msm_dsi_dphy_timing {
 	u32 clk_pre;
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c
new file mode 100644
index 0000000..197b039
--- /dev/null
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2012-2015, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include "dsi_phy.h"
+#include "dsi.xml.h"
+
+static void dsi_28nm_dphy_set_timing(struct msm_dsi_phy *phy,
+		struct msm_dsi_dphy_timing *timing)
+{
+	void __iomem *base = phy->base;
+
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_TIMING_CTRL_0,
+		DSI_28nm_8960_PHY_TIMING_CTRL_0_CLK_ZERO(timing->clk_zero));
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_TIMING_CTRL_1,
+		DSI_28nm_8960_PHY_TIMING_CTRL_1_CLK_TRAIL(timing->clk_trail));
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_TIMING_CTRL_2,
+		DSI_28nm_8960_PHY_TIMING_CTRL_2_CLK_PREPARE(timing->clk_prepare));
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_TIMING_CTRL_3, 0x0);
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_TIMING_CTRL_4,
+		DSI_28nm_8960_PHY_TIMING_CTRL_4_HS_EXIT(timing->hs_exit));
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_TIMING_CTRL_5,
+		DSI_28nm_8960_PHY_TIMING_CTRL_5_HS_ZERO(timing->hs_zero));
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_TIMING_CTRL_6,
+		DSI_28nm_8960_PHY_TIMING_CTRL_6_HS_PREPARE(timing->hs_prepare));
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_TIMING_CTRL_7,
+		DSI_28nm_8960_PHY_TIMING_CTRL_7_HS_TRAIL(timing->hs_trail));
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_TIMING_CTRL_8,
+		DSI_28nm_8960_PHY_TIMING_CTRL_8_HS_RQST(timing->hs_rqst));
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_TIMING_CTRL_9,
+		DSI_28nm_8960_PHY_TIMING_CTRL_9_TA_GO(timing->ta_go) |
+		DSI_28nm_8960_PHY_TIMING_CTRL_9_TA_SURE(timing->ta_sure));
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_TIMING_CTRL_10,
+		DSI_28nm_8960_PHY_TIMING_CTRL_10_TA_GET(timing->ta_get));
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_TIMING_CTRL_11,
+		DSI_28nm_8960_PHY_TIMING_CTRL_11_TRIG3_CMD(0));
+}
+
+static void dsi_28nm_phy_regulator_init(struct msm_dsi_phy *phy)
+{
+	void __iomem *base = phy->reg_base;
+
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_MISC_REGULATOR_CTRL_0, 0x3);
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_MISC_REGULATOR_CTRL_1, 1);
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_MISC_REGULATOR_CTRL_2, 1);
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_MISC_REGULATOR_CTRL_3, 0);
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_MISC_REGULATOR_CTRL_4,
+		0x100);
+}
+
+static void dsi_28nm_phy_regulator_ctrl(struct msm_dsi_phy *phy)
+{
+	void __iomem *base = phy->reg_base;
+
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_MISC_REGULATOR_CTRL_0, 0x3);
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_MISC_REGULATOR_CTRL_1, 0xa);
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_MISC_REGULATOR_CTRL_2, 0x4);
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_MISC_REGULATOR_CTRL_3, 0x0);
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_MISC_REGULATOR_CTRL_4, 0x20);
+}
+
+static void dsi_28nm_phy_calibration(struct msm_dsi_phy *phy)
+{
+	void __iomem *base = phy->reg_base;
+	u32 status;
+	int i = 5000;
+
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_MISC_REGULATOR_CAL_PWR_CFG,
+			0x3);
+
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_MISC_CAL_SW_CFG_2, 0x0);
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_MISC_CAL_HW_CFG_1, 0x5a);
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_MISC_CAL_HW_CFG_3, 0x10);
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_MISC_CAL_HW_CFG_4, 0x1);
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_MISC_CAL_HW_CFG_0, 0x1);
+
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_MISC_CAL_HW_TRIGGER, 0x1);
+	usleep_range(5000, 6000);
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_MISC_CAL_HW_TRIGGER, 0x0);
+
+	do {
+		status = dsi_phy_read(base +
+				REG_DSI_28nm_8960_PHY_MISC_CAL_STATUS);
+
+		if (!(status & DSI_28nm_8960_PHY_MISC_CAL_STATUS_CAL_BUSY))
+			break;
+
+		udelay(1);
+	} while (--i > 0);
+}
+
+static void dsi_28nm_phy_lane_config(struct msm_dsi_phy *phy)
+{
+	void __iomem *base = phy->base;
+	int i;
+
+	for (i = 0; i < 4; i++) {
+		dsi_phy_write(base + REG_DSI_28nm_8960_PHY_LN_CFG_0(i), 0x80);
+		dsi_phy_write(base + REG_DSI_28nm_8960_PHY_LN_CFG_1(i), 0x45);
+		dsi_phy_write(base + REG_DSI_28nm_8960_PHY_LN_CFG_2(i), 0x00);
+		dsi_phy_write(base + REG_DSI_28nm_8960_PHY_LN_TEST_DATAPATH(i),
+			0x00);
+		dsi_phy_write(base + REG_DSI_28nm_8960_PHY_LN_TEST_STR_0(i),
+			0x01);
+		dsi_phy_write(base + REG_DSI_28nm_8960_PHY_LN_TEST_STR_1(i),
+			0x66);
+	}
+
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_LNCK_CFG_0, 0x40);
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_LNCK_CFG_1, 0x67);
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_LNCK_CFG_2, 0x0);
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_LNCK_TEST_DATAPATH, 0x0);
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_LNCK_TEST_STR0, 0x1);
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_LNCK_TEST_STR1, 0x88);
+}
+
+static int dsi_28nm_phy_enable(struct msm_dsi_phy *phy, int src_pll_id,
+		const unsigned long bit_rate, const unsigned long esc_rate)
+{
+	struct msm_dsi_dphy_timing *timing = &phy->timing;
+	void __iomem *base = phy->base;
+
+	DBG("");
+
+	if (msm_dsi_dphy_timing_calc(timing, bit_rate, esc_rate)) {
+		dev_err(&phy->pdev->dev,
+			"%s: D-PHY timing calculation failed\n", __func__);
+		return -EINVAL;
+	}
+
+	dsi_28nm_phy_regulator_init(phy);
+
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_LDO_CTRL, 0x04);
+
+	/* strength control */
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_STRENGTH_0, 0xff);
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_STRENGTH_1, 0x00);
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_STRENGTH_2, 0x06);
+
+	/* phy ctrl */
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_CTRL_0, 0x5f);
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_CTRL_1, 0x00);
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_CTRL_2, 0x00);
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_CTRL_3, 0x10);
+
+	dsi_28nm_phy_regulator_ctrl(phy);
+
+	dsi_28nm_phy_calibration(phy);
+
+	dsi_28nm_phy_lane_config(phy);
+
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_BIST_CTRL_4, 0x0f);
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_BIST_CTRL_1, 0x03);
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_BIST_CTRL_0, 0x03);
+	dsi_phy_write(base + REG_DSI_28nm_8960_PHY_BIST_CTRL_4, 0x0);
+
+	dsi_28nm_dphy_set_timing(phy, timing);
+
+	return 0;
+}
+
+static void dsi_28nm_phy_disable(struct msm_dsi_phy *phy)
+{
+	dsi_phy_write(phy->base + REG_DSI_28nm_8960_PHY_CTRL_0, 0x0);
+
+	/*
+	 * Wait for the registers writes to complete in order to
+	 * ensure that the phy is completely disabled
+	 */
+	wmb();
+}
+
+const struct msm_dsi_phy_cfg dsi_phy_28nm_8960_cfgs = {
+	.type = MSM_DSI_PHY_28NM_8960,
+	.src_pll_truthtable = { {true, true}, {false, true} },
+	.reg_cfg = {
+		.num = 1,
+		.regs = {
+			{"vddio", 1800000, 1800000, 100000, 100},
+		},
+	},
+	.ops = {
+		.enable = dsi_28nm_phy_enable,
+		.disable = dsi_28nm_phy_disable,
+	},
+};
diff --git a/drivers/gpu/drm/msm/dsi/pll/dsi_pll.c b/drivers/gpu/drm/msm/dsi/pll/dsi_pll.c
index 5104fc9..5cd438f 100644
--- a/drivers/gpu/drm/msm/dsi/pll/dsi_pll.c
+++ b/drivers/gpu/drm/msm/dsi/pll/dsi_pll.c
@@ -151,6 +151,9 @@
 	case MSM_DSI_PHY_28NM_LP:
 		pll = msm_dsi_pll_28nm_init(pdev, type, id);
 		break;
+	case MSM_DSI_PHY_28NM_8960:
+		pll = msm_dsi_pll_28nm_8960_init(pdev, id);
+		break;
 	default:
 		pll = ERR_PTR(-ENXIO);
 		break;
diff --git a/drivers/gpu/drm/msm/dsi/pll/dsi_pll.h b/drivers/gpu/drm/msm/dsi/pll/dsi_pll.h
index 063caa2..80b6038 100644
--- a/drivers/gpu/drm/msm/dsi/pll/dsi_pll.h
+++ b/drivers/gpu/drm/msm/dsi/pll/dsi_pll.h
@@ -93,6 +93,16 @@
 	return ERR_PTR(-ENODEV);
 }
 #endif
+#ifdef CONFIG_DRM_MSM_DSI_28NM_8960_PHY
+struct msm_dsi_pll *msm_dsi_pll_28nm_8960_init(struct platform_device *pdev,
+					       int id);
+#else
+struct msm_dsi_pll *msm_dsi_pll_28nm_8960_init(struct platform_device *pdev,
+					       int id)
+{
+	return ERR_PTR(-ENODEV);
+}
+#endif
 
 #endif /* __DSI_PLL_H__ */
 
diff --git a/drivers/gpu/drm/msm/dsi/pll/dsi_pll_28nm_8960.c b/drivers/gpu/drm/msm/dsi/pll/dsi_pll_28nm_8960.c
new file mode 100644
index 0000000..38c90e1
--- /dev/null
+++ b/drivers/gpu/drm/msm/dsi/pll/dsi_pll_28nm_8960.c
@@ -0,0 +1,533 @@
+/*
+ * Copyright (c) 2012-2015, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk-provider.h>
+
+#include "dsi_pll.h"
+#include "dsi.xml.h"
+
+/*
+ * DSI PLL 28nm (8960/A family) - clock diagram (eg: DSI1):
+ *
+ *
+ *                        +------+
+ *  dsi1vco_clk ----o-----| DIV1 |---dsi1pllbit (not exposed as clock)
+ *  F * byte_clk    |     +------+
+ *                  | bit clock divider (F / 8)
+ *                  |
+ *                  |     +------+
+ *                  o-----| DIV2 |---dsi0pllbyte---o---> To byte RCG
+ *                  |     +------+                 | (sets parent rate)
+ *                  | byte clock divider (F)       |
+ *                  |                              |
+ *                  |                              o---> To esc RCG
+ *                  |                                (doesn't set parent rate)
+ *                  |
+ *                  |     +------+
+ *                  o-----| DIV3 |----dsi0pll------o---> To dsi RCG
+ *                        +------+                 | (sets parent rate)
+ *                  dsi clock divider (F * magic)  |
+ *                                                 |
+ *                                                 o---> To pixel rcg
+ *                                                  (doesn't set parent rate)
+ */
+
+#define POLL_MAX_READS		8000
+#define POLL_TIMEOUT_US		1
+
+#define NUM_PROVIDED_CLKS	2
+
+#define VCO_REF_CLK_RATE	27000000
+#define VCO_MIN_RATE		600000000
+#define VCO_MAX_RATE		1200000000
+
+#define DSI_BYTE_PLL_CLK	0
+#define DSI_PIXEL_PLL_CLK	1
+
+#define VCO_PREF_DIV_RATIO	27
+
+struct pll_28nm_cached_state {
+	unsigned long vco_rate;
+	u8 postdiv3;
+	u8 postdiv2;
+	u8 postdiv1;
+};
+
+struct clk_bytediv {
+	struct clk_hw hw;
+	void __iomem *reg;
+};
+
+struct dsi_pll_28nm {
+	struct msm_dsi_pll base;
+
+	int id;
+	struct platform_device *pdev;
+	void __iomem *mmio;
+
+	/* custom byte clock divider */
+	struct clk_bytediv *bytediv;
+
+	/* private clocks: */
+	struct clk *clks[NUM_DSI_CLOCKS_MAX];
+	u32 num_clks;
+
+	/* clock-provider: */
+	struct clk *provided_clks[NUM_PROVIDED_CLKS];
+	struct clk_onecell_data clk_data;
+
+	struct pll_28nm_cached_state cached_state;
+};
+
+#define to_pll_28nm(x)	container_of(x, struct dsi_pll_28nm, base)
+
+static bool pll_28nm_poll_for_ready(struct dsi_pll_28nm *pll_28nm,
+				    int nb_tries, int timeout_us)
+{
+	bool pll_locked = false;
+	u32 val;
+
+	while (nb_tries--) {
+		val = pll_read(pll_28nm->mmio + REG_DSI_28nm_8960_PHY_PLL_RDY);
+		pll_locked = !!(val & DSI_28nm_8960_PHY_PLL_RDY_PLL_RDY);
+
+		if (pll_locked)
+			break;
+
+		udelay(timeout_us);
+	}
+	DBG("DSI PLL is %slocked", pll_locked ? "" : "*not* ");
+
+	return pll_locked;
+}
+
+/*
+ * Clock Callbacks
+ */
+static int dsi_pll_28nm_clk_set_rate(struct clk_hw *hw, unsigned long rate,
+				     unsigned long parent_rate)
+{
+	struct msm_dsi_pll *pll = hw_clk_to_pll(hw);
+	struct dsi_pll_28nm *pll_28nm = to_pll_28nm(pll);
+	void __iomem *base = pll_28nm->mmio;
+	u32 val, temp, fb_divider;
+
+	DBG("rate=%lu, parent's=%lu", rate, parent_rate);
+
+	temp = rate / 10;
+	val = VCO_REF_CLK_RATE / 10;
+	fb_divider = (temp * VCO_PREF_DIV_RATIO) / val;
+	fb_divider = fb_divider / 2 - 1;
+	pll_write(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_1,
+			fb_divider & 0xff);
+
+	val = pll_read(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_2);
+
+	val |= (fb_divider >> 8) & 0x07;
+
+	pll_write(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_2,
+			val);
+
+	val = pll_read(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_3);
+
+	val |= (VCO_PREF_DIV_RATIO - 1) & 0x3f;
+
+	pll_write(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_3,
+			val);
+
+	pll_write(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_6,
+			0xf);
+
+	val = pll_read(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_8);
+	val |= 0x7 << 4;
+	pll_write(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_8,
+			val);
+
+	return 0;
+}
+
+static int dsi_pll_28nm_clk_is_enabled(struct clk_hw *hw)
+{
+	struct msm_dsi_pll *pll = hw_clk_to_pll(hw);
+	struct dsi_pll_28nm *pll_28nm = to_pll_28nm(pll);
+
+	return pll_28nm_poll_for_ready(pll_28nm, POLL_MAX_READS,
+					POLL_TIMEOUT_US);
+}
+
+static unsigned long dsi_pll_28nm_clk_recalc_rate(struct clk_hw *hw,
+						  unsigned long parent_rate)
+{
+	struct msm_dsi_pll *pll = hw_clk_to_pll(hw);
+	struct dsi_pll_28nm *pll_28nm = to_pll_28nm(pll);
+	void __iomem *base = pll_28nm->mmio;
+	unsigned long vco_rate;
+	u32 status, fb_divider, temp, ref_divider;
+
+	VERB("parent_rate=%lu", parent_rate);
+
+	status = pll_read(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_0);
+
+	if (status & DSI_28nm_8960_PHY_PLL_CTRL_0_ENABLE) {
+		fb_divider = pll_read(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_1);
+		fb_divider &= 0xff;
+		temp = pll_read(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_2) & 0x07;
+		fb_divider = (temp << 8) | fb_divider;
+		fb_divider += 1;
+
+		ref_divider = pll_read(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_3);
+		ref_divider &= 0x3f;
+		ref_divider += 1;
+
+		/* multiply by 2 */
+		vco_rate = (parent_rate / ref_divider) * fb_divider * 2;
+	} else {
+		vco_rate = 0;
+	}
+
+	DBG("returning vco rate = %lu", vco_rate);
+
+	return vco_rate;
+}
+
+static const struct clk_ops clk_ops_dsi_pll_28nm_vco = {
+	.round_rate = msm_dsi_pll_helper_clk_round_rate,
+	.set_rate = dsi_pll_28nm_clk_set_rate,
+	.recalc_rate = dsi_pll_28nm_clk_recalc_rate,
+	.prepare = msm_dsi_pll_helper_clk_prepare,
+	.unprepare = msm_dsi_pll_helper_clk_unprepare,
+	.is_enabled = dsi_pll_28nm_clk_is_enabled,
+};
+
+/*
+ * Custom byte clock divier clk_ops
+ *
+ * This clock is the entry point to configuring the PLL. The user (dsi host)
+ * will set this clock's rate to the desired byte clock rate. The VCO lock
+ * frequency is a multiple of the byte clock rate. The multiplication factor
+ * (shown as F in the diagram above) is a function of the byte clock rate.
+ *
+ * This custom divider clock ensures that its parent (VCO) is set to the
+ * desired rate, and that the byte clock postdivider (POSTDIV2) is configured
+ * accordingly
+ */
+#define to_clk_bytediv(_hw) container_of(_hw, struct clk_bytediv, hw)
+
+static unsigned long clk_bytediv_recalc_rate(struct clk_hw *hw,
+		unsigned long parent_rate)
+{
+	struct clk_bytediv *bytediv = to_clk_bytediv(hw);
+	unsigned int div;
+
+	div = pll_read(bytediv->reg) & 0xff;
+
+	return parent_rate / (div + 1);
+}
+
+/* find multiplication factor(wrt byte clock) at which the VCO should be set */
+static unsigned int get_vco_mul_factor(unsigned long byte_clk_rate)
+{
+	unsigned long bit_mhz;
+
+	/* convert to bit clock in Mhz */
+	bit_mhz = (byte_clk_rate * 8) / 1000000;
+
+	if (bit_mhz < 125)
+		return 64;
+	else if (bit_mhz < 250)
+		return 32;
+	else if (bit_mhz < 600)
+		return 16;
+	else
+		return 8;
+}
+
+static long clk_bytediv_round_rate(struct clk_hw *hw, unsigned long rate,
+				   unsigned long *prate)
+{
+	unsigned long best_parent;
+	unsigned int factor;
+
+	factor = get_vco_mul_factor(rate);
+
+	best_parent = rate * factor;
+	*prate = clk_hw_round_rate(clk_hw_get_parent(hw), best_parent);
+
+	return *prate / factor;
+}
+
+static int clk_bytediv_set_rate(struct clk_hw *hw, unsigned long rate,
+				unsigned long parent_rate)
+{
+	struct clk_bytediv *bytediv = to_clk_bytediv(hw);
+	u32 val;
+	unsigned int factor;
+
+	factor = get_vco_mul_factor(rate);
+
+	val = pll_read(bytediv->reg);
+	val |= (factor - 1) & 0xff;
+	pll_write(bytediv->reg, val);
+
+	return 0;
+}
+
+/* Our special byte clock divider ops */
+static const struct clk_ops clk_bytediv_ops = {
+	.round_rate = clk_bytediv_round_rate,
+	.set_rate = clk_bytediv_set_rate,
+	.recalc_rate = clk_bytediv_recalc_rate,
+};
+
+/*
+ * PLL Callbacks
+ */
+static int dsi_pll_28nm_enable_seq(struct msm_dsi_pll *pll)
+{
+	struct dsi_pll_28nm *pll_28nm = to_pll_28nm(pll);
+	struct device *dev = &pll_28nm->pdev->dev;
+	void __iomem *base = pll_28nm->mmio;
+	bool locked;
+	unsigned int bit_div, byte_div;
+	int max_reads = 1000, timeout_us = 100;
+	u32 val;
+
+	DBG("id=%d", pll_28nm->id);
+
+	/*
+	 * before enabling the PLL, configure the bit clock divider since we
+	 * don't expose it as a clock to the outside world
+	 * 1: read back the byte clock divider that should already be set
+	 * 2: divide by 8 to get bit clock divider
+	 * 3: write it to POSTDIV1
+	 */
+	val = pll_read(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_9);
+	byte_div = val + 1;
+	bit_div = byte_div / 8;
+
+	val = pll_read(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_8);
+	val &= ~0xf;
+	val |= (bit_div - 1);
+	pll_write(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_8, val);
+
+	/* enable the PLL */
+	pll_write(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_0,
+			DSI_28nm_8960_PHY_PLL_CTRL_0_ENABLE);
+
+	locked = pll_28nm_poll_for_ready(pll_28nm, max_reads, timeout_us);
+
+	if (unlikely(!locked))
+		dev_err(dev, "DSI PLL lock failed\n");
+	else
+		DBG("DSI PLL lock success");
+
+	return locked ? 0 : -EINVAL;
+}
+
+static void dsi_pll_28nm_disable_seq(struct msm_dsi_pll *pll)
+{
+	struct dsi_pll_28nm *pll_28nm = to_pll_28nm(pll);
+
+	DBG("id=%d", pll_28nm->id);
+	pll_write(pll_28nm->mmio + REG_DSI_28nm_8960_PHY_PLL_CTRL_0, 0x00);
+}
+
+static void dsi_pll_28nm_save_state(struct msm_dsi_pll *pll)
+{
+	struct dsi_pll_28nm *pll_28nm = to_pll_28nm(pll);
+	struct pll_28nm_cached_state *cached_state = &pll_28nm->cached_state;
+	void __iomem *base = pll_28nm->mmio;
+
+	cached_state->postdiv3 =
+			pll_read(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_10);
+	cached_state->postdiv2 =
+			pll_read(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_9);
+	cached_state->postdiv1 =
+			pll_read(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_8);
+
+	cached_state->vco_rate = clk_hw_get_rate(&pll->clk_hw);
+}
+
+static int dsi_pll_28nm_restore_state(struct msm_dsi_pll *pll)
+{
+	struct dsi_pll_28nm *pll_28nm = to_pll_28nm(pll);
+	struct pll_28nm_cached_state *cached_state = &pll_28nm->cached_state;
+	void __iomem *base = pll_28nm->mmio;
+	int ret;
+
+	ret = dsi_pll_28nm_clk_set_rate(&pll->clk_hw,
+					cached_state->vco_rate, 0);
+	if (ret) {
+		dev_err(&pll_28nm->pdev->dev,
+			"restore vco rate failed. ret=%d\n", ret);
+		return ret;
+	}
+
+	pll_write(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_10,
+			cached_state->postdiv3);
+	pll_write(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_9,
+			cached_state->postdiv2);
+	pll_write(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_8,
+			cached_state->postdiv1);
+
+	return 0;
+}
+
+static int dsi_pll_28nm_get_provider(struct msm_dsi_pll *pll,
+				struct clk **byte_clk_provider,
+				struct clk **pixel_clk_provider)
+{
+	struct dsi_pll_28nm *pll_28nm = to_pll_28nm(pll);
+
+	if (byte_clk_provider)
+		*byte_clk_provider = pll_28nm->provided_clks[DSI_BYTE_PLL_CLK];
+	if (pixel_clk_provider)
+		*pixel_clk_provider =
+				pll_28nm->provided_clks[DSI_PIXEL_PLL_CLK];
+
+	return 0;
+}
+
+static void dsi_pll_28nm_destroy(struct msm_dsi_pll *pll)
+{
+	struct dsi_pll_28nm *pll_28nm = to_pll_28nm(pll);
+
+	msm_dsi_pll_helper_unregister_clks(pll_28nm->pdev,
+					pll_28nm->clks, pll_28nm->num_clks);
+}
+
+static int pll_28nm_register(struct dsi_pll_28nm *pll_28nm)
+{
+	char *clk_name, *parent_name, *vco_name;
+	struct clk_init_data vco_init = {
+		.parent_names = (const char *[]){ "pxo" },
+		.num_parents = 1,
+		.ops = &clk_ops_dsi_pll_28nm_vco,
+	};
+	struct device *dev = &pll_28nm->pdev->dev;
+	struct clk **clks = pll_28nm->clks;
+	struct clk **provided_clks = pll_28nm->provided_clks;
+	struct clk_bytediv *bytediv;
+	struct clk_init_data bytediv_init = { };
+	int ret, num = 0;
+
+	DBG("%d", pll_28nm->id);
+
+	bytediv = devm_kzalloc(dev, sizeof(*bytediv), GFP_KERNEL);
+	if (!bytediv)
+		return -ENOMEM;
+
+	vco_name = devm_kzalloc(dev, 32, GFP_KERNEL);
+	if (!vco_name)
+		return -ENOMEM;
+
+	parent_name = devm_kzalloc(dev, 32, GFP_KERNEL);
+	if (!parent_name)
+		return -ENOMEM;
+
+	clk_name = devm_kzalloc(dev, 32, GFP_KERNEL);
+	if (!clk_name)
+		return -ENOMEM;
+
+	pll_28nm->bytediv = bytediv;
+
+	snprintf(vco_name, 32, "dsi%dvco_clk", pll_28nm->id);
+	vco_init.name = vco_name;
+
+	pll_28nm->base.clk_hw.init = &vco_init;
+
+	clks[num++] = clk_register(dev, &pll_28nm->base.clk_hw);
+
+	/* prepare and register bytediv */
+	bytediv->hw.init = &bytediv_init;
+	bytediv->reg = pll_28nm->mmio + REG_DSI_28nm_8960_PHY_PLL_CTRL_9;
+
+	snprintf(parent_name, 32, "dsi%dvco_clk", pll_28nm->id);
+	snprintf(clk_name, 32, "dsi%dpllbyte", pll_28nm->id);
+
+	bytediv_init.name = clk_name;
+	bytediv_init.ops = &clk_bytediv_ops;
+	bytediv_init.flags = CLK_SET_RATE_PARENT;
+	bytediv_init.parent_names = (const char * const *) &parent_name;
+	bytediv_init.num_parents = 1;
+
+	/* DIV2 */
+	clks[num++] = provided_clks[DSI_BYTE_PLL_CLK] =
+			clk_register(dev, &bytediv->hw);
+
+	snprintf(clk_name, 32, "dsi%dpll", pll_28nm->id);
+	/* DIV3 */
+	clks[num++] = provided_clks[DSI_PIXEL_PLL_CLK] =
+			clk_register_divider(dev, clk_name,
+				parent_name, 0, pll_28nm->mmio +
+				REG_DSI_28nm_8960_PHY_PLL_CTRL_10,
+				0, 8, 0, NULL);
+
+	pll_28nm->num_clks = num;
+
+	pll_28nm->clk_data.clk_num = NUM_PROVIDED_CLKS;
+	pll_28nm->clk_data.clks = provided_clks;
+
+	ret = of_clk_add_provider(dev->of_node,
+			of_clk_src_onecell_get, &pll_28nm->clk_data);
+	if (ret) {
+		dev_err(dev, "failed to register clk provider: %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+struct msm_dsi_pll *msm_dsi_pll_28nm_8960_init(struct platform_device *pdev,
+					       int id)
+{
+	struct dsi_pll_28nm *pll_28nm;
+	struct msm_dsi_pll *pll;
+	int ret;
+
+	if (!pdev)
+		return ERR_PTR(-ENODEV);
+
+	pll_28nm = devm_kzalloc(&pdev->dev, sizeof(*pll_28nm), GFP_KERNEL);
+	if (!pll_28nm)
+		return ERR_PTR(-ENOMEM);
+
+	pll_28nm->pdev = pdev;
+	pll_28nm->id = id + 1;
+
+	pll_28nm->mmio = msm_ioremap(pdev, "dsi_pll", "DSI_PLL");
+	if (IS_ERR_OR_NULL(pll_28nm->mmio)) {
+		dev_err(&pdev->dev, "%s: failed to map pll base\n", __func__);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	pll = &pll_28nm->base;
+	pll->min_rate = VCO_MIN_RATE;
+	pll->max_rate = VCO_MAX_RATE;
+	pll->get_provider = dsi_pll_28nm_get_provider;
+	pll->destroy = dsi_pll_28nm_destroy;
+	pll->disable_seq = dsi_pll_28nm_disable_seq;
+	pll->save_state = dsi_pll_28nm_save_state;
+	pll->restore_state = dsi_pll_28nm_restore_state;
+
+	pll->en_seq_cnt = 1;
+	pll->enable_seqs[0] = dsi_pll_28nm_enable_seq;
+
+	ret = pll_28nm_register(pll_28nm);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to register PLL: %d\n", ret);
+		return ERR_PTR(ret);
+	}
+
+	return pll;
+}
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.c b/drivers/gpu/drm/msm/hdmi/hdmi.c
index 1f4a95e..9a0989c 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi.c
@@ -17,6 +17,8 @@
  */
 
 #include <linux/of_irq.h>
+#include <linux/of_gpio.h>
+
 #include "hdmi.h"
 
 void hdmi_set_mode(struct hdmi *hdmi, bool power_on)
@@ -322,8 +324,6 @@
  * The hdmi device:
  */
 
-#include <linux/of_gpio.h>
-
 #define HDMI_CFG(item, entry) \
 	.item ## _names = item ##_names_ ## entry, \
 	.item ## _cnt   = ARRAY_SIZE(item ## _names_ ## entry)
@@ -388,17 +388,6 @@
 		.hpd_freq      = hpd_clk_freq_8x74,
 };
 
-static const struct of_device_id dt_match[] = {
-	{ .compatible = "qcom,hdmi-tx-8996", .data = &hdmi_tx_8996_config },
-	{ .compatible = "qcom,hdmi-tx-8994", .data = &hdmi_tx_8994_config },
-	{ .compatible = "qcom,hdmi-tx-8084", .data = &hdmi_tx_8084_config },
-	{ .compatible = "qcom,hdmi-tx-8974", .data = &hdmi_tx_8974_config },
-	{ .compatible = "qcom,hdmi-tx-8960", .data = &hdmi_tx_8960_config },
-	{ .compatible = "qcom,hdmi-tx-8660", .data = &hdmi_tx_8660_config },
-	{}
-};
-
-#ifdef CONFIG_OF
 static int get_gpio(struct device *dev, struct device_node *of_node, const char *name)
 {
 	int gpio = of_get_named_gpio(of_node, name, 0);
@@ -413,7 +402,6 @@
 	}
 	return gpio;
 }
-#endif
 
 static int hdmi_bind(struct device *dev, struct device *master, void *data)
 {
@@ -421,16 +409,12 @@
 	struct msm_drm_private *priv = drm->dev_private;
 	static struct hdmi_platform_config *hdmi_cfg;
 	struct hdmi *hdmi;
-#ifdef CONFIG_OF
 	struct device_node *of_node = dev->of_node;
-	const struct of_device_id *match;
 
-	match = of_match_node(dt_match, of_node);
-	if (match && match->data) {
-		hdmi_cfg = (struct hdmi_platform_config *)match->data;
-		DBG("hdmi phy: %s", match->compatible);
-	} else {
-		dev_err(dev, "unknown phy: %s\n", of_node->name);
+	hdmi_cfg = (struct hdmi_platform_config *)
+			of_device_get_match_data(dev);
+	if (!hdmi_cfg) {
+		dev_err(dev, "unknown hdmi_cfg: %s\n", of_node->name);
 		return -ENXIO;
 	}
 
@@ -443,55 +427,6 @@
 	hdmi_cfg->mux_sel_gpio  = get_gpio(dev, of_node, "qcom,hdmi-tx-mux-sel");
 	hdmi_cfg->mux_lpm_gpio  = get_gpio(dev, of_node, "qcom,hdmi-tx-mux-lpm");
 
-#else
-	static struct hdmi_platform_config config = {};
-	static const char *hpd_clk_names[] = {
-			"core_clk", "master_iface_clk", "slave_iface_clk",
-	};
-	if (cpu_is_apq8064()) {
-		static const char *hpd_reg_names[] = {"8921_hdmi_mvs"};
-		config.phy_init      = hdmi_phy_8960_init;
-		config.hpd_reg_names = hpd_reg_names;
-		config.hpd_reg_cnt   = ARRAY_SIZE(hpd_reg_names);
-		config.hpd_clk_names = hpd_clk_names;
-		config.hpd_clk_cnt   = ARRAY_SIZE(hpd_clk_names);
-		config.ddc_clk_gpio  = 70;
-		config.ddc_data_gpio = 71;
-		config.hpd_gpio      = 72;
-		config.mux_en_gpio   = -1;
-		config.mux_sel_gpio  = -1;
-	} else if (cpu_is_msm8960() || cpu_is_msm8960ab()) {
-		static const char *hpd_reg_names[] = {"8921_hdmi_mvs"};
-		config.phy_init      = hdmi_phy_8960_init;
-		config.hpd_reg_names = hpd_reg_names;
-		config.hpd_reg_cnt   = ARRAY_SIZE(hpd_reg_names);
-		config.hpd_clk_names = hpd_clk_names;
-		config.hpd_clk_cnt   = ARRAY_SIZE(hpd_clk_names);
-		config.ddc_clk_gpio  = 100;
-		config.ddc_data_gpio = 101;
-		config.hpd_gpio      = 102;
-		config.mux_en_gpio   = -1;
-		config.mux_sel_gpio  = -1;
-	} else if (cpu_is_msm8x60()) {
-		static const char *hpd_reg_names[] = {
-				"8901_hdmi_mvs", "8901_mpp0"
-		};
-		config.phy_init      = hdmi_phy_8x60_init;
-		config.hpd_reg_names = hpd_reg_names;
-		config.hpd_reg_cnt   = ARRAY_SIZE(hpd_reg_names);
-		config.hpd_clk_names = hpd_clk_names;
-		config.hpd_clk_cnt   = ARRAY_SIZE(hpd_clk_names);
-		config.ddc_clk_gpio  = 170;
-		config.ddc_data_gpio = 171;
-		config.hpd_gpio      = 172;
-		config.mux_en_gpio   = -1;
-		config.mux_sel_gpio  = -1;
-	}
-	config.mmio_name     = "hdmi_msm_hdmi_addr";
-	config.qfprom_mmio_name = "hdmi_msm_qfprom_addr";
-
-	hdmi_cfg = &config;
-#endif
 	dev->platform_data = hdmi_cfg;
 
 	hdmi = hdmi_init(to_platform_device(dev));
@@ -529,6 +464,16 @@
 	return 0;
 }
 
+static const struct of_device_id dt_match[] = {
+	{ .compatible = "qcom,hdmi-tx-8996", .data = &hdmi_tx_8996_config },
+	{ .compatible = "qcom,hdmi-tx-8994", .data = &hdmi_tx_8994_config },
+	{ .compatible = "qcom,hdmi-tx-8084", .data = &hdmi_tx_8084_config },
+	{ .compatible = "qcom,hdmi-tx-8974", .data = &hdmi_tx_8974_config },
+	{ .compatible = "qcom,hdmi-tx-8960", .data = &hdmi_tx_8960_config },
+	{ .compatible = "qcom,hdmi-tx-8660", .data = &hdmi_tx_8660_config },
+	{}
+};
+
 static struct platform_driver hdmi_driver = {
 	.probe = hdmi_dev_probe,
 	.remove = hdmi_dev_remove,
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c
index 6ac9aa1..28df397 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c
@@ -678,7 +678,8 @@
 	drm_flip_work_init(&mdp4_crtc->unref_cursor_work,
 			"unref cursor", unref_cursor_worker);
 
-	drm_crtc_init_with_planes(dev, crtc, plane, NULL, &mdp4_crtc_funcs);
+	drm_crtc_init_with_planes(dev, crtc, plane, NULL, &mdp4_crtc_funcs,
+				  NULL);
 	drm_crtc_helper_add(crtc, &mdp4_crtc_helper_funcs);
 	plane->crtc = crtc;
 
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dsi_encoder.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dsi_encoder.c
new file mode 100644
index 0000000..2f57e94
--- /dev/null
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dsi_encoder.c
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2015, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2014, Inforce Computing. All rights reserved.
+ *
+ * Author: Vinay Simha <vinaysimha@inforcecomputing.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "mdp4_kms.h"
+
+#include "drm_crtc.h"
+#include "drm_crtc_helper.h"
+
+struct mdp4_dsi_encoder {
+	struct drm_encoder base;
+	struct drm_panel *panel;
+	bool enabled;
+};
+#define to_mdp4_dsi_encoder(x) container_of(x, struct mdp4_dsi_encoder, base)
+
+static struct mdp4_kms *get_kms(struct drm_encoder *encoder)
+{
+	struct msm_drm_private *priv = encoder->dev->dev_private;
+	return to_mdp4_kms(to_mdp_kms(priv->kms));
+}
+
+static void mdp4_dsi_encoder_destroy(struct drm_encoder *encoder)
+{
+	struct mdp4_dsi_encoder *mdp4_dsi_encoder = to_mdp4_dsi_encoder(encoder);
+
+	drm_encoder_cleanup(encoder);
+	kfree(mdp4_dsi_encoder);
+}
+
+static const struct drm_encoder_funcs mdp4_dsi_encoder_funcs = {
+	.destroy = mdp4_dsi_encoder_destroy,
+};
+
+static bool mdp4_dsi_encoder_mode_fixup(struct drm_encoder *encoder,
+					const struct drm_display_mode *mode,
+					struct drm_display_mode *adjusted_mode)
+{
+	return true;
+}
+
+static void mdp4_dsi_encoder_mode_set(struct drm_encoder *encoder,
+				      struct drm_display_mode *mode,
+				      struct drm_display_mode *adjusted_mode)
+{
+	struct mdp4_kms *mdp4_kms = get_kms(encoder);
+	uint32_t dsi_hsync_skew, vsync_period, vsync_len, ctrl_pol;
+	uint32_t display_v_start, display_v_end;
+	uint32_t hsync_start_x, hsync_end_x;
+
+	mode = adjusted_mode;
+
+	DBG("set mode: %d:\"%s\" %d %d %d %d %d %d %d %d %d %d 0x%x 0x%x",
+			mode->base.id, mode->name,
+			mode->vrefresh, mode->clock,
+			mode->hdisplay, mode->hsync_start,
+			mode->hsync_end, mode->htotal,
+			mode->vdisplay, mode->vsync_start,
+			mode->vsync_end, mode->vtotal,
+			mode->type, mode->flags);
+
+	ctrl_pol = 0;
+	if (mode->flags & DRM_MODE_FLAG_NHSYNC)
+		ctrl_pol |= MDP4_DSI_CTRL_POLARITY_HSYNC_LOW;
+	if (mode->flags & DRM_MODE_FLAG_NVSYNC)
+		ctrl_pol |= MDP4_DSI_CTRL_POLARITY_VSYNC_LOW;
+	/* probably need to get DATA_EN polarity from panel.. */
+
+	dsi_hsync_skew = 0;  /* get this from panel? */
+
+	hsync_start_x = (mode->htotal - mode->hsync_start);
+	hsync_end_x = mode->htotal - (mode->hsync_start - mode->hdisplay) - 1;
+
+	vsync_period = mode->vtotal * mode->htotal;
+	vsync_len = (mode->vsync_end - mode->vsync_start) * mode->htotal;
+	display_v_start = (mode->vtotal - mode->vsync_start) * mode->htotal + dsi_hsync_skew;
+	display_v_end = vsync_period - ((mode->vsync_start - mode->vdisplay) * mode->htotal) + dsi_hsync_skew - 1;
+
+	mdp4_write(mdp4_kms, REG_MDP4_DSI_HSYNC_CTRL,
+			MDP4_DSI_HSYNC_CTRL_PULSEW(mode->hsync_end - mode->hsync_start) |
+			MDP4_DSI_HSYNC_CTRL_PERIOD(mode->htotal));
+	mdp4_write(mdp4_kms, REG_MDP4_DSI_VSYNC_PERIOD, vsync_period);
+	mdp4_write(mdp4_kms, REG_MDP4_DSI_VSYNC_LEN, vsync_len);
+	mdp4_write(mdp4_kms, REG_MDP4_DSI_DISPLAY_HCTRL,
+			MDP4_DSI_DISPLAY_HCTRL_START(hsync_start_x) |
+			MDP4_DSI_DISPLAY_HCTRL_END(hsync_end_x));
+	mdp4_write(mdp4_kms, REG_MDP4_DSI_DISPLAY_VSTART, display_v_start);
+	mdp4_write(mdp4_kms, REG_MDP4_DSI_DISPLAY_VEND, display_v_end);
+
+	mdp4_write(mdp4_kms, REG_MDP4_DSI_CTRL_POLARITY, ctrl_pol);
+	mdp4_write(mdp4_kms, REG_MDP4_DSI_UNDERFLOW_CLR,
+			MDP4_DSI_UNDERFLOW_CLR_ENABLE_RECOVERY |
+			MDP4_DSI_UNDERFLOW_CLR_COLOR(0xff));
+	mdp4_write(mdp4_kms, REG_MDP4_DSI_ACTIVE_HCTL,
+			MDP4_DSI_ACTIVE_HCTL_START(0) |
+			MDP4_DSI_ACTIVE_HCTL_END(0));
+	mdp4_write(mdp4_kms, REG_MDP4_DSI_HSYNC_SKEW, dsi_hsync_skew);
+	mdp4_write(mdp4_kms, REG_MDP4_DSI_BORDER_CLR, 0);
+	mdp4_write(mdp4_kms, REG_MDP4_DSI_ACTIVE_VSTART, 0);
+	mdp4_write(mdp4_kms, REG_MDP4_DSI_ACTIVE_VEND, 0);
+}
+
+static void mdp4_dsi_encoder_disable(struct drm_encoder *encoder)
+{
+	struct mdp4_dsi_encoder *mdp4_dsi_encoder = to_mdp4_dsi_encoder(encoder);
+	struct mdp4_kms *mdp4_kms = get_kms(encoder);
+
+	if (!mdp4_dsi_encoder->enabled)
+		return;
+
+	mdp4_write(mdp4_kms, REG_MDP4_DSI_ENABLE, 0);
+
+	/*
+	 * Wait for a vsync so we know the ENABLE=0 latched before
+	 * the (connector) source of the vsync's gets disabled,
+	 * otherwise we end up in a funny state if we re-enable
+	 * before the disable latches, which results that some of
+	 * the settings changes for the new modeset (like new
+	 * scanout buffer) don't latch properly..
+	 */
+	mdp_irq_wait(&mdp4_kms->base, MDP4_IRQ_PRIMARY_VSYNC);
+
+	mdp4_dsi_encoder->enabled = false;
+}
+
+static void mdp4_dsi_encoder_enable(struct drm_encoder *encoder)
+{
+	struct mdp4_dsi_encoder *mdp4_dsi_encoder = to_mdp4_dsi_encoder(encoder);
+	struct mdp4_kms *mdp4_kms = get_kms(encoder);
+
+	if (mdp4_dsi_encoder->enabled)
+		return;
+
+	 mdp4_crtc_set_config(encoder->crtc,
+			MDP4_DMA_CONFIG_PACK_ALIGN_MSB |
+			MDP4_DMA_CONFIG_DEFLKR_EN |
+			MDP4_DMA_CONFIG_DITHER_EN |
+			MDP4_DMA_CONFIG_R_BPC(BPC8) |
+			MDP4_DMA_CONFIG_G_BPC(BPC8) |
+			MDP4_DMA_CONFIG_B_BPC(BPC8) |
+			MDP4_DMA_CONFIG_PACK(0x21));
+
+	mdp4_crtc_set_intf(encoder->crtc, INTF_DSI_VIDEO, 0);
+
+	mdp4_write(mdp4_kms, REG_MDP4_DSI_ENABLE, 1);
+
+	mdp4_dsi_encoder->enabled = true;
+}
+
+static const struct drm_encoder_helper_funcs mdp4_dsi_encoder_helper_funcs = {
+	.mode_fixup = mdp4_dsi_encoder_mode_fixup,
+	.mode_set = mdp4_dsi_encoder_mode_set,
+	.disable = mdp4_dsi_encoder_disable,
+	.enable = mdp4_dsi_encoder_enable,
+};
+
+/* initialize encoder */
+struct drm_encoder *mdp4_dsi_encoder_init(struct drm_device *dev)
+{
+	struct drm_encoder *encoder = NULL;
+	struct mdp4_dsi_encoder *mdp4_dsi_encoder;
+	int ret;
+
+	mdp4_dsi_encoder = kzalloc(sizeof(*mdp4_dsi_encoder), GFP_KERNEL);
+	if (!mdp4_dsi_encoder) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	encoder = &mdp4_dsi_encoder->base;
+
+	drm_encoder_init(dev, encoder, &mdp4_dsi_encoder_funcs,
+			 DRM_MODE_ENCODER_DSI, NULL);
+	drm_encoder_helper_add(encoder, &mdp4_dsi_encoder_helper_funcs);
+
+	return encoder;
+
+fail:
+	if (encoder)
+		mdp4_dsi_encoder_destroy(encoder);
+
+	return ERR_PTR(ret);
+}
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dtv_encoder.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dtv_encoder.c
index 89614c6..a21df54 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dtv_encoder.c
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dtv_encoder.c
@@ -262,7 +262,7 @@
 	encoder = &mdp4_dtv_encoder->base;
 
 	drm_encoder_init(dev, encoder, &mdp4_dtv_encoder_funcs,
-			 DRM_MODE_ENCODER_TMDS);
+			 DRM_MODE_ENCODER_TMDS, NULL);
 	drm_encoder_helper_add(encoder, &mdp4_dtv_encoder_helper_funcs);
 
 	mdp4_dtv_encoder->src_clk = devm_clk_get(dev->dev, "src_clk");
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_irq.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_irq.c
index 5ed38cf..a521207 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_irq.c
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_irq.c
@@ -29,7 +29,7 @@
 
 static void mdp4_irq_error_handler(struct mdp_irq *irq, uint32_t irqstatus)
 {
-	DRM_ERROR("errors: %08x\n", irqstatus);
+	DRM_ERROR_RATELIMITED("errors: %08x\n", irqstatus);
 }
 
 void mdp4_irq_preinstall(struct msm_kms *kms)
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c
index 077f752..5a8e3d6 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c
@@ -169,7 +169,14 @@
 		struct drm_encoder *encoder)
 {
 	/* if we had >1 encoder, we'd need something more clever: */
-	return mdp4_dtv_round_pixclk(encoder, rate);
+	switch (encoder->encoder_type) {
+	case DRM_MODE_ENCODER_TMDS:
+		return mdp4_dtv_round_pixclk(encoder, rate);
+	case DRM_MODE_ENCODER_LVDS:
+	case DRM_MODE_ENCODER_DSI:
+	default:
+		return rate;
+	}
 }
 
 static void mdp4_preclose(struct msm_kms *kms, struct drm_file *file)
@@ -240,19 +247,18 @@
 	return 0;
 }
 
-#ifdef CONFIG_OF
-static struct drm_panel *detect_panel(struct drm_device *dev)
+static struct device_node *mdp4_detect_lcdc_panel(struct drm_device *dev)
 {
 	struct device_node *endpoint, *panel_node;
 	struct device_node *np = dev->dev->of_node;
-	struct drm_panel *panel = NULL;
 
 	endpoint = of_graph_get_next_endpoint(np, NULL);
 	if (!endpoint) {
-		dev_err(dev->dev, "no valid endpoint\n");
-		return ERR_PTR(-ENODEV);
+		DBG("no endpoint in MDP4 to fetch LVDS panel\n");
+		return NULL;
 	}
 
+	/* don't proceed if we have an endpoint but no panel_node tied to it */
 	panel_node = of_graph_get_remote_port_parent(endpoint);
 	if (!panel_node) {
 		dev_err(dev->dev, "no valid panel node\n");
@@ -262,20 +268,111 @@
 
 	of_node_put(endpoint);
 
-	panel = of_drm_find_panel(panel_node);
-	if (!panel) {
-		of_node_put(panel_node);
-		return ERR_PTR(-EPROBE_DEFER);
+	return panel_node;
+}
+
+static int mdp4_modeset_init_intf(struct mdp4_kms *mdp4_kms,
+				  int intf_type)
+{
+	struct drm_device *dev = mdp4_kms->dev;
+	struct msm_drm_private *priv = dev->dev_private;
+	struct drm_encoder *encoder;
+	struct drm_connector *connector;
+	struct device_node *panel_node;
+	struct drm_encoder *dsi_encs[MSM_DSI_ENCODER_NUM];
+	int i, dsi_id;
+	int ret;
+
+	switch (intf_type) {
+	case DRM_MODE_ENCODER_LVDS:
+		/*
+		 * bail out early if:
+		 * - there is no panel node (no need to initialize lcdc
+		 *   encoder and lvds connector), or
+		 * - panel node is a bad pointer
+		 */
+		panel_node = mdp4_detect_lcdc_panel(dev);
+		if (IS_ERR_OR_NULL(panel_node))
+			return PTR_ERR(panel_node);
+
+		encoder = mdp4_lcdc_encoder_init(dev, panel_node);
+		if (IS_ERR(encoder)) {
+			dev_err(dev->dev, "failed to construct LCDC encoder\n");
+			return PTR_ERR(encoder);
+		}
+
+		/* LCDC can be hooked to DMA_P (TODO: Add DMA_S later?) */
+		encoder->possible_crtcs = 1 << DMA_P;
+
+		connector = mdp4_lvds_connector_init(dev, panel_node, encoder);
+		if (IS_ERR(connector)) {
+			dev_err(dev->dev, "failed to initialize LVDS connector\n");
+			return PTR_ERR(connector);
+		}
+
+		priv->encoders[priv->num_encoders++] = encoder;
+		priv->connectors[priv->num_connectors++] = connector;
+
+		break;
+	case DRM_MODE_ENCODER_TMDS:
+		encoder = mdp4_dtv_encoder_init(dev);
+		if (IS_ERR(encoder)) {
+			dev_err(dev->dev, "failed to construct DTV encoder\n");
+			return PTR_ERR(encoder);
+		}
+
+		/* DTV can be hooked to DMA_E: */
+		encoder->possible_crtcs = 1 << 1;
+
+		if (priv->hdmi) {
+			/* Construct bridge/connector for HDMI: */
+			ret = hdmi_modeset_init(priv->hdmi, dev, encoder);
+			if (ret) {
+				dev_err(dev->dev, "failed to initialize HDMI: %d\n", ret);
+				return ret;
+			}
+		}
+
+		priv->encoders[priv->num_encoders++] = encoder;
+
+		break;
+	case DRM_MODE_ENCODER_DSI:
+		/* only DSI1 supported for now */
+		dsi_id = 0;
+
+		if (!priv->dsi[dsi_id])
+			break;
+
+		for (i = 0; i < MSM_DSI_ENCODER_NUM; i++) {
+			dsi_encs[i] = mdp4_dsi_encoder_init(dev);
+			if (IS_ERR(dsi_encs[i])) {
+				ret = PTR_ERR(dsi_encs[i]);
+				dev_err(dev->dev,
+					"failed to construct DSI encoder: %d\n",
+					ret);
+				return ret;
+			}
+
+			/* TODO: Add DMA_S later? */
+			dsi_encs[i]->possible_crtcs = 1 << DMA_P;
+			priv->encoders[priv->num_encoders++] = dsi_encs[i];
+		}
+
+		ret = msm_dsi_modeset_init(priv->dsi[dsi_id], dev, dsi_encs);
+		if (ret) {
+			dev_err(dev->dev, "failed to initialize DSI: %d\n",
+				ret);
+			return ret;
+		}
+
+		break;
+	default:
+		dev_err(dev->dev, "Invalid or unsupported interface\n");
+		return -EINVAL;
 	}
 
-	return panel;
+	return 0;
 }
-#else
-static struct drm_panel *detect_panel(struct drm_device *dev)
-{
-	// ??? maybe use a module param to specify which panel is attached?
-}
-#endif
 
 static int modeset_init(struct mdp4_kms *mdp4_kms)
 {
@@ -283,111 +380,73 @@
 	struct msm_drm_private *priv = dev->dev_private;
 	struct drm_plane *plane;
 	struct drm_crtc *crtc;
-	struct drm_encoder *encoder;
-	struct drm_connector *connector;
-	struct drm_panel *panel;
-	int ret;
+	int i, ret;
+	static const enum mdp4_pipe rgb_planes[] = {
+		RGB1, RGB2,
+	};
+	static const enum mdp4_pipe vg_planes[] = {
+		VG1, VG2,
+	};
+	static const enum mdp4_dma mdp4_crtcs[] = {
+		DMA_P, DMA_E,
+	};
+	static const char * const mdp4_crtc_names[] = {
+		"DMA_P", "DMA_E",
+	};
+	static const int mdp4_intfs[] = {
+		DRM_MODE_ENCODER_LVDS,
+		DRM_MODE_ENCODER_DSI,
+		DRM_MODE_ENCODER_TMDS,
+	};
 
 	/* construct non-private planes: */
-	plane = mdp4_plane_init(dev, VG1, false);
-	if (IS_ERR(plane)) {
-		dev_err(dev->dev, "failed to construct plane for VG1\n");
-		ret = PTR_ERR(plane);
-		goto fail;
+	for (i = 0; i < ARRAY_SIZE(vg_planes); i++) {
+		plane = mdp4_plane_init(dev, vg_planes[i], false);
+		if (IS_ERR(plane)) {
+			dev_err(dev->dev,
+				"failed to construct plane for VG%d\n", i + 1);
+			ret = PTR_ERR(plane);
+			goto fail;
+		}
+		priv->planes[priv->num_planes++] = plane;
 	}
-	priv->planes[priv->num_planes++] = plane;
 
-	plane = mdp4_plane_init(dev, VG2, false);
-	if (IS_ERR(plane)) {
-		dev_err(dev->dev, "failed to construct plane for VG2\n");
-		ret = PTR_ERR(plane);
-		goto fail;
+	for (i = 0; i < ARRAY_SIZE(mdp4_crtcs); i++) {
+		plane = mdp4_plane_init(dev, rgb_planes[i], true);
+		if (IS_ERR(plane)) {
+			dev_err(dev->dev,
+				"failed to construct plane for RGB%d\n", i + 1);
+			ret = PTR_ERR(plane);
+			goto fail;
+		}
+
+		crtc  = mdp4_crtc_init(dev, plane, priv->num_crtcs, i,
+				mdp4_crtcs[i]);
+		if (IS_ERR(crtc)) {
+			dev_err(dev->dev, "failed to construct crtc for %s\n",
+				mdp4_crtc_names[i]);
+			ret = PTR_ERR(crtc);
+			goto fail;
+		}
+
+		priv->crtcs[priv->num_crtcs++] = crtc;
 	}
-	priv->planes[priv->num_planes++] = plane;
 
 	/*
-	 * Setup the LCDC/LVDS path: RGB2 -> DMA_P -> LCDC -> LVDS:
+	 * we currently set up two relatively fixed paths:
+	 *
+	 * LCDC/LVDS path: RGB1 -> DMA_P -> LCDC -> LVDS
+	 *			or
+	 * DSI path: RGB1 -> DMA_P -> DSI1 -> DSI Panel
+	 *
+	 * DTV/HDMI path: RGB2 -> DMA_E -> DTV -> HDMI
 	 */
 
-	panel = detect_panel(dev);
-	if (IS_ERR(panel)) {
-		ret = PTR_ERR(panel);
-		dev_err(dev->dev, "failed to detect LVDS panel: %d\n", ret);
-		goto fail;
-	}
-
-	plane = mdp4_plane_init(dev, RGB2, true);
-	if (IS_ERR(plane)) {
-		dev_err(dev->dev, "failed to construct plane for RGB2\n");
-		ret = PTR_ERR(plane);
-		goto fail;
-	}
-
-	crtc  = mdp4_crtc_init(dev, plane, priv->num_crtcs, 0, DMA_P);
-	if (IS_ERR(crtc)) {
-		dev_err(dev->dev, "failed to construct crtc for DMA_P\n");
-		ret = PTR_ERR(crtc);
-		goto fail;
-	}
-
-	encoder = mdp4_lcdc_encoder_init(dev, panel);
-	if (IS_ERR(encoder)) {
-		dev_err(dev->dev, "failed to construct LCDC encoder\n");
-		ret = PTR_ERR(encoder);
-		goto fail;
-	}
-
-	/* LCDC can be hooked to DMA_P: */
-	encoder->possible_crtcs = 1 << priv->num_crtcs;
-
-	priv->crtcs[priv->num_crtcs++] = crtc;
-	priv->encoders[priv->num_encoders++] = encoder;
-
-	connector = mdp4_lvds_connector_init(dev, panel, encoder);
-	if (IS_ERR(connector)) {
-		ret = PTR_ERR(connector);
-		dev_err(dev->dev, "failed to initialize LVDS connector: %d\n", ret);
-		goto fail;
-	}
-
-	priv->connectors[priv->num_connectors++] = connector;
-
-	/*
-	 * Setup DTV/HDMI path: RGB1 -> DMA_E -> DTV -> HDMI:
-	 */
-
-	plane = mdp4_plane_init(dev, RGB1, true);
-	if (IS_ERR(plane)) {
-		dev_err(dev->dev, "failed to construct plane for RGB1\n");
-		ret = PTR_ERR(plane);
-		goto fail;
-	}
-
-	crtc  = mdp4_crtc_init(dev, plane, priv->num_crtcs, 1, DMA_E);
-	if (IS_ERR(crtc)) {
-		dev_err(dev->dev, "failed to construct crtc for DMA_E\n");
-		ret = PTR_ERR(crtc);
-		goto fail;
-	}
-
-	encoder = mdp4_dtv_encoder_init(dev);
-	if (IS_ERR(encoder)) {
-		dev_err(dev->dev, "failed to construct DTV encoder\n");
-		ret = PTR_ERR(encoder);
-		goto fail;
-	}
-
-	/* DTV can be hooked to DMA_E: */
-	encoder->possible_crtcs = 1 << priv->num_crtcs;
-
-	priv->crtcs[priv->num_crtcs++] = crtc;
-	priv->encoders[priv->num_encoders++] = encoder;
-
-	if (priv->hdmi) {
-		/* Construct bridge/connector for HDMI: */
-		ret = hdmi_modeset_init(priv->hdmi, dev, encoder);
+	for (i = 0; i < ARRAY_SIZE(mdp4_intfs); i++) {
+		ret = mdp4_modeset_init_intf(mdp4_kms, mdp4_intfs[i]);
 		if (ret) {
-			dev_err(dev->dev, "failed to initialize HDMI: %d\n", ret);
+			dev_err(dev->dev, "failed to initialize intf: %d, %d\n",
+				i, ret);
 			goto fail;
 		}
 	}
@@ -558,17 +617,10 @@
 static struct mdp4_platform_config *mdp4_get_config(struct platform_device *dev)
 {
 	static struct mdp4_platform_config config = {};
-#ifdef CONFIG_OF
-	/* TODO */
+
+	/* TODO: Chips that aren't apq8064 have a 200 Mhz max_clk */
 	config.max_clk = 266667000;
 	config.iommu = iommu_domain_alloc(&platform_bus_type);
-#else
-	if (cpu_is_apq8064())
-		config.max_clk = 266667000;
-	else
-		config.max_clk = 200000000;
 
-	config.iommu = msm_get_iommu_domain(DISPLAY_READ_DOMAIN);
-#endif
 	return &config;
 }
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h
index 8a7f6e1..d2c96ef 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h
@@ -157,7 +157,7 @@
 			COND(mixer == 1, MDP4_LAYERMIXER_IN_CFG_PIPE6_MIXER1);
 		break;
 	default:
-		WARN_ON("invalid pipe");
+		WARN(1, "invalid pipe");
 		break;
 	}
 
@@ -212,10 +212,19 @@
 
 long mdp4_lcdc_round_pixclk(struct drm_encoder *encoder, unsigned long rate);
 struct drm_encoder *mdp4_lcdc_encoder_init(struct drm_device *dev,
-		struct drm_panel *panel);
+		struct device_node *panel_node);
 
 struct drm_connector *mdp4_lvds_connector_init(struct drm_device *dev,
-		struct drm_panel *panel, struct drm_encoder *encoder);
+		struct device_node *panel_node, struct drm_encoder *encoder);
+
+#ifdef CONFIG_DRM_MSM_DSI
+struct drm_encoder *mdp4_dsi_encoder_init(struct drm_device *dev);
+#else
+static inline struct drm_encoder *mdp4_dsi_encoder_init(struct drm_device *dev)
+{
+	return ERR_PTR(-ENODEV);
+}
+#endif
 
 #ifdef CONFIG_COMMON_CLK
 struct clk *mpd4_lvds_pll_init(struct drm_device *dev);
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_lcdc_encoder.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_lcdc_encoder.c
index 4cd6e72..cd63fed 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_lcdc_encoder.c
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_lcdc_encoder.c
@@ -23,6 +23,7 @@
 
 struct mdp4_lcdc_encoder {
 	struct drm_encoder base;
+	struct device_node *panel_node;
 	struct drm_panel *panel;
 	struct clk *lcdc_clk;
 	unsigned long int pixclock;
@@ -338,7 +339,7 @@
 	struct mdp4_lcdc_encoder *mdp4_lcdc_encoder =
 			to_mdp4_lcdc_encoder(encoder);
 	struct mdp4_kms *mdp4_kms = get_kms(encoder);
-	struct drm_panel *panel = mdp4_lcdc_encoder->panel;
+	struct drm_panel *panel;
 	int i, ret;
 
 	if (WARN_ON(!mdp4_lcdc_encoder->enabled))
@@ -346,6 +347,7 @@
 
 	mdp4_write(mdp4_kms, REG_MDP4_LCDC_ENABLE, 0);
 
+	panel = of_drm_find_panel(mdp4_lcdc_encoder->panel_node);
 	if (panel) {
 		drm_panel_disable(panel);
 		drm_panel_unprepare(panel);
@@ -381,7 +383,7 @@
 			to_mdp4_lcdc_encoder(encoder);
 	unsigned long pc = mdp4_lcdc_encoder->pixclock;
 	struct mdp4_kms *mdp4_kms = get_kms(encoder);
-	struct drm_panel *panel = mdp4_lcdc_encoder->panel;
+	struct drm_panel *panel;
 	int i, ret;
 
 	if (WARN_ON(mdp4_lcdc_encoder->enabled))
@@ -414,6 +416,7 @@
 	if (ret)
 		dev_err(dev->dev, "failed to enable lcdc_clk: %d\n", ret);
 
+	panel = of_drm_find_panel(mdp4_lcdc_encoder->panel_node);
 	if (panel) {
 		drm_panel_prepare(panel);
 		drm_panel_enable(panel);
@@ -442,7 +445,7 @@
 
 /* initialize encoder */
 struct drm_encoder *mdp4_lcdc_encoder_init(struct drm_device *dev,
-		struct drm_panel *panel)
+		struct device_node *panel_node)
 {
 	struct drm_encoder *encoder = NULL;
 	struct mdp4_lcdc_encoder *mdp4_lcdc_encoder;
@@ -455,12 +458,12 @@
 		goto fail;
 	}
 
-	mdp4_lcdc_encoder->panel = panel;
+	mdp4_lcdc_encoder->panel_node = panel_node;
 
 	encoder = &mdp4_lcdc_encoder->base;
 
 	drm_encoder_init(dev, encoder, &mdp4_lcdc_encoder_funcs,
-			 DRM_MODE_ENCODER_LVDS);
+			 DRM_MODE_ENCODER_LVDS, NULL);
 	drm_encoder_helper_add(encoder, &mdp4_lcdc_encoder_helper_funcs);
 
 	/* TODO: do we need different pll in other cases? */
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_lvds_connector.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_lvds_connector.c
index 9211851..e73e174 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_lvds_connector.c
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_lvds_connector.c
@@ -23,6 +23,7 @@
 struct mdp4_lvds_connector {
 	struct drm_connector base;
 	struct drm_encoder *encoder;
+	struct device_node *panel_node;
 	struct drm_panel *panel;
 };
 #define to_mdp4_lvds_connector(x) container_of(x, struct mdp4_lvds_connector, base)
@@ -33,6 +34,10 @@
 	struct mdp4_lvds_connector *mdp4_lvds_connector =
 			to_mdp4_lvds_connector(connector);
 
+	if (!mdp4_lvds_connector->panel)
+		mdp4_lvds_connector->panel =
+			of_drm_find_panel(mdp4_lvds_connector->panel_node);
+
 	return mdp4_lvds_connector->panel ?
 			connector_status_connected :
 			connector_status_disconnected;
@@ -42,10 +47,6 @@
 {
 	struct mdp4_lvds_connector *mdp4_lvds_connector =
 			to_mdp4_lvds_connector(connector);
-	struct drm_panel *panel = mdp4_lvds_connector->panel;
-
-	if (panel)
-		drm_panel_detach(panel);
 
 	drm_connector_unregister(connector);
 	drm_connector_cleanup(connector);
@@ -60,9 +61,14 @@
 	struct drm_panel *panel = mdp4_lvds_connector->panel;
 	int ret = 0;
 
-	if (panel)
+	if (panel) {
+		drm_panel_attach(panel, connector);
+
 		ret = panel->funcs->get_modes(panel);
 
+		drm_panel_detach(panel);
+	}
+
 	return ret;
 }
 
@@ -111,7 +117,7 @@
 
 /* initialize connector */
 struct drm_connector *mdp4_lvds_connector_init(struct drm_device *dev,
-		struct drm_panel *panel, struct drm_encoder *encoder)
+		struct device_node *panel_node, struct drm_encoder *encoder)
 {
 	struct drm_connector *connector = NULL;
 	struct mdp4_lvds_connector *mdp4_lvds_connector;
@@ -124,7 +130,7 @@
 	}
 
 	mdp4_lvds_connector->encoder = encoder;
-	mdp4_lvds_connector->panel = panel;
+	mdp4_lvds_connector->panel_node = panel_node;
 
 	connector = &mdp4_lvds_connector->base;
 
@@ -141,9 +147,6 @@
 
 	drm_mode_connector_attach_encoder(connector, encoder);
 
-	if (panel)
-		drm_panel_attach(panel, connector);
-
 	return connector;
 
 fail:
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c
index 30d57e7..9f96dfe 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c
@@ -397,7 +397,8 @@
 
 	type = private_plane ? DRM_PLANE_TYPE_PRIMARY : DRM_PLANE_TYPE_OVERLAY;
 	ret = drm_universal_plane_init(dev, plane, 0xff, &mdp4_plane_funcs,
-				 mdp4_plane->formats, mdp4_plane->nformats, type);
+				 mdp4_plane->formats, mdp4_plane->nformats,
+				 type, NULL);
 	if (ret)
 		goto fail;
 
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c
index bb1225a..57f73f0 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c
@@ -553,9 +553,7 @@
 static struct mdp5_cfg_platform *mdp5_get_config(struct platform_device *dev)
 {
 	static struct mdp5_cfg_platform config = {};
-#ifdef CONFIG_OF
-	/* TODO */
-#endif
+
 	config.iommu = iommu_domain_alloc(&platform_bus_type);
 
 	return &config;
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c
index 8e6c9b5..1aa21db 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c
@@ -326,7 +326,7 @@
 	mdp5_cmd_enc->ctl = ctl;
 
 	drm_encoder_init(dev, encoder, &mdp5_cmd_encoder_funcs,
-			DRM_MODE_ENCODER_DSI);
+			DRM_MODE_ENCODER_DSI, NULL);
 
 	drm_encoder_helper_add(encoder, &mdp5_cmd_encoder_helper_funcs);
 
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
index 7f9f4ac..20cee5c 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
@@ -797,7 +797,8 @@
 	snprintf(mdp5_crtc->name, sizeof(mdp5_crtc->name), "%s:%d",
 			pipe2name(mdp5_plane_pipe(plane)), id);
 
-	drm_crtc_init_with_planes(dev, crtc, plane, NULL, &mdp5_crtc_funcs);
+	drm_crtc_init_with_planes(dev, crtc, plane, NULL, &mdp5_crtc_funcs,
+				  NULL);
 
 	drm_flip_work_init(&mdp5_crtc->unref_cursor_work,
 			"unref cursor", unref_cursor_worker);
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c
index c9e32b0..0d737ca 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c
@@ -293,6 +293,24 @@
 	.enable = mdp5_encoder_enable,
 };
 
+int mdp5_encoder_get_linecount(struct drm_encoder *encoder)
+{
+	struct mdp5_encoder *mdp5_encoder = to_mdp5_encoder(encoder);
+	struct mdp5_kms *mdp5_kms = get_kms(encoder);
+	int intf = mdp5_encoder->intf.num;
+
+	return mdp5_read(mdp5_kms, REG_MDP5_INTF_LINE_COUNT(intf));
+}
+
+u32 mdp5_encoder_get_framecount(struct drm_encoder *encoder)
+{
+	struct mdp5_encoder *mdp5_encoder = to_mdp5_encoder(encoder);
+	struct mdp5_kms *mdp5_kms = get_kms(encoder);
+	int intf = mdp5_encoder->intf.num;
+
+	return mdp5_read(mdp5_kms, REG_MDP5_INTF_FRAME_COUNT(intf));
+}
+
 int mdp5_encoder_set_split_display(struct drm_encoder *encoder,
 					struct drm_encoder *slave_encoder)
 {
@@ -354,7 +372,7 @@
 
 	spin_lock_init(&mdp5_encoder->intf_lock);
 
-	drm_encoder_init(dev, encoder, &mdp5_encoder_funcs, enc_type);
+	drm_encoder_init(dev, encoder, &mdp5_encoder_funcs, enc_type, NULL);
 
 	drm_encoder_helper_add(encoder, &mdp5_encoder_helper_funcs);
 
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c
index b0d4b53..73bc3e3 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c
@@ -31,7 +31,7 @@
 
 static void mdp5_irq_error_handler(struct mdp_irq *irq, uint32_t irqstatus)
 {
-	DRM_ERROR("errors: %08x\n", irqstatus);
+	DRM_ERROR_RATELIMITED("errors: %08x\n", irqstatus);
 }
 
 void mdp5_irq_preinstall(struct msm_kms *kms)
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
index b532faa..e115318 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
@@ -468,6 +468,127 @@
 	return 0;
 }
 
+static struct drm_encoder *get_encoder_from_crtc(struct drm_crtc *crtc)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_encoder *encoder;
+
+	drm_for_each_encoder(encoder, dev)
+		if (encoder->crtc == crtc)
+			return encoder;
+
+	return NULL;
+}
+
+static int mdp5_get_scanoutpos(struct drm_device *dev, unsigned int pipe,
+			       unsigned int flags, int *vpos, int *hpos,
+			       ktime_t *stime, ktime_t *etime,
+			       const struct drm_display_mode *mode)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	struct drm_crtc *crtc;
+	struct drm_encoder *encoder;
+	int line, vsw, vbp, vactive_start, vactive_end, vfp_end;
+	int ret = 0;
+
+	crtc = priv->crtcs[pipe];
+	if (!crtc) {
+		DRM_ERROR("Invalid crtc %d\n", pipe);
+		return 0;
+	}
+
+	encoder = get_encoder_from_crtc(crtc);
+	if (!encoder) {
+		DRM_ERROR("no encoder found for crtc %d\n", pipe);
+		return 0;
+	}
+
+	ret |= DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE;
+
+	vsw = mode->crtc_vsync_end - mode->crtc_vsync_start;
+	vbp = mode->crtc_vtotal - mode->crtc_vsync_end;
+
+	/*
+	 * the line counter is 1 at the start of the VSYNC pulse and VTOTAL at
+	 * the end of VFP. Translate the porch values relative to the line
+	 * counter positions.
+	 */
+
+	vactive_start = vsw + vbp + 1;
+
+	vactive_end = vactive_start + mode->crtc_vdisplay;
+
+	/* last scan line before VSYNC */
+	vfp_end = mode->crtc_vtotal;
+
+	if (stime)
+		*stime = ktime_get();
+
+	line = mdp5_encoder_get_linecount(encoder);
+
+	if (line < vactive_start) {
+		line -= vactive_start;
+		ret |= DRM_SCANOUTPOS_IN_VBLANK;
+	} else if (line > vactive_end) {
+		line = line - vfp_end - vactive_start;
+		ret |= DRM_SCANOUTPOS_IN_VBLANK;
+	} else {
+		line -= vactive_start;
+	}
+
+	*vpos = line;
+	*hpos = 0;
+
+	if (etime)
+		*etime = ktime_get();
+
+	return ret;
+}
+
+static int mdp5_get_vblank_timestamp(struct drm_device *dev, unsigned int pipe,
+				     int *max_error,
+				     struct timeval *vblank_time,
+				     unsigned flags)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	struct drm_crtc *crtc;
+
+	if (pipe < 0 || pipe >= priv->num_crtcs) {
+		DRM_ERROR("Invalid crtc %d\n", pipe);
+		return -EINVAL;
+	}
+
+	crtc = priv->crtcs[pipe];
+	if (!crtc) {
+		DRM_ERROR("Invalid crtc %d\n", pipe);
+		return -EINVAL;
+	}
+
+	return drm_calc_vbltimestamp_from_scanoutpos(dev, pipe, max_error,
+						     vblank_time, flags,
+						     &crtc->mode);
+}
+
+static u32 mdp5_get_vblank_counter(struct drm_device *dev, unsigned int pipe)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	struct drm_crtc *crtc;
+	struct drm_encoder *encoder;
+
+	if (pipe < 0 || pipe >= priv->num_crtcs)
+		return 0;
+
+	crtc = priv->crtcs[pipe];
+	if (!crtc)
+		return 0;
+
+	encoder = get_encoder_from_crtc(crtc);
+	if (!encoder)
+		return 0;
+
+	return mdp5_encoder_get_framecount(encoder);
+}
+
 struct msm_kms *mdp5_kms_init(struct drm_device *dev)
 {
 	struct platform_device *pdev = dev->platformdev;
@@ -590,6 +711,8 @@
 				!config->hw->intf.base[i])
 			continue;
 		mdp5_write(mdp5_kms, REG_MDP5_INTF_TIMING_ENGINE_EN(i), 0);
+
+		mdp5_write(mdp5_kms, REG_MDP5_INTF_FRAME_LINE_COUNT_EN(i), 0x3);
 	}
 	mdp5_disable(mdp5_kms);
 	mdelay(16);
@@ -635,6 +758,12 @@
 	dev->mode_config.max_width = config->hw->lm.max_width;
 	dev->mode_config.max_height = config->hw->lm.max_height;
 
+	dev->driver->get_vblank_timestamp = mdp5_get_vblank_timestamp;
+	dev->driver->get_scanout_position = mdp5_get_scanoutpos;
+	dev->driver->get_vblank_counter = mdp5_get_vblank_counter;
+	dev->max_vblank_count = 0xffffffff;
+	dev->vblank_disable_immediate = true;
+
 	return kms;
 
 fail:
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h
index 84f65d4..00730ba 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h
@@ -222,6 +222,8 @@
 		struct mdp5_interface *intf, struct mdp5_ctl *ctl);
 int mdp5_encoder_set_split_display(struct drm_encoder *encoder,
 					struct drm_encoder *slave_encoder);
+int mdp5_encoder_get_linecount(struct drm_encoder *encoder);
+u32 mdp5_encoder_get_framecount(struct drm_encoder *encoder);
 
 #ifdef CONFIG_DRM_MSM_DSI
 struct drm_encoder *mdp5_cmd_encoder_init(struct drm_device *dev,
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c
index 81cd490..432c098 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c
@@ -904,7 +904,7 @@
 	type = private_plane ? DRM_PLANE_TYPE_PRIMARY : DRM_PLANE_TYPE_OVERLAY;
 	ret = drm_universal_plane_init(dev, plane, 0xff, &mdp5_plane_funcs,
 				 mdp5_plane->formats, mdp5_plane->nformats,
-				 type);
+				 type, NULL);
 	if (ret)
 		goto fail;
 
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index b88ce51..9a30807 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -237,20 +237,9 @@
 
 static int get_mdp_ver(struct platform_device *pdev)
 {
-#ifdef CONFIG_OF
-	static const struct of_device_id match_types[] = { {
-		.compatible = "qcom,mdss_mdp",
-		.data	= (void	*)5,
-	}, {
-		/* end node */
-	} };
 	struct device *dev = &pdev->dev;
-	const struct of_device_id *match;
-	match = of_match_node(match_types, dev->of_node);
-	if (match)
-		return (int)(unsigned long)match->data;
-#endif
-	return 4;
+
+	return (int) (unsigned long) of_device_get_match_data(dev);
 }
 
 #include <linux/of_address.h>
@@ -258,10 +247,10 @@
 static int msm_init_vram(struct drm_device *dev)
 {
 	struct msm_drm_private *priv = dev->dev_private;
+	struct device_node *node;
 	unsigned long size = 0;
 	int ret = 0;
 
-#ifdef CONFIG_OF
 	/* In the device-tree world, we could have a 'memory-region'
 	 * phandle, which gives us a link to our "vram".  Allocating
 	 * is all nicely abstracted behind the dma api, but we need
@@ -278,7 +267,6 @@
 	 *     as corruption on screen before we have a chance to
 	 *     load and do initial modeset)
 	 */
-	struct device_node *node;
 
 	node = of_parse_phandle(dev->dev->of_node, "memory-region", 0);
 	if (node) {
@@ -288,14 +276,12 @@
 			return ret;
 		size = r.end - r.start;
 		DRM_INFO("using VRAM carveout: %lx@%pa\n", size, &r.start);
-	} else
-#endif
 
-	/* if we have no IOMMU, then we need to use carveout allocator.
-	 * Grab the entire CMA chunk carved out in early startup in
-	 * mach-msm:
-	 */
-	if (!iommu_present(&platform_bus_type)) {
+		/* if we have no IOMMU, then we need to use carveout allocator.
+		 * Grab the entire CMA chunk carved out in early startup in
+		 * mach-msm:
+		 */
+	} else if (!iommu_present(&platform_bus_type)) {
 		DRM_INFO("using %s VRAM carveout\n", vram);
 		size = memparse(vram, NULL);
 	}
@@ -1035,9 +1021,9 @@
  * Componentized driver support:
  */
 
-#ifdef CONFIG_OF
-/* NOTE: the CONFIG_OF case duplicates the same code as exynos or imx
- * (or probably any other).. so probably some room for some helpers
+/*
+ * NOTE: duplication of the same code as exynos or imx (or probably any other).
+ * so probably some room for some helpers
  */
 static int compare_of(struct device *dev, void *data)
 {
@@ -1062,12 +1048,6 @@
 
 	return 0;
 }
-#else
-static int compare_dev(struct device *dev, void *data)
-{
-	return dev == data;
-}
-#endif
 
 static int msm_drm_bind(struct device *dev)
 {
@@ -1091,35 +1071,9 @@
 static int msm_pdev_probe(struct platform_device *pdev)
 {
 	struct component_match *match = NULL;
-#ifdef CONFIG_OF
+
 	add_components(&pdev->dev, &match, "connectors");
 	add_components(&pdev->dev, &match, "gpus");
-#else
-	/* For non-DT case, it kinda sucks.  We don't actually have a way
-	 * to know whether or not we are waiting for certain devices (or if
-	 * they are simply not present).  But for non-DT we only need to
-	 * care about apq8064/apq8060/etc (all mdp4/a3xx):
-	 */
-	static const char *devnames[] = {
-			"hdmi_msm.0", "kgsl-3d0.0",
-	};
-	int i;
-
-	DBG("Adding components..");
-
-	for (i = 0; i < ARRAY_SIZE(devnames); i++) {
-		struct device *dev;
-
-		dev = bus_find_device_by_name(&platform_bus_type,
-				NULL, devnames[i]);
-		if (!dev) {
-			dev_info(&pdev->dev, "still waiting for %s\n", devnames[i]);
-			return -EPROBE_DEFER;
-		}
-
-		component_match_add(&pdev->dev, &match, compare_dev, dev);
-	}
-#endif
 
 	pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
 	return component_master_add_with_match(&pdev->dev, &msm_drm_ops, match);
@@ -1138,8 +1092,10 @@
 };
 
 static const struct of_device_id dt_match[] = {
-	{ .compatible = "qcom,mdp" },      /* mdp4 */
-	{ .compatible = "qcom,mdss_mdp" }, /* mdp5 */
+	{ .compatible = "qcom,mdp4", .data = (void *) 4 },	/* mdp4 */
+	{ .compatible = "qcom,mdp5", .data = (void *) 5 },	/* mdp5 */
+	/* to support downstream DT files */
+	{ .compatible = "qcom,mdss_mdp", .data = (void *) 5 },  /* mdp5 */
 	{}
 };
 MODULE_DEVICE_TABLE(of, dt_match);
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index 3be7a56..c1e7bba 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -31,14 +31,9 @@
 #include <linux/iommu.h>
 #include <linux/types.h>
 #include <linux/of_graph.h>
+#include <linux/of_device.h>
 #include <asm/sizes.h>
 
-#ifndef CONFIG_OF
-#include <mach/board.h>
-#include <mach/socinfo.h>
-#include <mach/iommu_domains.h>
-#endif
-
 #include <drm/drmP.h>
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
@@ -240,9 +235,9 @@
 struct drm_gem_object *msm_framebuffer_bo(struct drm_framebuffer *fb, int plane);
 const struct msm_format *msm_framebuffer_format(struct drm_framebuffer *fb);
 struct drm_framebuffer *msm_framebuffer_init(struct drm_device *dev,
-		struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **bos);
+		const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **bos);
 struct drm_framebuffer *msm_framebuffer_create(struct drm_device *dev,
-		struct drm_file *file, struct drm_mode_fb_cmd2 *mode_cmd);
+		struct drm_file *file, const struct drm_mode_fb_cmd2 *mode_cmd);
 
 struct drm_fb_helper *msm_fbdev_init(struct drm_device *dev);
 
diff --git a/drivers/gpu/drm/msm/msm_fb.c b/drivers/gpu/drm/msm/msm_fb.c
index 12171328..a474d6c 100644
--- a/drivers/gpu/drm/msm/msm_fb.c
+++ b/drivers/gpu/drm/msm/msm_fb.c
@@ -138,7 +138,7 @@
 }
 
 struct drm_framebuffer *msm_framebuffer_create(struct drm_device *dev,
-		struct drm_file *file, struct drm_mode_fb_cmd2 *mode_cmd)
+		struct drm_file *file, const struct drm_mode_fb_cmd2 *mode_cmd)
 {
 	struct drm_gem_object *bos[4] = {0};
 	struct drm_framebuffer *fb;
@@ -168,7 +168,7 @@
 }
 
 struct drm_framebuffer *msm_framebuffer_init(struct drm_device *dev,
-		struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **bos)
+		const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **bos)
 {
 	struct msm_drm_private *priv = dev->dev_private;
 	struct msm_kms *kms = priv->kms;
diff --git a/drivers/gpu/drm/msm/msm_fbdev.c b/drivers/gpu/drm/msm/msm_fbdev.c
index 3f6ec07..d95af6e 100644
--- a/drivers/gpu/drm/msm/msm_fbdev.c
+++ b/drivers/gpu/drm/msm/msm_fbdev.c
@@ -121,7 +121,7 @@
 		/* note: if fb creation failed, we can't rely on fb destroy
 		 * to unref the bo:
 		 */
-		drm_gem_object_unreference(fbdev->bo);
+		drm_gem_object_unreference_unlocked(fbdev->bo);
 		ret = PTR_ERR(fb);
 		goto fail;
 	}
diff --git a/drivers/gpu/drm/nouveau/dispnv04/crtc.c b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
index 3d96b49..6f04397 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/crtc.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
@@ -1081,8 +1081,6 @@
 }
 
 static const struct drm_crtc_funcs nv04_crtc_funcs = {
-	.save = nv_crtc_save,
-	.restore = nv_crtc_restore,
 	.cursor_set = nv04_crtc_cursor_set,
 	.cursor_move = nv04_crtc_cursor_move,
 	.gamma_set = nv_crtc_gamma_set,
@@ -1123,6 +1121,9 @@
 	nv_crtc->index = crtc_num;
 	nv_crtc->last_dpms = NV_DPMS_CLEARED;
 
+	nv_crtc->save = nv_crtc_save;
+	nv_crtc->restore = nv_crtc_restore;
+
 	drm_crtc_init(dev, &nv_crtc->base, &nv04_crtc_funcs);
 	drm_crtc_helper_add(&nv_crtc->base, &nv04_crtc_helper_funcs);
 	drm_mode_crtc_set_gamma_size(&nv_crtc->base, 256);
diff --git a/drivers/gpu/drm/nouveau/dispnv04/dac.c b/drivers/gpu/drm/nouveau/dispnv04/dac.c
index 78cb033..b48eec3 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/dac.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/dac.c
@@ -504,8 +504,6 @@
 
 static const struct drm_encoder_helper_funcs nv04_dac_helper_funcs = {
 	.dpms = nv04_dac_dpms,
-	.save = nv04_dac_save,
-	.restore = nv04_dac_restore,
 	.mode_fixup = nv04_dac_mode_fixup,
 	.prepare = nv04_dac_prepare,
 	.commit = nv04_dac_commit,
@@ -515,8 +513,6 @@
 
 static const struct drm_encoder_helper_funcs nv17_dac_helper_funcs = {
 	.dpms = nv04_dac_dpms,
-	.save = nv04_dac_save,
-	.restore = nv04_dac_restore,
 	.mode_fixup = nv04_dac_mode_fixup,
 	.prepare = nv04_dac_prepare,
 	.commit = nv04_dac_commit,
@@ -545,12 +541,16 @@
 	nv_encoder->dcb = entry;
 	nv_encoder->or = ffs(entry->or) - 1;
 
+	nv_encoder->enc_save = nv04_dac_save;
+	nv_encoder->enc_restore = nv04_dac_restore;
+
 	if (nv_gf4_disp_arch(dev))
 		helper = &nv17_dac_helper_funcs;
 	else
 		helper = &nv04_dac_helper_funcs;
 
-	drm_encoder_init(dev, encoder, &nv04_dac_funcs, DRM_MODE_ENCODER_DAC);
+	drm_encoder_init(dev, encoder, &nv04_dac_funcs, DRM_MODE_ENCODER_DAC,
+			 NULL);
 	drm_encoder_helper_add(encoder, helper);
 
 	encoder->possible_crtcs = entry->heads;
diff --git a/drivers/gpu/drm/nouveau/dispnv04/dfp.c b/drivers/gpu/drm/nouveau/dispnv04/dfp.c
index 429ab5e..05bfd15 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/dfp.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/dfp.c
@@ -652,8 +652,6 @@
 
 static const struct drm_encoder_helper_funcs nv04_lvds_helper_funcs = {
 	.dpms = nv04_lvds_dpms,
-	.save = nv04_dfp_save,
-	.restore = nv04_dfp_restore,
 	.mode_fixup = nv04_dfp_mode_fixup,
 	.prepare = nv04_dfp_prepare,
 	.commit = nv04_dfp_commit,
@@ -663,8 +661,6 @@
 
 static const struct drm_encoder_helper_funcs nv04_tmds_helper_funcs = {
 	.dpms = nv04_tmds_dpms,
-	.save = nv04_dfp_save,
-	.restore = nv04_dfp_restore,
 	.mode_fixup = nv04_dfp_mode_fixup,
 	.prepare = nv04_dfp_prepare,
 	.commit = nv04_dfp_commit,
@@ -701,12 +697,15 @@
 	if (!nv_encoder)
 		return -ENOMEM;
 
+	nv_encoder->enc_save = nv04_dfp_save;
+	nv_encoder->enc_restore = nv04_dfp_restore;
+
 	encoder = to_drm_encoder(nv_encoder);
 
 	nv_encoder->dcb = entry;
 	nv_encoder->or = ffs(entry->or) - 1;
 
-	drm_encoder_init(connector->dev, encoder, &nv04_dfp_funcs, type);
+	drm_encoder_init(connector->dev, encoder, &nv04_dfp_funcs, type, NULL);
 	drm_encoder_helper_add(encoder, helper);
 
 	encoder->possible_crtcs = entry->heads;
diff --git a/drivers/gpu/drm/nouveau/dispnv04/disp.c b/drivers/gpu/drm/nouveau/dispnv04/disp.c
index 9e65008..b4a6bc4 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/disp.c
@@ -39,7 +39,8 @@
 	struct dcb_table *dcb = &drm->vbios.dcb;
 	struct drm_connector *connector, *ct;
 	struct drm_encoder *encoder;
-	struct drm_crtc *crtc;
+	struct nouveau_encoder *nv_encoder;
+	struct nouveau_crtc *crtc;
 	struct nv04_display *disp;
 	int i, ret;
 
@@ -107,14 +108,11 @@
 	}
 
 	/* Save previous state */
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
-		crtc->funcs->save(crtc);
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head)
+		crtc->save(&crtc->base);
 
-	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
-		const struct drm_encoder_helper_funcs *func = encoder->helper_private;
-
-		func->save(encoder);
-	}
+	list_for_each_entry(nv_encoder, &dev->mode_config.encoder_list, base.base.head)
+		nv_encoder->enc_save(&nv_encoder->base.base);
 
 	nouveau_overlay_init(dev);
 
@@ -126,8 +124,9 @@
 {
 	struct nv04_display *disp = nv04_display(dev);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct drm_encoder *encoder;
+	struct nouveau_encoder *encoder;
 	struct drm_crtc *crtc;
+	struct nouveau_crtc *nv_crtc;
 
 	/* Turn every CRTC off. */
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
@@ -139,14 +138,11 @@
 	}
 
 	/* Restore state */
-	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
-		const struct drm_encoder_helper_funcs *func = encoder->helper_private;
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.base.head)
+		encoder->enc_restore(&encoder->base.base);
 
-		func->restore(encoder);
-	}
-
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
-		crtc->funcs->restore(crtc);
+	list_for_each_entry(nv_crtc, &dev->mode_config.crtc_list, base.head)
+		nv_crtc->restore(&nv_crtc->base);
 
 	nouveau_hw_save_vga_fonts(dev, 0);
 
@@ -159,8 +155,8 @@
 int
 nv04_display_init(struct drm_device *dev)
 {
-	struct drm_encoder *encoder;
-	struct drm_crtc *crtc;
+	struct nouveau_encoder *encoder;
+	struct nouveau_crtc *crtc;
 
 	/* meh.. modeset apparently doesn't setup all the regs and depends
 	 * on pre-existing state, for now load the state of the card *before*
@@ -170,14 +166,11 @@
 	 * save/restore "pre-load" state, but more general so we can save
 	 * on suspend too.
 	 */
-	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
-		const struct drm_encoder_helper_funcs *func = encoder->helper_private;
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head)
+		crtc->save(&crtc->base);
 
-		func->restore(encoder);
-	}
-
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
-		crtc->funcs->restore(crtc);
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.base.head)
+		encoder->enc_save(&encoder->base.base);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/dispnv04/tvnv04.c b/drivers/gpu/drm/nouveau/dispnv04/tvnv04.c
index 5345eb5..54e9fb9e 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/tvnv04.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/tvnv04.c
@@ -192,8 +192,6 @@
 
 static const struct drm_encoder_helper_funcs nv04_tv_helper_funcs = {
 	.dpms = nv04_tv_dpms,
-	.save = drm_i2c_encoder_save,
-	.restore = drm_i2c_encoder_restore,
 	.mode_fixup = drm_i2c_encoder_mode_fixup,
 	.prepare = nv04_tv_prepare,
 	.commit = nv04_tv_commit,
@@ -225,9 +223,13 @@
 	/* Initialize the common members */
 	encoder = to_drm_encoder(nv_encoder);
 
-	drm_encoder_init(dev, encoder, &nv04_tv_funcs, DRM_MODE_ENCODER_TVDAC);
+	drm_encoder_init(dev, encoder, &nv04_tv_funcs, DRM_MODE_ENCODER_TVDAC,
+			 NULL);
 	drm_encoder_helper_add(encoder, &nv04_tv_helper_funcs);
 
+	nv_encoder->enc_save = drm_i2c_encoder_save;
+	nv_encoder->enc_restore = drm_i2c_encoder_restore;
+
 	encoder->possible_crtcs = entry->heads;
 	encoder->possible_clones = 0;
 	nv_encoder->dcb = entry;
diff --git a/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c b/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c
index b734195..d9644c0 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c
@@ -771,8 +771,6 @@
 
 static struct drm_encoder_helper_funcs nv17_tv_helper_funcs = {
 	.dpms = nv17_tv_dpms,
-	.save = nv17_tv_save,
-	.restore = nv17_tv_restore,
 	.mode_fixup = nv17_tv_mode_fixup,
 	.prepare = nv17_tv_prepare,
 	.commit = nv17_tv_commit,
@@ -816,10 +814,14 @@
 	tv_enc->base.dcb = entry;
 	tv_enc->base.or = ffs(entry->or) - 1;
 
-	drm_encoder_init(dev, encoder, &nv17_tv_funcs, DRM_MODE_ENCODER_TVDAC);
+	drm_encoder_init(dev, encoder, &nv17_tv_funcs, DRM_MODE_ENCODER_TVDAC,
+			 NULL);
 	drm_encoder_helper_add(encoder, &nv17_tv_helper_funcs);
 	to_encoder_slave(encoder)->slave_funcs = &nv17_tv_slave_funcs;
 
+	tv_enc->base.enc_save = nv17_tv_save;
+	tv_enc->base.enc_restore = nv17_tv_restore;
+
 	encoder->possible_crtcs = entry->heads;
 	encoder->possible_clones = 0;
 
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/instmem.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/instmem.h
index 28bc202..40f845e 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/instmem.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/instmem.h
@@ -7,6 +7,7 @@
 	const struct nvkm_instmem_func *func;
 	struct nvkm_subdev subdev;
 
+	spinlock_t lock;
 	struct list_head list;
 	u32 reserved;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c
index 8b8332e..d5e6938 100644
--- a/drivers/gpu/drm/nouveau/nouveau_acpi.c
+++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c
@@ -367,6 +367,7 @@
 		return -ENODEV;
 	}
 	obj = (union acpi_object *)buffer.pointer;
+	len = min(len, (int)obj->buffer.length);
 	memcpy(bios+offset, obj->buffer.pointer, len);
 	kfree(buffer.pointer);
 	return len;
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index 2e7cbe9..5dd1d01 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -898,8 +898,6 @@
 static const struct drm_connector_funcs
 nouveau_connector_funcs = {
 	.dpms = drm_helper_connector_dpms,
-	.save = NULL,
-	.restore = NULL,
 	.detect = nouveau_connector_detect,
 	.destroy = nouveau_connector_destroy,
 	.fill_modes = drm_helper_probe_single_connector_modes,
@@ -910,8 +908,6 @@
 static const struct drm_connector_funcs
 nouveau_connector_funcs_lvds = {
 	.dpms = drm_helper_connector_dpms,
-	.save = NULL,
-	.restore = NULL,
 	.detect = nouveau_connector_detect_lvds,
 	.destroy = nouveau_connector_destroy,
 	.fill_modes = drm_helper_probe_single_connector_modes,
@@ -944,8 +940,6 @@
 static const struct drm_connector_funcs
 nouveau_connector_funcs_dp = {
 	.dpms = nouveau_connector_dp_dpms,
-	.save = NULL,
-	.restore = NULL,
 	.detect = nouveau_connector_detect,
 	.destroy = nouveau_connector_destroy,
 	.fill_modes = drm_helper_probe_single_connector_modes,
diff --git a/drivers/gpu/drm/nouveau/nouveau_crtc.h b/drivers/gpu/drm/nouveau/nouveau_crtc.h
index f19cb1c..863f10b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_crtc.h
+++ b/drivers/gpu/drm/nouveau/nouveau_crtc.h
@@ -73,6 +73,9 @@
 	int (*set_dither)(struct nouveau_crtc *crtc, bool update);
 	int (*set_scale)(struct nouveau_crtc *crtc, bool update);
 	int (*set_color_vibrance)(struct nouveau_crtc *crtc, bool update);
+
+	void (*save)(struct drm_crtc *crtc);
+	void (*restore)(struct drm_crtc *crtc);
 };
 
 static inline struct nouveau_crtc *nouveau_crtc(struct drm_crtc *crtc)
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index db6bc67..18676b8 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -246,7 +246,7 @@
 int
 nouveau_framebuffer_init(struct drm_device *dev,
 			 struct nouveau_framebuffer *nv_fb,
-			 struct drm_mode_fb_cmd2 *mode_cmd,
+			 const struct drm_mode_fb_cmd2 *mode_cmd,
 			 struct nouveau_bo *nvbo)
 {
 	struct nouveau_display *disp = nouveau_display(dev);
@@ -272,7 +272,7 @@
 static struct drm_framebuffer *
 nouveau_user_framebuffer_create(struct drm_device *dev,
 				struct drm_file *file_priv,
-				struct drm_mode_fb_cmd2 *mode_cmd)
+				const struct drm_mode_fb_cmd2 *mode_cmd)
 {
 	struct nouveau_framebuffer *nouveau_fb;
 	struct drm_gem_object *gem;
@@ -829,7 +829,6 @@
 	struct drm_device *dev = drm->dev;
 	struct nouveau_page_flip_state *s;
 	unsigned long flags;
-	int crtcid = -1;
 
 	spin_lock_irqsave(&dev->event_lock, flags);
 
@@ -841,15 +840,19 @@
 
 	s = list_first_entry(&fctx->flip, struct nouveau_page_flip_state, head);
 	if (s->event) {
-		/* Vblank timestamps/counts are only correct on >= NV-50 */
-		if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA)
-			crtcid = s->crtc;
+		if (drm->device.info.family < NV_DEVICE_INFO_V0_TESLA) {
+			drm_arm_vblank_event(dev, s->crtc, s->event);
+		} else {
+			drm_send_vblank_event(dev, s->crtc, s->event);
 
-		drm_send_vblank_event(dev, crtcid, s->event);
+			/* Give up ownership of vblank for page-flipped crtc */
+			drm_vblank_put(dev, s->crtc);
+		}
 	}
-
-	/* Give up ownership of vblank for page-flipped crtc */
-	drm_vblank_put(dev, s->crtc);
+	else {
+		/* Give up ownership of vblank for page-flipped crtc */
+		drm_vblank_put(dev, s->crtc);
+	}
 
 	list_del(&s->head);
 	if (ps)
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.h b/drivers/gpu/drm/nouveau/nouveau_display.h
index 856abe0..5a57d8b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.h
+++ b/drivers/gpu/drm/nouveau/nouveau_display.h
@@ -23,7 +23,7 @@
 }
 
 int nouveau_framebuffer_init(struct drm_device *, struct nouveau_framebuffer *,
-			     struct drm_mode_fb_cmd2 *, struct nouveau_bo *);
+			     const struct drm_mode_fb_cmd2 *, struct nouveau_bo *);
 
 struct nouveau_page_flip_state {
 	struct list_head head;
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.h b/drivers/gpu/drm/nouveau/nouveau_drm.h
index 3050042e..a02813e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.h
@@ -39,6 +39,7 @@
 
 #include <nvif/client.h>
 #include <nvif/device.h>
+#include <nvif/ioctl.h>
 
 #include <drmP.h>
 
@@ -65,9 +66,10 @@
 };
 
 enum nouveau_drm_object_route {
-	NVDRM_OBJECT_NVIF = 0,
+	NVDRM_OBJECT_NVIF = NVIF_IOCTL_V0_OWNER_NVIF,
 	NVDRM_OBJECT_USIF,
 	NVDRM_OBJECT_ABI16,
+	NVDRM_OBJECT_ANY = NVIF_IOCTL_V0_OWNER_ANY,
 };
 
 enum nouveau_drm_notify_route {
diff --git a/drivers/gpu/drm/nouveau/nouveau_encoder.h b/drivers/gpu/drm/nouveau/nouveau_encoder.h
index b37da95..c38a864 100644
--- a/drivers/gpu/drm/nouveau/nouveau_encoder.h
+++ b/drivers/gpu/drm/nouveau/nouveau_encoder.h
@@ -63,6 +63,9 @@
 			u32 datarate;
 		} dp;
 	};
+
+	void (*enc_save)(struct drm_encoder *encoder);
+	void (*enc_restore)(struct drm_encoder *encoder);
 };
 
 struct nouveau_encoder *
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.h b/drivers/gpu/drm/nouveau/nouveau_fbcon.h
index 1e2e9e2..ca77ad0 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.h
@@ -34,7 +34,6 @@
 struct nouveau_fbdev {
 	struct drm_fb_helper helper;
 	struct nouveau_framebuffer nouveau_fb;
-	struct list_head fbdev_list;
 	struct drm_device *dev;
 	unsigned int saved_flags;
 	struct nvif_object surf2d;
diff --git a/drivers/gpu/drm/nouveau/nouveau_usif.c b/drivers/gpu/drm/nouveau/nouveau_usif.c
index 89dc4ce..6ae1b34 100644
--- a/drivers/gpu/drm/nouveau/nouveau_usif.c
+++ b/drivers/gpu/drm/nouveau/nouveau_usif.c
@@ -313,7 +313,10 @@
 	if (nvif_unpack(argv->v0, 0, 0, true)) {
 		/* block access to objects not created via this interface */
 		owner = argv->v0.owner;
-		argv->v0.owner = NVDRM_OBJECT_USIF;
+		if (argv->v0.object == 0ULL)
+			argv->v0.owner = NVDRM_OBJECT_ANY; /* except client */
+		else
+			argv->v0.owner = NVDRM_OBJECT_USIF;
 	} else
 		goto done;
 
diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index c053c50..44e1952 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -28,6 +28,7 @@
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_plane_helper.h>
 #include <drm/drm_dp_helper.h>
+#include <drm/drm_fb_helper.h>
 
 #include <nvif/class.h>
 
@@ -1717,7 +1718,7 @@
 	encoder = to_drm_encoder(nv_encoder);
 	encoder->possible_crtcs = dcbe->heads;
 	encoder->possible_clones = 0;
-	drm_encoder_init(connector->dev, encoder, &nv50_dac_func, type);
+	drm_encoder_init(connector->dev, encoder, &nv50_dac_func, type, NULL);
 	drm_encoder_helper_add(encoder, &nv50_dac_hfunc);
 
 	drm_mode_connector_attach_encoder(connector, encoder);
@@ -2125,7 +2126,7 @@
 	encoder = to_drm_encoder(nv_encoder);
 	encoder->possible_crtcs = dcbe->heads;
 	encoder->possible_clones = 0;
-	drm_encoder_init(connector->dev, encoder, &nv50_sor_func, type);
+	drm_encoder_init(connector->dev, encoder, &nv50_sor_func, type, NULL);
 	drm_encoder_helper_add(encoder, &nv50_sor_hfunc);
 
 	drm_mode_connector_attach_encoder(connector, encoder);
@@ -2305,7 +2306,7 @@
 	encoder = to_drm_encoder(nv_encoder);
 	encoder->possible_crtcs = dcbe->heads;
 	encoder->possible_clones = 0;
-	drm_encoder_init(connector->dev, encoder, &nv50_pior_func, type);
+	drm_encoder_init(connector->dev, encoder, &nv50_pior_func, type, NULL);
 	drm_encoder_helper_add(encoder, &nv50_pior_hfunc);
 
 	drm_mode_connector_attach_encoder(connector, encoder);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
index e3c783d..caf22b5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
@@ -279,6 +279,12 @@
 };
 
 static const struct nvkm_device_pci_vendor
+nvkm_device_pci_10de_0fe4[] = {
+	{ 0x144d, 0xc740, NULL, { .War00C800_0 = true } },
+	{}
+};
+
+static const struct nvkm_device_pci_vendor
 nvkm_device_pci_10de_104b[] = {
 	{ 0x1043, 0x844c, "GeForce GT 625" },
 	{ 0x1043, 0x846b, "GeForce GT 625" },
@@ -689,6 +695,12 @@
 };
 
 static const struct nvkm_device_pci_vendor
+nvkm_device_pci_10de_11e0[] = {
+	{ 0x1558, 0x5106, NULL, { .War00C800_0 = true } },
+	{}
+};
+
+static const struct nvkm_device_pci_vendor
 nvkm_device_pci_10de_11e3[] = {
 	{ 0x17aa, 0x3683, "GeForce GTX 760A" },
 	{}
@@ -1370,7 +1382,7 @@
 	{ 0x0fe1, "GeForce GT 730M" },
 	{ 0x0fe2, "GeForce GT 745M" },
 	{ 0x0fe3, "GeForce GT 745M", nvkm_device_pci_10de_0fe3 },
-	{ 0x0fe4, "GeForce GT 750M" },
+	{ 0x0fe4, "GeForce GT 750M", nvkm_device_pci_10de_0fe4 },
 	{ 0x0fe9, "GeForce GT 750M" },
 	{ 0x0fea, "GeForce GT 755M" },
 	{ 0x0fec, "GeForce 710A" },
@@ -1485,7 +1497,7 @@
 	{ 0x11c6, "GeForce GTX 650 Ti" },
 	{ 0x11c8, "GeForce GTX 650" },
 	{ 0x11cb, "GeForce GT 740" },
-	{ 0x11e0, "GeForce GTX 770M" },
+	{ 0x11e0, "GeForce GTX 770M", nvkm_device_pci_10de_11e0 },
 	{ 0x11e1, "GeForce GTX 765M" },
 	{ 0x11e2, "GeForce GTX 765M" },
 	{ 0x11e3, "GeForce GTX 760M", nvkm_device_pci_10de_11e3 },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
index b5b8759..74de7a9 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
@@ -207,6 +207,8 @@
 			const u32 b =  beta * gr->ppc_tpc_nr[gpc][ppc];
 			const u32 t = timeslice_mode;
 			const u32 o = PPC_UNIT(gpc, ppc, 0);
+			if (!(gr->ppc_mask[gpc] & (1 << ppc)))
+				continue;
 			mmio_skip(info, o + 0xc0, (t << 28) | (b << 16) | ++bo);
 			mmio_wr32(info, o + 0xc0, (t << 28) | (b << 16) | --bo);
 			bo += grctx->attrib_nr_max * gr->ppc_tpc_nr[gpc][ppc];
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpc.fuc b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpc.fuc
index 194afe9..7dacb3c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpc.fuc
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpc.fuc
@@ -52,10 +52,12 @@
 #endif
 
 #ifdef INCLUDE_CODE
+#define gpc_addr(reg,addr)                                                    /*
+*/	imm32(reg,addr)                                                       /*
+*/	or reg NV_PGRAPH_GPCX_GPCCS_MMIO_CTRL_BASE_ENABLE
 #define gpc_wr32(addr,reg)                                                    /*
+*/	gpc_addr($r14,addr)                                                   /*
 */	mov b32 $r15 reg                                                      /*
-*/	imm32($r14, addr)                                                     /*
-*/	or $r14 NV_PGRAPH_GPCX_GPCCS_MMIO_CTRL_BASE_ENABLE                    /*
 */	call(nv_wr32)
 
 // reports an exception to the host
@@ -161,7 +163,7 @@
 
 #if NV_PGRAPH_GPCX_UNK__SIZE > 0
 	// figure out which, and how many, UNKs are actually present
-	imm32($r14, 0x500c30)
+	gpc_addr($r14, 0x500c30)
 	clear b32 $r2
 	clear b32 $r3
 	clear b32 $r4
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgf117.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgf117.fuc3.h
index 64d07df..bb820ff 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgf117.fuc3.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgf117.fuc3.h
@@ -314,7 +314,7 @@
 	0x03f01200,
 	0x0002d000,
 	0x17f104bd,
-	0x10fe0542,
+	0x10fe0545,
 	0x0007f100,
 	0x0003f007,
 	0xbd0000d0,
@@ -338,184 +338,184 @@
 	0x02d00103,
 	0xf104bd00,
 	0xf00c30e7,
-	0x24bd50e3,
-	0x44bd34bd,
-/* 0x0430: init_unk_loop */
-	0xb06821f4,
-	0x0bf400f6,
-	0x01f7f00f,
-	0xfd04f2bb,
-	0x30b6054f,
-/* 0x0445: init_unk_next */
-	0x0120b601,
-	0xb004e0b6,
-	0x1bf40126,
-/* 0x0451: init_unk_done */
-	0x070380e2,
-	0xf1080480,
-	0xf0010027,
-	0x22cf0223,
-	0x9534bd00,
-	0x07f10825,
-	0x03f0c000,
-	0x0005d001,
-	0x07f104bd,
-	0x03f0c100,
-	0x0005d001,
-	0x0e9804bd,
-	0x010f9800,
-	0x015021f5,
-	0xbb002fbb,
-	0x0e98003f,
-	0x020f9801,
-	0x015021f5,
-	0xfd050e98,
-	0x2ebb00ef,
-	0x003ebb00,
-	0x98020e98,
-	0x21f5030f,
-	0x0e980150,
-	0x00effd07,
-	0xbb002ebb,
-	0x35b6003e,
-	0x0007f102,
-	0x0103f0d3,
-	0xbd0003d0,
-	0x0825b604,
-	0xb60635b6,
-	0x30b60120,
-	0x0824b601,
-	0xb90834b6,
-	0x21f5022f,
-	0x2fbb02d3,
-	0x003fbb00,
-	0x010007f1,
-	0xd00203f0,
+	0xe5f050e3,
+	0xbd24bd01,
+/* 0x0433: init_unk_loop */
+	0xf444bd34,
+	0xf6b06821,
+	0x0f0bf400,
+	0xbb01f7f0,
+	0x4ffd04f2,
+	0x0130b605,
+/* 0x0448: init_unk_next */
+	0xb60120b6,
+	0x26b004e0,
+	0xe21bf401,
+/* 0x0454: init_unk_done */
+	0x80070380,
+	0x27f10804,
+	0x23f00100,
+	0x0022cf02,
+	0x259534bd,
+	0x0007f108,
+	0x0103f0c0,
+	0xbd0005d0,
+	0x0007f104,
+	0x0103f0c1,
+	0xbd0005d0,
+	0x000e9804,
+	0xf5010f98,
+	0xbb015021,
+	0x3fbb002f,
+	0x010e9800,
+	0xf5020f98,
+	0x98015021,
+	0xeffd050e,
+	0x002ebb00,
+	0x98003ebb,
+	0x0f98020e,
+	0x5021f503,
+	0x070e9801,
+	0xbb00effd,
+	0x3ebb002e,
+	0x0235b600,
+	0xd30007f1,
+	0xd00103f0,
 	0x04bd0003,
-	0x29f024bd,
-	0x0007f11f,
-	0x0203f008,
-	0xbd0002d0,
-/* 0x0505: main */
-	0x0031f404,
-	0xf00028f4,
-	0x21f424d7,
-	0xf401f439,
-	0xf404e4b0,
-	0x81fe1e18,
-	0x0627f001,
-	0x12fd20bd,
-	0x01e4b604,
-	0xfe051efd,
-	0x21f50018,
-	0x0ef405fa,
-/* 0x0535: main_not_ctx_xfer */
-	0x10ef94d3,
-	0xf501f5f0,
-	0xf4037e21,
-/* 0x0542: ih */
-	0x80f9c60e,
-	0xf90188fe,
-	0xf990f980,
-	0xf9b0f9a0,
-	0xf9e0f9d0,
-	0xf104bdf0,
-	0xf00200a7,
-	0xaacf00a3,
-	0x04abc400,
-	0xf02c0bf4,
-	0xe7f124d7,
-	0xe3f01a00,
-	0x00eecf00,
-	0x1900f7f1,
-	0xcf00f3f0,
-	0x21f400ff,
-	0x01e7f004,
-	0x1d0007f1,
-	0xd00003f0,
-	0x04bd000e,
-/* 0x0590: ih_no_fifo */
-	0x010007f1,
-	0xd00003f0,
-	0x04bd000a,
-	0xe0fcf0fc,
-	0xb0fcd0fc,
-	0x90fca0fc,
-	0x88fe80fc,
-	0xf480fc00,
-	0x01f80032,
-/* 0x05b4: hub_barrier_done */
-	0x9801f7f0,
-	0xfebb040e,
-	0x02ffb904,
-	0x9418e7f1,
-	0xf440e3f0,
-	0x00f89d21,
-/* 0x05cc: ctx_redswitch */
-	0xf120f7f0,
+	0xb60825b6,
+	0x20b60635,
+	0x0130b601,
+	0xb60824b6,
+	0x2fb90834,
+	0xd321f502,
+	0x002fbb02,
+	0xf1003fbb,
+	0xf0010007,
+	0x03d00203,
+	0xbd04bd00,
+	0x1f29f024,
+	0x080007f1,
+	0xd00203f0,
+	0x04bd0002,
+/* 0x0508: main */
+	0xf40031f4,
+	0xd7f00028,
+	0x3921f424,
+	0xb0f401f4,
+	0x18f404e4,
+	0x0181fe1e,
+	0xbd0627f0,
+	0x0412fd20,
+	0xfd01e4b6,
+	0x18fe051e,
+	0xfd21f500,
+	0xd30ef405,
+/* 0x0538: main_not_ctx_xfer */
+	0xf010ef94,
+	0x21f501f5,
+	0x0ef4037e,
+/* 0x0545: ih */
+	0xfe80f9c6,
+	0x80f90188,
+	0xa0f990f9,
+	0xd0f9b0f9,
+	0xf0f9e0f9,
+	0xa7f104bd,
+	0xa3f00200,
+	0x00aacf00,
+	0xf404abc4,
+	0xd7f02c0b,
+	0x00e7f124,
+	0x00e3f01a,
+	0xf100eecf,
+	0xf01900f7,
+	0xffcf00f3,
+	0x0421f400,
+	0xf101e7f0,
+	0xf01d0007,
+	0x0ed00003,
+/* 0x0593: ih_no_fifo */
+	0xf104bd00,
+	0xf0010007,
+	0x0ad00003,
+	0xfc04bd00,
+	0xfce0fcf0,
+	0xfcb0fcd0,
+	0xfc90fca0,
+	0x0088fe80,
+	0x32f480fc,
+/* 0x05b7: hub_barrier_done */
+	0xf001f800,
+	0x0e9801f7,
+	0x04febb04,
+	0xf102ffb9,
+	0xf09418e7,
+	0x21f440e3,
+/* 0x05cf: ctx_redswitch */
+	0xf000f89d,
+	0x07f120f7,
+	0x03f08500,
+	0x000fd001,
+	0xe7f004bd,
+/* 0x05e1: ctx_redswitch_delay */
+	0x01e2b608,
+	0xf1fd1bf4,
+	0xf10800f5,
+	0xf10200f5,
 	0xf0850007,
 	0x0fd00103,
-	0xf004bd00,
-/* 0x05de: ctx_redswitch_delay */
-	0xe2b608e7,
-	0xfd1bf401,
-	0x0800f5f1,
-	0x0200f5f1,
-	0x850007f1,
-	0xd00103f0,
-	0x04bd000f,
-/* 0x05fa: ctx_xfer */
-	0x07f100f8,
-	0x03f08100,
-	0x000fd002,
-	0x11f404bd,
-	0xcc21f507,
-/* 0x060d: ctx_xfer_not_load */
-	0x6a21f505,
-	0xf124bd02,
-	0xf047fc07,
-	0x02d00203,
-	0xf004bd00,
-	0x20b6012c,
-	0xfc07f103,
-	0x0203f04a,
-	0xbd0002d0,
-	0x01acf004,
-	0xf102a5f0,
-	0xf00000b7,
-	0x0c9850b3,
-	0x0fc4b604,
-	0x9800bcbb,
-	0x0d98000c,
-	0x00e7f001,
-	0x016f21f5,
-	0xf101acf0,
-	0xf04000b7,
-	0x0c9850b3,
-	0x0fc4b604,
-	0x9800bcbb,
-	0x0d98010c,
-	0x060f9802,
-	0x0800e7f1,
-	0x016f21f5,
+	0xf804bd00,
+/* 0x05fd: ctx_xfer */
+	0x0007f100,
+	0x0203f081,
+	0xbd000fd0,
+	0x0711f404,
+	0x05cf21f5,
+/* 0x0610: ctx_xfer_not_load */
+	0x026a21f5,
+	0x07f124bd,
+	0x03f047fc,
+	0x0002d002,
+	0x2cf004bd,
+	0x0320b601,
+	0x4afc07f1,
+	0xd00203f0,
+	0x04bd0002,
 	0xf001acf0,
-	0xb7f104a5,
-	0xb3f03000,
+	0xb7f102a5,
+	0xb3f00000,
 	0x040c9850,
 	0xbb0fc4b6,
 	0x0c9800bc,
-	0x030d9802,
-	0xf1080f98,
-	0xf50200e7,
-	0xf5016f21,
-	0xf4025e21,
-	0x12f40601,
-/* 0x06a9: ctx_xfer_post */
-	0x7f21f507,
-/* 0x06ad: ctx_xfer_done */
-	0xb421f502,
-	0x0000f805,
-	0x00000000,
+	0x010d9800,
+	0xf500e7f0,
+	0xf0016f21,
+	0xb7f101ac,
+	0xb3f04000,
+	0x040c9850,
+	0xbb0fc4b6,
+	0x0c9800bc,
+	0x020d9801,
+	0xf1060f98,
+	0xf50800e7,
+	0xf0016f21,
+	0xa5f001ac,
+	0x00b7f104,
+	0x50b3f030,
+	0xb6040c98,
+	0xbcbb0fc4,
+	0x020c9800,
+	0x98030d98,
+	0xe7f1080f,
+	0x21f50200,
+	0x21f5016f,
+	0x01f4025e,
+	0x0712f406,
+/* 0x06ac: ctx_xfer_post */
+	0x027f21f5,
+/* 0x06b0: ctx_xfer_done */
+	0x05b721f5,
+	0x000000f8,
 	0x00000000,
 	0x00000000,
 	0x00000000,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk104.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk104.fuc3.h
index 2f59643..911976d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk104.fuc3.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk104.fuc3.h
@@ -314,7 +314,7 @@
 	0x03f01200,
 	0x0002d000,
 	0x17f104bd,
-	0x10fe0542,
+	0x10fe0545,
 	0x0007f100,
 	0x0003f007,
 	0xbd0000d0,
@@ -338,184 +338,184 @@
 	0x02d00103,
 	0xf104bd00,
 	0xf00c30e7,
-	0x24bd50e3,
-	0x44bd34bd,
-/* 0x0430: init_unk_loop */
-	0xb06821f4,
-	0x0bf400f6,
-	0x01f7f00f,
-	0xfd04f2bb,
-	0x30b6054f,
-/* 0x0445: init_unk_next */
-	0x0120b601,
-	0xb004e0b6,
-	0x1bf40126,
-/* 0x0451: init_unk_done */
-	0x070380e2,
-	0xf1080480,
-	0xf0010027,
-	0x22cf0223,
-	0x9534bd00,
-	0x07f10825,
-	0x03f0c000,
-	0x0005d001,
-	0x07f104bd,
-	0x03f0c100,
-	0x0005d001,
-	0x0e9804bd,
-	0x010f9800,
-	0x015021f5,
-	0xbb002fbb,
-	0x0e98003f,
-	0x020f9801,
-	0x015021f5,
-	0xfd050e98,
-	0x2ebb00ef,
-	0x003ebb00,
-	0x98020e98,
-	0x21f5030f,
-	0x0e980150,
-	0x00effd07,
-	0xbb002ebb,
-	0x35b6003e,
-	0x0007f102,
-	0x0103f0d3,
-	0xbd0003d0,
-	0x0825b604,
-	0xb60635b6,
-	0x30b60120,
-	0x0824b601,
-	0xb90834b6,
-	0x21f5022f,
-	0x2fbb02d3,
-	0x003fbb00,
-	0x010007f1,
-	0xd00203f0,
+	0xe5f050e3,
+	0xbd24bd01,
+/* 0x0433: init_unk_loop */
+	0xf444bd34,
+	0xf6b06821,
+	0x0f0bf400,
+	0xbb01f7f0,
+	0x4ffd04f2,
+	0x0130b605,
+/* 0x0448: init_unk_next */
+	0xb60120b6,
+	0x26b004e0,
+	0xe21bf401,
+/* 0x0454: init_unk_done */
+	0x80070380,
+	0x27f10804,
+	0x23f00100,
+	0x0022cf02,
+	0x259534bd,
+	0x0007f108,
+	0x0103f0c0,
+	0xbd0005d0,
+	0x0007f104,
+	0x0103f0c1,
+	0xbd0005d0,
+	0x000e9804,
+	0xf5010f98,
+	0xbb015021,
+	0x3fbb002f,
+	0x010e9800,
+	0xf5020f98,
+	0x98015021,
+	0xeffd050e,
+	0x002ebb00,
+	0x98003ebb,
+	0x0f98020e,
+	0x5021f503,
+	0x070e9801,
+	0xbb00effd,
+	0x3ebb002e,
+	0x0235b600,
+	0xd30007f1,
+	0xd00103f0,
 	0x04bd0003,
-	0x29f024bd,
-	0x0007f11f,
-	0x0203f008,
-	0xbd0002d0,
-/* 0x0505: main */
-	0x0031f404,
-	0xf00028f4,
-	0x21f424d7,
-	0xf401f439,
-	0xf404e4b0,
-	0x81fe1e18,
-	0x0627f001,
-	0x12fd20bd,
-	0x01e4b604,
-	0xfe051efd,
-	0x21f50018,
-	0x0ef405fa,
-/* 0x0535: main_not_ctx_xfer */
-	0x10ef94d3,
-	0xf501f5f0,
-	0xf4037e21,
-/* 0x0542: ih */
-	0x80f9c60e,
-	0xf90188fe,
-	0xf990f980,
-	0xf9b0f9a0,
-	0xf9e0f9d0,
-	0xf104bdf0,
-	0xf00200a7,
-	0xaacf00a3,
-	0x04abc400,
-	0xf02c0bf4,
-	0xe7f124d7,
-	0xe3f01a00,
-	0x00eecf00,
-	0x1900f7f1,
-	0xcf00f3f0,
-	0x21f400ff,
-	0x01e7f004,
-	0x1d0007f1,
-	0xd00003f0,
-	0x04bd000e,
-/* 0x0590: ih_no_fifo */
-	0x010007f1,
-	0xd00003f0,
-	0x04bd000a,
-	0xe0fcf0fc,
-	0xb0fcd0fc,
-	0x90fca0fc,
-	0x88fe80fc,
-	0xf480fc00,
-	0x01f80032,
-/* 0x05b4: hub_barrier_done */
-	0x9801f7f0,
-	0xfebb040e,
-	0x02ffb904,
-	0x9418e7f1,
-	0xf440e3f0,
-	0x00f89d21,
-/* 0x05cc: ctx_redswitch */
-	0xf120f7f0,
+	0xb60825b6,
+	0x20b60635,
+	0x0130b601,
+	0xb60824b6,
+	0x2fb90834,
+	0xd321f502,
+	0x002fbb02,
+	0xf1003fbb,
+	0xf0010007,
+	0x03d00203,
+	0xbd04bd00,
+	0x1f29f024,
+	0x080007f1,
+	0xd00203f0,
+	0x04bd0002,
+/* 0x0508: main */
+	0xf40031f4,
+	0xd7f00028,
+	0x3921f424,
+	0xb0f401f4,
+	0x18f404e4,
+	0x0181fe1e,
+	0xbd0627f0,
+	0x0412fd20,
+	0xfd01e4b6,
+	0x18fe051e,
+	0xfd21f500,
+	0xd30ef405,
+/* 0x0538: main_not_ctx_xfer */
+	0xf010ef94,
+	0x21f501f5,
+	0x0ef4037e,
+/* 0x0545: ih */
+	0xfe80f9c6,
+	0x80f90188,
+	0xa0f990f9,
+	0xd0f9b0f9,
+	0xf0f9e0f9,
+	0xa7f104bd,
+	0xa3f00200,
+	0x00aacf00,
+	0xf404abc4,
+	0xd7f02c0b,
+	0x00e7f124,
+	0x00e3f01a,
+	0xf100eecf,
+	0xf01900f7,
+	0xffcf00f3,
+	0x0421f400,
+	0xf101e7f0,
+	0xf01d0007,
+	0x0ed00003,
+/* 0x0593: ih_no_fifo */
+	0xf104bd00,
+	0xf0010007,
+	0x0ad00003,
+	0xfc04bd00,
+	0xfce0fcf0,
+	0xfcb0fcd0,
+	0xfc90fca0,
+	0x0088fe80,
+	0x32f480fc,
+/* 0x05b7: hub_barrier_done */
+	0xf001f800,
+	0x0e9801f7,
+	0x04febb04,
+	0xf102ffb9,
+	0xf09418e7,
+	0x21f440e3,
+/* 0x05cf: ctx_redswitch */
+	0xf000f89d,
+	0x07f120f7,
+	0x03f08500,
+	0x000fd001,
+	0xe7f004bd,
+/* 0x05e1: ctx_redswitch_delay */
+	0x01e2b608,
+	0xf1fd1bf4,
+	0xf10800f5,
+	0xf10200f5,
 	0xf0850007,
 	0x0fd00103,
-	0xf004bd00,
-/* 0x05de: ctx_redswitch_delay */
-	0xe2b608e7,
-	0xfd1bf401,
-	0x0800f5f1,
-	0x0200f5f1,
-	0x850007f1,
-	0xd00103f0,
-	0x04bd000f,
-/* 0x05fa: ctx_xfer */
-	0x07f100f8,
-	0x03f08100,
-	0x000fd002,
-	0x11f404bd,
-	0xcc21f507,
-/* 0x060d: ctx_xfer_not_load */
-	0x6a21f505,
-	0xf124bd02,
-	0xf047fc07,
-	0x02d00203,
-	0xf004bd00,
-	0x20b6012c,
-	0xfc07f103,
-	0x0203f04a,
-	0xbd0002d0,
-	0x01acf004,
-	0xf102a5f0,
-	0xf00000b7,
-	0x0c9850b3,
-	0x0fc4b604,
-	0x9800bcbb,
-	0x0d98000c,
-	0x00e7f001,
-	0x016f21f5,
-	0xf101acf0,
-	0xf04000b7,
-	0x0c9850b3,
-	0x0fc4b604,
-	0x9800bcbb,
-	0x0d98010c,
-	0x060f9802,
-	0x0800e7f1,
-	0x016f21f5,
+	0xf804bd00,
+/* 0x05fd: ctx_xfer */
+	0x0007f100,
+	0x0203f081,
+	0xbd000fd0,
+	0x0711f404,
+	0x05cf21f5,
+/* 0x0610: ctx_xfer_not_load */
+	0x026a21f5,
+	0x07f124bd,
+	0x03f047fc,
+	0x0002d002,
+	0x2cf004bd,
+	0x0320b601,
+	0x4afc07f1,
+	0xd00203f0,
+	0x04bd0002,
 	0xf001acf0,
-	0xb7f104a5,
-	0xb3f03000,
+	0xb7f102a5,
+	0xb3f00000,
 	0x040c9850,
 	0xbb0fc4b6,
 	0x0c9800bc,
-	0x030d9802,
-	0xf1080f98,
-	0xf50200e7,
-	0xf5016f21,
-	0xf4025e21,
-	0x12f40601,
-/* 0x06a9: ctx_xfer_post */
-	0x7f21f507,
-/* 0x06ad: ctx_xfer_done */
-	0xb421f502,
-	0x0000f805,
-	0x00000000,
+	0x010d9800,
+	0xf500e7f0,
+	0xf0016f21,
+	0xb7f101ac,
+	0xb3f04000,
+	0x040c9850,
+	0xbb0fc4b6,
+	0x0c9800bc,
+	0x020d9801,
+	0xf1060f98,
+	0xf50800e7,
+	0xf0016f21,
+	0xa5f001ac,
+	0x00b7f104,
+	0x50b3f030,
+	0xb6040c98,
+	0xbcbb0fc4,
+	0x020c9800,
+	0x98030d98,
+	0xe7f1080f,
+	0x21f50200,
+	0x21f5016f,
+	0x01f4025e,
+	0x0712f406,
+/* 0x06ac: ctx_xfer_post */
+	0x027f21f5,
+/* 0x06b0: ctx_xfer_done */
+	0x05b721f5,
+	0x000000f8,
 	0x00000000,
 	0x00000000,
 	0x00000000,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk110.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk110.fuc3.h
index ee8e54d..1c6e11b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk110.fuc3.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk110.fuc3.h
@@ -314,7 +314,7 @@
 	0x03f01200,
 	0x0002d000,
 	0x17f104bd,
-	0x10fe0542,
+	0x10fe0545,
 	0x0007f100,
 	0x0003f007,
 	0xbd0000d0,
@@ -338,184 +338,184 @@
 	0x02d00103,
 	0xf104bd00,
 	0xf00c30e7,
-	0x24bd50e3,
-	0x44bd34bd,
-/* 0x0430: init_unk_loop */
-	0xb06821f4,
-	0x0bf400f6,
-	0x01f7f00f,
-	0xfd04f2bb,
-	0x30b6054f,
-/* 0x0445: init_unk_next */
-	0x0120b601,
-	0xb004e0b6,
-	0x1bf40226,
-/* 0x0451: init_unk_done */
-	0x070380e2,
-	0xf1080480,
-	0xf0010027,
-	0x22cf0223,
-	0x9534bd00,
-	0x07f10825,
-	0x03f0c000,
-	0x0005d001,
-	0x07f104bd,
-	0x03f0c100,
-	0x0005d001,
-	0x0e9804bd,
-	0x010f9800,
-	0x015021f5,
-	0xbb002fbb,
-	0x0e98003f,
-	0x020f9801,
-	0x015021f5,
-	0xfd050e98,
-	0x2ebb00ef,
-	0x003ebb00,
-	0x98020e98,
-	0x21f5030f,
-	0x0e980150,
-	0x00effd07,
-	0xbb002ebb,
-	0x35b6003e,
-	0x0007f102,
-	0x0103f0d3,
-	0xbd0003d0,
-	0x0825b604,
-	0xb60635b6,
-	0x30b60120,
-	0x0824b601,
-	0xb90834b6,
-	0x21f5022f,
-	0x2fbb02d3,
-	0x003fbb00,
-	0x010007f1,
-	0xd00203f0,
+	0xe5f050e3,
+	0xbd24bd01,
+/* 0x0433: init_unk_loop */
+	0xf444bd34,
+	0xf6b06821,
+	0x0f0bf400,
+	0xbb01f7f0,
+	0x4ffd04f2,
+	0x0130b605,
+/* 0x0448: init_unk_next */
+	0xb60120b6,
+	0x26b004e0,
+	0xe21bf402,
+/* 0x0454: init_unk_done */
+	0x80070380,
+	0x27f10804,
+	0x23f00100,
+	0x0022cf02,
+	0x259534bd,
+	0x0007f108,
+	0x0103f0c0,
+	0xbd0005d0,
+	0x0007f104,
+	0x0103f0c1,
+	0xbd0005d0,
+	0x000e9804,
+	0xf5010f98,
+	0xbb015021,
+	0x3fbb002f,
+	0x010e9800,
+	0xf5020f98,
+	0x98015021,
+	0xeffd050e,
+	0x002ebb00,
+	0x98003ebb,
+	0x0f98020e,
+	0x5021f503,
+	0x070e9801,
+	0xbb00effd,
+	0x3ebb002e,
+	0x0235b600,
+	0xd30007f1,
+	0xd00103f0,
 	0x04bd0003,
-	0x29f024bd,
-	0x0007f11f,
-	0x0203f030,
-	0xbd0002d0,
-/* 0x0505: main */
-	0x0031f404,
-	0xf00028f4,
-	0x21f424d7,
-	0xf401f439,
-	0xf404e4b0,
-	0x81fe1e18,
-	0x0627f001,
-	0x12fd20bd,
-	0x01e4b604,
-	0xfe051efd,
-	0x21f50018,
-	0x0ef405fa,
-/* 0x0535: main_not_ctx_xfer */
-	0x10ef94d3,
-	0xf501f5f0,
-	0xf4037e21,
-/* 0x0542: ih */
-	0x80f9c60e,
-	0xf90188fe,
-	0xf990f980,
-	0xf9b0f9a0,
-	0xf9e0f9d0,
-	0xf104bdf0,
-	0xf00200a7,
-	0xaacf00a3,
-	0x04abc400,
-	0xf02c0bf4,
-	0xe7f124d7,
-	0xe3f01a00,
-	0x00eecf00,
-	0x1900f7f1,
-	0xcf00f3f0,
-	0x21f400ff,
-	0x01e7f004,
-	0x1d0007f1,
-	0xd00003f0,
-	0x04bd000e,
-/* 0x0590: ih_no_fifo */
-	0x010007f1,
-	0xd00003f0,
-	0x04bd000a,
-	0xe0fcf0fc,
-	0xb0fcd0fc,
-	0x90fca0fc,
-	0x88fe80fc,
-	0xf480fc00,
-	0x01f80032,
-/* 0x05b4: hub_barrier_done */
-	0x9801f7f0,
-	0xfebb040e,
-	0x02ffb904,
-	0x9418e7f1,
-	0xf440e3f0,
-	0x00f89d21,
-/* 0x05cc: ctx_redswitch */
-	0xf120f7f0,
+	0xb60825b6,
+	0x20b60635,
+	0x0130b601,
+	0xb60824b6,
+	0x2fb90834,
+	0xd321f502,
+	0x002fbb02,
+	0xf1003fbb,
+	0xf0010007,
+	0x03d00203,
+	0xbd04bd00,
+	0x1f29f024,
+	0x300007f1,
+	0xd00203f0,
+	0x04bd0002,
+/* 0x0508: main */
+	0xf40031f4,
+	0xd7f00028,
+	0x3921f424,
+	0xb0f401f4,
+	0x18f404e4,
+	0x0181fe1e,
+	0xbd0627f0,
+	0x0412fd20,
+	0xfd01e4b6,
+	0x18fe051e,
+	0xfd21f500,
+	0xd30ef405,
+/* 0x0538: main_not_ctx_xfer */
+	0xf010ef94,
+	0x21f501f5,
+	0x0ef4037e,
+/* 0x0545: ih */
+	0xfe80f9c6,
+	0x80f90188,
+	0xa0f990f9,
+	0xd0f9b0f9,
+	0xf0f9e0f9,
+	0xa7f104bd,
+	0xa3f00200,
+	0x00aacf00,
+	0xf404abc4,
+	0xd7f02c0b,
+	0x00e7f124,
+	0x00e3f01a,
+	0xf100eecf,
+	0xf01900f7,
+	0xffcf00f3,
+	0x0421f400,
+	0xf101e7f0,
+	0xf01d0007,
+	0x0ed00003,
+/* 0x0593: ih_no_fifo */
+	0xf104bd00,
+	0xf0010007,
+	0x0ad00003,
+	0xfc04bd00,
+	0xfce0fcf0,
+	0xfcb0fcd0,
+	0xfc90fca0,
+	0x0088fe80,
+	0x32f480fc,
+/* 0x05b7: hub_barrier_done */
+	0xf001f800,
+	0x0e9801f7,
+	0x04febb04,
+	0xf102ffb9,
+	0xf09418e7,
+	0x21f440e3,
+/* 0x05cf: ctx_redswitch */
+	0xf000f89d,
+	0x07f120f7,
+	0x03f08500,
+	0x000fd001,
+	0xe7f004bd,
+/* 0x05e1: ctx_redswitch_delay */
+	0x01e2b608,
+	0xf1fd1bf4,
+	0xf10800f5,
+	0xf10200f5,
 	0xf0850007,
 	0x0fd00103,
-	0xf004bd00,
-/* 0x05de: ctx_redswitch_delay */
-	0xe2b608e7,
-	0xfd1bf401,
-	0x0800f5f1,
-	0x0200f5f1,
-	0x850007f1,
-	0xd00103f0,
-	0x04bd000f,
-/* 0x05fa: ctx_xfer */
-	0x07f100f8,
-	0x03f08100,
-	0x000fd002,
-	0x11f404bd,
-	0xcc21f507,
-/* 0x060d: ctx_xfer_not_load */
-	0x6a21f505,
-	0xf124bd02,
-	0xf047fc07,
-	0x02d00203,
-	0xf004bd00,
-	0x20b6012c,
-	0xfc07f103,
-	0x0203f04a,
-	0xbd0002d0,
-	0x01acf004,
-	0xf102a5f0,
-	0xf00000b7,
-	0x0c9850b3,
-	0x0fc4b604,
-	0x9800bcbb,
-	0x0d98000c,
-	0x00e7f001,
-	0x016f21f5,
-	0xf101acf0,
-	0xf04000b7,
-	0x0c9850b3,
-	0x0fc4b604,
-	0x9800bcbb,
-	0x0d98010c,
-	0x060f9802,
-	0x0800e7f1,
-	0x016f21f5,
+	0xf804bd00,
+/* 0x05fd: ctx_xfer */
+	0x0007f100,
+	0x0203f081,
+	0xbd000fd0,
+	0x0711f404,
+	0x05cf21f5,
+/* 0x0610: ctx_xfer_not_load */
+	0x026a21f5,
+	0x07f124bd,
+	0x03f047fc,
+	0x0002d002,
+	0x2cf004bd,
+	0x0320b601,
+	0x4afc07f1,
+	0xd00203f0,
+	0x04bd0002,
 	0xf001acf0,
-	0xb7f104a5,
-	0xb3f03000,
+	0xb7f102a5,
+	0xb3f00000,
 	0x040c9850,
 	0xbb0fc4b6,
 	0x0c9800bc,
-	0x030d9802,
-	0xf1080f98,
-	0xf50200e7,
-	0xf5016f21,
-	0xf4025e21,
-	0x12f40601,
-/* 0x06a9: ctx_xfer_post */
-	0x7f21f507,
-/* 0x06ad: ctx_xfer_done */
-	0xb421f502,
-	0x0000f805,
-	0x00000000,
+	0x010d9800,
+	0xf500e7f0,
+	0xf0016f21,
+	0xb7f101ac,
+	0xb3f04000,
+	0x040c9850,
+	0xbb0fc4b6,
+	0x0c9800bc,
+	0x020d9801,
+	0xf1060f98,
+	0xf50800e7,
+	0xf0016f21,
+	0xa5f001ac,
+	0x00b7f104,
+	0x50b3f030,
+	0xb6040c98,
+	0xbcbb0fc4,
+	0x020c9800,
+	0x98030d98,
+	0xe7f1080f,
+	0x21f50200,
+	0x21f5016f,
+	0x01f4025e,
+	0x0712f406,
+/* 0x06ac: ctx_xfer_post */
+	0x027f21f5,
+/* 0x06b0: ctx_xfer_done */
+	0x05b721f5,
+	0x000000f8,
 	0x00000000,
 	0x00000000,
 	0x00000000,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk208.fuc5.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk208.fuc5.h
index fbcc342..84af7ec 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk208.fuc5.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk208.fuc5.h
@@ -276,7 +276,7 @@
 	0x02020014,
 	0xf6120040,
 	0x04bd0002,
-	0xfe048141,
+	0xfe048441,
 	0x00400010,
 	0x0000f607,
 	0x040204bd,
@@ -295,165 +295,165 @@
 	0x01c90080,
 	0xbd0002f6,
 	0x0c308e04,
-	0xbd24bd50,
-/* 0x0383: init_unk_loop */
-	0x7e44bd34,
-	0xb0000065,
-	0x0bf400f6,
-	0xbb010f0e,
-	0x4ffd04f2,
-	0x0130b605,
-/* 0x0398: init_unk_next */
-	0xb60120b6,
-	0x26b004e0,
-	0xe21bf401,
-/* 0x03a4: init_unk_done */
-	0xb50703b5,
-	0x00820804,
-	0x22cf0201,
-	0x9534bd00,
-	0x00800825,
-	0x05f601c0,
-	0x8004bd00,
-	0xf601c100,
-	0x04bd0005,
-	0x98000e98,
-	0x207e010f,
-	0x2fbb0001,
-	0x003fbb00,
-	0x98010e98,
-	0x207e020f,
-	0x0e980001,
-	0x00effd05,
-	0xbb002ebb,
-	0x0e98003e,
-	0x030f9802,
-	0x0001207e,
-	0xfd070e98,
-	0x2ebb00ef,
-	0x003ebb00,
-	0x800235b6,
-	0xf601d300,
-	0x04bd0003,
-	0xb60825b6,
-	0x20b60635,
-	0x0130b601,
-	0xb60824b6,
-	0x2fb20834,
-	0x0002687e,
-	0xbb002fbb,
-	0x0080003f,
-	0x03f60201,
-	0xbd04bd00,
-	0x1f29f024,
-	0x02300080,
-	0xbd0002f6,
-/* 0x0445: main */
-	0x0031f404,
-	0x0d0028f4,
-	0x00377e24,
-	0xf401f400,
-	0xf404e4b0,
-	0x81fe1d18,
-	0xbd060201,
-	0x0412fd20,
-	0xfd01e4b6,
-	0x18fe051e,
-	0x05187e00,
-	0xd40ef400,
-/* 0x0474: main_not_ctx_xfer */
-	0xf010ef94,
-	0xf87e01f5,
-	0x0ef40002,
-/* 0x0481: ih */
-	0xfe80f9c7,
-	0x80f90188,
-	0xa0f990f9,
-	0xd0f9b0f9,
-	0xf0f9e0f9,
-	0x004a04bd,
-	0x00aacf02,
-	0xf404abc4,
-	0x240d1f0b,
-	0xcf1a004e,
-	0x004f00ee,
-	0x00ffcf19,
-	0x0000047e,
-	0x0040010e,
-	0x000ef61d,
-/* 0x04be: ih_no_fifo */
-	0x004004bd,
-	0x000af601,
-	0xf0fc04bd,
-	0xd0fce0fc,
-	0xa0fcb0fc,
-	0x80fc90fc,
-	0xfc0088fe,
-	0x0032f480,
-/* 0x04de: hub_barrier_done */
-	0x010f01f8,
-	0xbb040e98,
-	0xffb204fe,
-	0x4094188e,
-	0x00008f7e,
-/* 0x04f2: ctx_redswitch */
-	0x200f00f8,
+	0x01e5f050,
+	0x34bd24bd,
+/* 0x0386: init_unk_loop */
+	0x657e44bd,
+	0xf6b00000,
+	0x0e0bf400,
+	0xf2bb010f,
+	0x054ffd04,
+/* 0x039b: init_unk_next */
+	0xb60130b6,
+	0xe0b60120,
+	0x0126b004,
+/* 0x03a7: init_unk_done */
+	0xb5e21bf4,
+	0x04b50703,
+	0x01008208,
+	0x0022cf02,
+	0x259534bd,
+	0xc0008008,
+	0x0005f601,
+	0x008004bd,
+	0x05f601c1,
+	0x9804bd00,
+	0x0f98000e,
+	0x01207e01,
+	0x002fbb00,
+	0x98003fbb,
+	0x0f98010e,
+	0x01207e02,
+	0x050e9800,
+	0xbb00effd,
+	0x3ebb002e,
+	0x020e9800,
+	0x7e030f98,
+	0x98000120,
+	0xeffd070e,
+	0x002ebb00,
+	0xb6003ebb,
+	0x00800235,
+	0x03f601d3,
+	0xb604bd00,
+	0x35b60825,
+	0x0120b606,
+	0xb60130b6,
+	0x34b60824,
+	0x7e2fb208,
+	0xbb000268,
+	0x3fbb002f,
+	0x01008000,
+	0x0003f602,
+	0x24bd04bd,
+	0x801f29f0,
+	0xf6023000,
+	0x04bd0002,
+/* 0x0448: main */
+	0xf40031f4,
+	0x240d0028,
+	0x0000377e,
+	0xb0f401f4,
+	0x18f404e4,
+	0x0181fe1d,
+	0x20bd0602,
+	0xb60412fd,
+	0x1efd01e4,
+	0x0018fe05,
+	0x00051b7e,
+/* 0x0477: main_not_ctx_xfer */
+	0x94d40ef4,
+	0xf5f010ef,
+	0x02f87e01,
+	0xc70ef400,
+/* 0x0484: ih */
+	0x88fe80f9,
+	0xf980f901,
+	0xf9a0f990,
+	0xf9d0f9b0,
+	0xbdf0f9e0,
+	0x02004a04,
+	0xc400aacf,
+	0x0bf404ab,
+	0x4e240d1f,
+	0xeecf1a00,
+	0x19004f00,
+	0x7e00ffcf,
+	0x0e000004,
+	0x1d004001,
+	0xbd000ef6,
+/* 0x04c1: ih_no_fifo */
+	0x01004004,
+	0xbd000af6,
+	0xfcf0fc04,
+	0xfcd0fce0,
+	0xfca0fcb0,
+	0xfe80fc90,
+	0x80fc0088,
+	0xf80032f4,
+/* 0x04e1: hub_barrier_done */
+	0x98010f01,
+	0xfebb040e,
+	0x8effb204,
+	0x7e409418,
+	0xf800008f,
+/* 0x04f5: ctx_redswitch */
+	0x80200f00,
+	0xf6018500,
+	0x04bd000f,
+/* 0x0502: ctx_redswitch_delay */
+	0xe2b6080e,
+	0xfd1bf401,
+	0x0800f5f1,
+	0x0200f5f1,
 	0x01850080,
 	0xbd000ff6,
-/* 0x04ff: ctx_redswitch_delay */
-	0xb6080e04,
-	0x1bf401e2,
-	0x00f5f1fd,
-	0x00f5f108,
-	0x85008002,
-	0x000ff601,
-	0x00f804bd,
-/* 0x0518: ctx_xfer */
-	0x02810080,
-	0xbd000ff6,
-	0x0711f404,
-	0x0004f27e,
-/* 0x0528: ctx_xfer_not_load */
-	0x0002167e,
-	0xfc8024bd,
-	0x02f60247,
-	0xf004bd00,
-	0x20b6012c,
-	0x4afc8003,
+/* 0x051b: ctx_xfer */
+	0x8000f804,
+	0xf6028100,
+	0x04bd000f,
+	0x7e0711f4,
+/* 0x052b: ctx_xfer_not_load */
+	0x7e0004f5,
+	0xbd000216,
+	0x47fc8024,
 	0x0002f602,
-	0xacf004bd,
-	0x02a5f001,
-	0x5000008b,
-	0xb6040c98,
-	0xbcbb0fc4,
-	0x000c9800,
-	0x0e010d98,
-	0x013d7e00,
-	0x01acf000,
-	0x5040008b,
-	0xb6040c98,
-	0xbcbb0fc4,
-	0x010c9800,
-	0x98020d98,
-	0x004e060f,
-	0x013d7e08,
-	0x01acf000,
-	0x8b04a5f0,
-	0x98503000,
+	0x2cf004bd,
+	0x0320b601,
+	0x024afc80,
+	0xbd0002f6,
+	0x01acf004,
+	0x8b02a5f0,
+	0x98500000,
 	0xc4b6040c,
 	0x00bcbb0f,
-	0x98020c98,
-	0x0f98030d,
-	0x02004e08,
+	0x98000c98,
+	0x000e010d,
 	0x00013d7e,
-	0x00020a7e,
-	0xf40601f4,
-/* 0x05b2: ctx_xfer_post */
-	0x277e0712,
-/* 0x05b6: ctx_xfer_done */
-	0xde7e0002,
-	0x00f80004,
-	0x00000000,
+	0x8b01acf0,
+	0x98504000,
+	0xc4b6040c,
+	0x00bcbb0f,
+	0x98010c98,
+	0x0f98020d,
+	0x08004e06,
+	0x00013d7e,
+	0xf001acf0,
+	0x008b04a5,
+	0x0c985030,
+	0x0fc4b604,
+	0x9800bcbb,
+	0x0d98020c,
+	0x080f9803,
+	0x7e02004e,
+	0x7e00013d,
+	0xf400020a,
+	0x12f40601,
+/* 0x05b5: ctx_xfer_post */
+	0x02277e07,
+/* 0x05b9: ctx_xfer_done */
+	0x04e17e00,
+	0x0000f800,
 	0x00000000,
 	0x00000000,
 	0x00000000,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgm107.fuc5.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgm107.fuc5.h
index 51f5c3c..11bf363 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgm107.fuc5.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgm107.fuc5.h
@@ -289,7 +289,7 @@
 	0x020014fe,
 	0x12004002,
 	0xbd0002f6,
-	0x05b04104,
+	0x05b34104,
 	0x400010fe,
 	0x00f60700,
 	0x0204bd00,
@@ -308,259 +308,259 @@
 	0xc900800f,
 	0x0002f601,
 	0x308e04bd,
-	0x24bd500c,
-	0x44bd34bd,
-/* 0x03b0: init_unk_loop */
-	0x0000657e,
-	0xf400f6b0,
-	0x010f0e0b,
-	0xfd04f2bb,
-	0x30b6054f,
-/* 0x03c5: init_unk_next */
-	0x0120b601,
-	0xb004e0b6,
-	0x1bf40226,
-/* 0x03d1: init_unk_done */
-	0x0703b5e2,
-	0x820804b5,
-	0xcf020100,
-	0x34bd0022,
-	0x80082595,
-	0xf601c000,
+	0xe5f0500c,
+	0xbd24bd01,
+/* 0x03b3: init_unk_loop */
+	0x7e44bd34,
+	0xb0000065,
+	0x0bf400f6,
+	0xbb010f0e,
+	0x4ffd04f2,
+	0x0130b605,
+/* 0x03c8: init_unk_next */
+	0xb60120b6,
+	0x26b004e0,
+	0xe21bf402,
+/* 0x03d4: init_unk_done */
+	0xb50703b5,
+	0x00820804,
+	0x22cf0201,
+	0x9534bd00,
+	0x00800825,
+	0x05f601c0,
+	0x8004bd00,
+	0xf601c100,
 	0x04bd0005,
-	0x01c10080,
-	0xbd0005f6,
-	0x000e9804,
-	0x7e010f98,
-	0xbb000120,
-	0x3fbb002f,
-	0x010e9800,
-	0x7e020f98,
-	0x98000120,
-	0xeffd050e,
-	0x002ebb00,
-	0x98003ebb,
-	0x0f98020e,
-	0x01207e03,
-	0x070e9800,
-	0xbb00effd,
-	0x3ebb002e,
-	0x0235b600,
-	0x01d30080,
-	0xbd0003f6,
-	0x0825b604,
-	0xb60635b6,
-	0x30b60120,
-	0x0824b601,
-	0xb20834b6,
-	0x02687e2f,
-	0x002fbb00,
-	0x0f003fbb,
-	0x8effb23f,
-	0xf0501d60,
-	0x8f7e01e5,
-	0x0c0f0000,
-	0xa88effb2,
-	0xe5f0501d,
-	0x008f7e01,
-	0x03147e00,
-	0xb23f0f00,
-	0x1d608eff,
-	0x01e5f050,
-	0x00008f7e,
-	0xffb2000f,
-	0x501d9c8e,
-	0x7e01e5f0,
-	0x0f00008f,
-	0x03147e01,
-	0x8effb200,
+	0x98000e98,
+	0x207e010f,
+	0x2fbb0001,
+	0x003fbb00,
+	0x98010e98,
+	0x207e020f,
+	0x0e980001,
+	0x00effd05,
+	0xbb002ebb,
+	0x0e98003e,
+	0x030f9802,
+	0x0001207e,
+	0xfd070e98,
+	0x2ebb00ef,
+	0x003ebb00,
+	0x800235b6,
+	0xf601d300,
+	0x04bd0003,
+	0xb60825b6,
+	0x20b60635,
+	0x0130b601,
+	0xb60824b6,
+	0x2fb20834,
+	0x0002687e,
+	0xbb002fbb,
+	0x3f0f003f,
+	0x501d608e,
+	0xb201e5f0,
+	0x008f7eff,
+	0x8e0c0f00,
 	0xf0501da8,
-	0x8f7e01e5,
-	0xff0f0000,
-	0x988effb2,
-	0xe5f0501d,
-	0x008f7e01,
-	0xb2020f00,
-	0x1da88eff,
-	0x01e5f050,
+	0xffb201e5,
 	0x00008f7e,
 	0x0003147e,
-	0x85050498,
-	0x98504000,
-	0x64b60406,
-	0x0056bb0f,
-/* 0x04e0: tpc_strand_init_tpc_loop */
-	0x05705eb8,
-	0x00657e00,
-	0xbdf6b200,
-/* 0x04ed: tpc_strand_init_idx_loop */
-	0x605eb874,
-	0x7fb20005,
-	0x00008f7e,
-	0x05885eb8,
-	0x082f9500,
-	0x00008f7e,
-	0x058c5eb8,
-	0x082f9500,
-	0x00008f7e,
-	0x05905eb8,
-	0x00657e00,
-	0x06f5b600,
-	0xb601f0b6,
-	0x2fbb08f4,
-	0x003fbb00,
-	0xb60170b6,
-	0x1bf40162,
-	0x0050b7bf,
-	0x0142b608,
-	0x0fa81bf4,
-	0x8effb23f,
-	0xf0501d60,
-	0x8f7e01e5,
-	0x0d0f0000,
-	0xa88effb2,
+	0x608e3f0f,
 	0xe5f0501d,
-	0x008f7e01,
-	0x03147e00,
-	0x01008000,
-	0x0003f602,
-	0x24bd04bd,
-	0x801f29f0,
-	0xf6023000,
-	0x04bd0002,
-/* 0x0574: main */
-	0xf40031f4,
-	0x240d0028,
-	0x0000377e,
-	0xb0f401f4,
-	0x18f404e4,
-	0x0181fe1d,
-	0x20bd0602,
-	0xb60412fd,
-	0x1efd01e4,
-	0x0018fe05,
-	0x0006477e,
-/* 0x05a3: main_not_ctx_xfer */
-	0x94d40ef4,
-	0xf5f010ef,
-	0x02f87e01,
-	0xc70ef400,
-/* 0x05b0: ih */
-	0x88fe80f9,
-	0xf980f901,
-	0xf9a0f990,
-	0xf9d0f9b0,
-	0xbdf0f9e0,
-	0x02004a04,
-	0xc400aacf,
-	0x0bf404ab,
-	0x4e240d1f,
-	0xeecf1a00,
-	0x19004f00,
-	0x7e00ffcf,
-	0x0e000004,
-	0x1d004001,
-	0xbd000ef6,
-/* 0x05ed: ih_no_fifo */
-	0x01004004,
-	0xbd000af6,
-	0xfcf0fc04,
-	0xfcd0fce0,
-	0xfca0fcb0,
-	0xfe80fc90,
-	0x80fc0088,
-	0xf80032f4,
-/* 0x060d: hub_barrier_done */
-	0x98010f01,
-	0xfebb040e,
-	0x8effb204,
-	0x7e409418,
-	0xf800008f,
-/* 0x0621: ctx_redswitch */
-	0x80200f00,
+	0x7effb201,
+	0x0f00008f,
+	0x1d9c8e00,
+	0x01e5f050,
+	0x8f7effb2,
+	0x010f0000,
+	0x0003147e,
+	0x501da88e,
+	0xb201e5f0,
+	0x008f7eff,
+	0x8eff0f00,
+	0xf0501d98,
+	0xffb201e5,
+	0x00008f7e,
+	0xa88e020f,
+	0xe5f0501d,
+	0x7effb201,
+	0x7e00008f,
+	0x98000314,
+	0x00850504,
+	0x06985040,
+	0x0f64b604,
+/* 0x04e3: tpc_strand_init_tpc_loop */
+	0xb80056bb,
+	0x0005705e,
+	0x0000657e,
+	0x74bdf6b2,
+/* 0x04f0: tpc_strand_init_idx_loop */
+	0x05605eb8,
+	0x7e7fb200,
+	0xb800008f,
+	0x0005885e,
+	0x7e082f95,
+	0xb800008f,
+	0x00058c5e,
+	0x7e082f95,
+	0xb800008f,
+	0x0005905e,
+	0x0000657e,
+	0xb606f5b6,
+	0xf4b601f0,
+	0x002fbb08,
+	0xb6003fbb,
+	0x62b60170,
+	0xbf1bf401,
+	0x080050b7,
+	0xf40142b6,
+	0x3f0fa81b,
+	0x501d608e,
+	0xb201e5f0,
+	0x008f7eff,
+	0x8e0d0f00,
+	0xf0501da8,
+	0xffb201e5,
+	0x00008f7e,
+	0x0003147e,
+	0x02010080,
+	0xbd0003f6,
+	0xf024bd04,
+	0x00801f29,
+	0x02f60230,
+/* 0x0577: main */
+	0xf404bd00,
+	0x28f40031,
+	0x7e240d00,
+	0xf4000037,
+	0xe4b0f401,
+	0x1d18f404,
+	0x020181fe,
+	0xfd20bd06,
+	0xe4b60412,
+	0x051efd01,
+	0x7e0018fe,
+	0xf400064a,
+/* 0x05a6: main_not_ctx_xfer */
+	0xef94d40e,
+	0x01f5f010,
+	0x0002f87e,
+/* 0x05b3: ih */
+	0xf9c70ef4,
+	0x0188fe80,
+	0x90f980f9,
+	0xb0f9a0f9,
+	0xe0f9d0f9,
+	0x04bdf0f9,
+	0xcf02004a,
+	0xabc400aa,
+	0x1f0bf404,
+	0x004e240d,
+	0x00eecf1a,
+	0xcf19004f,
+	0x047e00ff,
+	0x010e0000,
+	0xf61d0040,
+	0x04bd000e,
+/* 0x05f0: ih_no_fifo */
+	0xf6010040,
+	0x04bd000a,
+	0xe0fcf0fc,
+	0xb0fcd0fc,
+	0x90fca0fc,
+	0x88fe80fc,
+	0xf480fc00,
+	0x01f80032,
+/* 0x0610: hub_barrier_done */
+	0x0e98010f,
+	0x04febb04,
+	0x188effb2,
+	0x8f7e4094,
+	0x00f80000,
+/* 0x0624: ctx_redswitch */
+	0x0080200f,
+	0x0ff60185,
+	0x0e04bd00,
+/* 0x0631: ctx_redswitch_delay */
+	0x01e2b608,
+	0xf1fd1bf4,
+	0xf10800f5,
+	0x800200f5,
 	0xf6018500,
 	0x04bd000f,
-/* 0x062e: ctx_redswitch_delay */
-	0xe2b6080e,
-	0xfd1bf401,
-	0x0800f5f1,
-	0x0200f5f1,
-	0x01850080,
-	0xbd000ff6,
-/* 0x0647: ctx_xfer */
-	0x8000f804,
-	0xf6028100,
-	0x04bd000f,
-	0xc48effb2,
-	0xe5f0501d,
-	0x008f7e01,
-	0x0711f400,
-	0x0006217e,
-/* 0x0664: ctx_xfer_not_load */
-	0x0002167e,
-	0xfc8024bd,
-	0x02f60247,
-	0xf004bd00,
-	0x20b6012c,
-	0x4afc8003,
+/* 0x064a: ctx_xfer */
+	0x008000f8,
+	0x0ff60281,
+	0x8e04bd00,
+	0xf0501dc4,
+	0xffb201e5,
+	0x00008f7e,
+	0x7e0711f4,
+/* 0x0667: ctx_xfer_not_load */
+	0x7e000624,
+	0xbd000216,
+	0x47fc8024,
 	0x0002f602,
-	0x0c0f04bd,
-	0xa88effb2,
+	0x2cf004bd,
+	0x0320b601,
+	0x024afc80,
+	0xbd0002f6,
+	0x8e0c0f04,
+	0xf0501da8,
+	0xffb201e5,
+	0x00008f7e,
+	0x0003147e,
+	0x608e3f0f,
 	0xe5f0501d,
-	0x008f7e01,
-	0x03147e00,
-	0xb23f0f00,
-	0x1d608eff,
-	0x01e5f050,
-	0x00008f7e,
-	0xffb2000f,
-	0x501d9c8e,
-	0x7e01e5f0,
+	0x7effb201,
 	0x0f00008f,
-	0x03147e01,
-	0x01fcf000,
-	0xb203f0b6,
-	0x1da88eff,
+	0x1d9c8e00,
 	0x01e5f050,
-	0x00008f7e,
-	0xf001acf0,
-	0x008b02a5,
-	0x0c985000,
-	0x0fc4b604,
-	0x9800bcbb,
-	0x0d98000c,
-	0x7e000e01,
-	0xf000013d,
-	0x008b01ac,
-	0x0c985040,
-	0x0fc4b604,
-	0x9800bcbb,
-	0x0d98010c,
-	0x060f9802,
-	0x7e08004e,
-	0xf000013d,
+	0x8f7effb2,
+	0x010f0000,
+	0x0003147e,
+	0xb601fcf0,
+	0xa88e03f0,
+	0xe5f0501d,
+	0x7effb201,
+	0xf000008f,
 	0xa5f001ac,
-	0x30008b04,
+	0x00008b02,
 	0x040c9850,
 	0xbb0fc4b6,
 	0x0c9800bc,
-	0x030d9802,
-	0x4e080f98,
-	0x3d7e0200,
-	0x0a7e0001,
-	0x147e0002,
-	0x01f40003,
-	0x1a12f406,
-/* 0x073c: ctx_xfer_post */
-	0x0002277e,
-	0xffb20d0f,
-	0x501da88e,
-	0x7e01e5f0,
-	0x7e00008f,
-/* 0x0753: ctx_xfer_done */
-	0x7e000314,
-	0xf800060d,
-	0x00000000,
+	0x010d9800,
+	0x3d7e000e,
+	0xacf00001,
+	0x40008b01,
+	0x040c9850,
+	0xbb0fc4b6,
+	0x0c9800bc,
+	0x020d9801,
+	0x4e060f98,
+	0x3d7e0800,
+	0xacf00001,
+	0x04a5f001,
+	0x5030008b,
+	0xb6040c98,
+	0xbcbb0fc4,
+	0x020c9800,
+	0x98030d98,
+	0x004e080f,
+	0x013d7e02,
+	0x020a7e00,
+	0x03147e00,
+	0x0601f400,
+/* 0x073f: ctx_xfer_post */
+	0x7e1a12f4,
+	0x0f000227,
+	0x1da88e0d,
+	0x01e5f050,
+	0x8f7effb2,
+	0x147e0000,
+/* 0x0756: ctx_xfer_done */
+	0x107e0003,
+	0x00f80006,
 	0x00000000,
 	0x00000000,
 	0x00000000,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
index dda7a7d..9f5dfc8 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
@@ -143,7 +143,7 @@
 static int
 gf100_fermi_mthd_zbc_color(struct nvkm_object *object, void *data, u32 size)
 {
-	struct gf100_gr *gr = (void *)object->engine;
+	struct gf100_gr *gr = gf100_gr(nvkm_gr(object->engine));
 	union {
 		struct fermi_a_zbc_color_v0 v0;
 	} *args = data;
@@ -189,7 +189,7 @@
 static int
 gf100_fermi_mthd_zbc_depth(struct nvkm_object *object, void *data, u32 size)
 {
-	struct gf100_gr *gr = (void *)object->engine;
+	struct gf100_gr *gr = gf100_gr(nvkm_gr(object->engine));
 	union {
 		struct fermi_a_zbc_depth_v0 v0;
 	} *args = data;
@@ -1530,6 +1530,8 @@
 		gr->ppc_nr[i]  = gr->func->ppc_nr;
 		for (j = 0; j < gr->ppc_nr[i]; j++) {
 			u8 mask = nvkm_rd32(device, GPC_UNIT(i, 0x0c30 + (j * 4)));
+			if (mask)
+				gr->ppc_mask[i] |= (1 << j);
 			gr->ppc_tpc_nr[i][j] = hweight8(mask);
 		}
 	}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
index 4611961..02e78b8 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
@@ -97,6 +97,7 @@
 	u8 tpc_nr[GPC_MAX];
 	u8 tpc_total;
 	u8 ppc_nr[GPC_MAX];
+	u8 ppc_mask[GPC_MAX];
 	u8 ppc_tpc_nr[GPC_MAX][4];
 
 	struct nvkm_memory *unk4188b4;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/base.c
index 895ba74..1d7dd38 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/base.c
@@ -97,7 +97,9 @@
 nvkm_instobj_dtor(struct nvkm_memory *memory)
 {
 	struct nvkm_instobj *iobj = nvkm_instobj(memory);
+	spin_lock(&iobj->imem->lock);
 	list_del(&iobj->head);
+	spin_unlock(&iobj->imem->lock);
 	nvkm_memory_del(&iobj->parent);
 	return iobj;
 }
@@ -190,7 +192,9 @@
 		nvkm_memory_ctor(&nvkm_instobj_func_slow, &iobj->memory);
 		iobj->parent = memory;
 		iobj->imem = imem;
+		spin_lock(&iobj->imem->lock);
 		list_add_tail(&iobj->head, &imem->list);
+		spin_unlock(&iobj->imem->lock);
 		memory = &iobj->memory;
 	}
 
@@ -309,5 +313,6 @@
 {
 	nvkm_subdev_ctor(&nvkm_instmem, device, index, 0, &imem->subdev);
 	imem->func = func;
+	spin_lock_init(&imem->lock);
 	INIT_LIST_HEAD(&imem->list);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk104.c
index b61509e..b735173 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk104.c
@@ -59,7 +59,7 @@
 	duty = (uv - bios->base) * div / bios->pwm_range;
 
 	nvkm_wr32(device, 0x20340, div);
-	nvkm_wr32(device, 0x20344, 0x8000000 | duty);
+	nvkm_wr32(device, 0x20344, 0x80000000 | duty);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/omapdrm/omap_crtc.c b/drivers/gpu/drm/omapdrm/omap_crtc.c
index ad09590..2ed0754 100644
--- a/drivers/gpu/drm/omapdrm/omap_crtc.c
+++ b/drivers/gpu/drm/omapdrm/omap_crtc.c
@@ -524,7 +524,7 @@
 	omap_crtc->mgr = omap_dss_get_overlay_manager(channel);
 
 	ret = drm_crtc_init_with_planes(dev, crtc, plane, NULL,
-					&omap_crtc_funcs);
+					&omap_crtc_funcs, NULL);
 	if (ret < 0) {
 		kfree(omap_crtc);
 		return NULL;
diff --git a/drivers/gpu/drm/omapdrm/omap_drv.h b/drivers/gpu/drm/omapdrm/omap_drv.h
index 5c367aa..130fca7 100644
--- a/drivers/gpu/drm/omapdrm/omap_drv.h
+++ b/drivers/gpu/drm/omapdrm/omap_drv.h
@@ -172,9 +172,9 @@
 uint32_t omap_framebuffer_get_formats(uint32_t *pixel_formats,
 		uint32_t max_formats, enum omap_color_mode supported_modes);
 struct drm_framebuffer *omap_framebuffer_create(struct drm_device *dev,
-		struct drm_file *file, struct drm_mode_fb_cmd2 *mode_cmd);
+		struct drm_file *file, const struct drm_mode_fb_cmd2 *mode_cmd);
 struct drm_framebuffer *omap_framebuffer_init(struct drm_device *dev,
-		struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **bos);
+		const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **bos);
 struct drm_gem_object *omap_framebuffer_bo(struct drm_framebuffer *fb, int p);
 int omap_framebuffer_pin(struct drm_framebuffer *fb);
 void omap_framebuffer_unpin(struct drm_framebuffer *fb);
@@ -248,7 +248,7 @@
 
 static inline int objects_lookup(struct drm_device *dev,
 		struct drm_file *filp, uint32_t pixel_format,
-		struct drm_gem_object **bos, uint32_t *handles)
+		struct drm_gem_object **bos, const uint32_t *handles)
 {
 	int i, n = drm_format_num_planes(pixel_format);
 
diff --git a/drivers/gpu/drm/omapdrm/omap_encoder.c b/drivers/gpu/drm/omapdrm/omap_encoder.c
index 7d9b32a..0c104ad 100644
--- a/drivers/gpu/drm/omapdrm/omap_encoder.c
+++ b/drivers/gpu/drm/omapdrm/omap_encoder.c
@@ -178,7 +178,7 @@
 	encoder = &omap_encoder->base;
 
 	drm_encoder_init(dev, encoder, &omap_encoder_funcs,
-			 DRM_MODE_ENCODER_TMDS);
+			 DRM_MODE_ENCODER_TMDS, NULL);
 	drm_encoder_helper_add(encoder, &omap_encoder_helper_funcs);
 
 	return encoder;
diff --git a/drivers/gpu/drm/omapdrm/omap_fb.c b/drivers/gpu/drm/omapdrm/omap_fb.c
index 636a1f9..ad202df 100644
--- a/drivers/gpu/drm/omapdrm/omap_fb.c
+++ b/drivers/gpu/drm/omapdrm/omap_fb.c
@@ -364,7 +364,7 @@
 #endif
 
 struct drm_framebuffer *omap_framebuffer_create(struct drm_device *dev,
-		struct drm_file *file, struct drm_mode_fb_cmd2 *mode_cmd)
+		struct drm_file *file, const struct drm_mode_fb_cmd2 *mode_cmd)
 {
 	struct drm_gem_object *bos[4];
 	struct drm_framebuffer *fb;
@@ -386,7 +386,7 @@
 }
 
 struct drm_framebuffer *omap_framebuffer_init(struct drm_device *dev,
-		struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **bos)
+		const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **bos)
 {
 	struct omap_framebuffer *omap_fb = NULL;
 	struct drm_framebuffer *fb = NULL;
diff --git a/drivers/gpu/drm/omapdrm/omap_plane.c b/drivers/gpu/drm/omapdrm/omap_plane.c
index 3054bda..d5ecabd 100644
--- a/drivers/gpu/drm/omapdrm/omap_plane.c
+++ b/drivers/gpu/drm/omapdrm/omap_plane.c
@@ -366,7 +366,7 @@
 
 	ret = drm_universal_plane_init(dev, plane, (1 << priv->num_crtcs) - 1,
 				       &omap_plane_funcs, omap_plane->formats,
-				       omap_plane->nformats, type);
+				       omap_plane->nformats, type, NULL);
 	if (ret < 0)
 		goto error;
 
diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c
index 183aea1..8627651 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -521,7 +521,7 @@
 int
 qxl_framebuffer_init(struct drm_device *dev,
 		     struct qxl_framebuffer *qfb,
-		     struct drm_mode_fb_cmd2 *mode_cmd,
+		     const struct drm_mode_fb_cmd2 *mode_cmd,
 		     struct drm_gem_object *obj)
 {
 	int ret;
@@ -876,16 +876,6 @@
 	.best_encoder = qxl_best_encoder,
 };
 
-static void qxl_conn_save(struct drm_connector *connector)
-{
-	DRM_DEBUG("\n");
-}
-
-static void qxl_conn_restore(struct drm_connector *connector)
-{
-	DRM_DEBUG("\n");
-}
-
 static enum drm_connector_status qxl_conn_detect(
 			struct drm_connector *connector,
 			bool force)
@@ -932,10 +922,8 @@
 
 static const struct drm_connector_funcs qxl_connector_funcs = {
 	.dpms = drm_helper_connector_dpms,
-	.save = qxl_conn_save,
-	.restore = qxl_conn_restore,
 	.detect = qxl_conn_detect,
-	.fill_modes = drm_helper_probe_single_connector_modes_nomerge,
+	.fill_modes = drm_helper_probe_single_connector_modes,
 	.set_property = qxl_conn_set_property,
 	.destroy = qxl_conn_destroy,
 };
@@ -980,7 +968,7 @@
 			   &qxl_connector_funcs, DRM_MODE_CONNECTOR_VIRTUAL);
 
 	drm_encoder_init(dev, &qxl_output->enc, &qxl_enc_funcs,
-			 DRM_MODE_ENCODER_VIRTUAL);
+			 DRM_MODE_ENCODER_VIRTUAL, NULL);
 
 	/* we get HPD via client monitors config */
 	connector->polled = DRM_CONNECTOR_POLL_HPD;
@@ -1003,7 +991,7 @@
 static struct drm_framebuffer *
 qxl_user_framebuffer_create(struct drm_device *dev,
 			    struct drm_file *file_priv,
-			    struct drm_mode_fb_cmd2 *mode_cmd)
+			    const struct drm_mode_fb_cmd2 *mode_cmd)
 {
 	struct drm_gem_object *obj;
 	struct qxl_framebuffer *qxl_fb;
diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h
index 01a8694..6e6b9b1 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.h
+++ b/drivers/gpu/drm/qxl/qxl_drv.h
@@ -390,7 +390,7 @@
 int
 qxl_framebuffer_init(struct drm_device *dev,
 		     struct qxl_framebuffer *rfb,
-		     struct drm_mode_fb_cmd2 *mode_cmd,
+		     const struct drm_mode_fb_cmd2 *mode_cmd,
 		     struct drm_gem_object *obj);
 void qxl_display_read_client_monitors_config(struct qxl_device *qdev);
 void qxl_send_monitors_config(struct qxl_device *qdev);
diff --git a/drivers/gpu/drm/qxl/qxl_fb.c b/drivers/gpu/drm/qxl/qxl_fb.c
index c4a5526..7136e52 100644
--- a/drivers/gpu/drm/qxl/qxl_fb.c
+++ b/drivers/gpu/drm/qxl/qxl_fb.c
@@ -40,7 +40,6 @@
 struct qxl_fbdev {
 	struct drm_fb_helper helper;
 	struct qxl_framebuffer	qfb;
-	struct list_head	fbdev_list;
 	struct qxl_device	*qdev;
 
 	spinlock_t delayed_ops_lock;
@@ -283,7 +282,7 @@
 }
 
 static int qxlfb_create_pinned_object(struct qxl_fbdev *qfbdev,
-				      struct drm_mode_fb_cmd2 *mode_cmd,
+				      const struct drm_mode_fb_cmd2 *mode_cmd,
 				      struct drm_gem_object **gobj_p)
 {
 	struct qxl_device *qdev = qfbdev->qdev;
diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index dac78ad..801dd60 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -25,6 +25,7 @@
  */
 #include <drm/drmP.h>
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_fb_helper.h>
 #include <drm/radeon_drm.h>
 #include <drm/drm_fixed.h>
 #include "radeon.h"
diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c
index bb29214..01b20e1 100644
--- a/drivers/gpu/drm/radeon/atombios_encoders.c
+++ b/drivers/gpu/drm/radeon/atombios_encoders.c
@@ -2767,23 +2767,27 @@
 	case ENCODER_OBJECT_ID_INTERNAL_LVTM1:
 		if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
 			radeon_encoder->rmx_type = RMX_FULL;
-			drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, DRM_MODE_ENCODER_LVDS);
+			drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs,
+					 DRM_MODE_ENCODER_LVDS, NULL);
 			radeon_encoder->enc_priv = radeon_atombios_get_lvds_info(radeon_encoder);
 		} else {
-			drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, DRM_MODE_ENCODER_TMDS);
+			drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs,
+					 DRM_MODE_ENCODER_TMDS, NULL);
 			radeon_encoder->enc_priv = radeon_atombios_set_dig_info(radeon_encoder);
 		}
 		drm_encoder_helper_add(encoder, &radeon_atom_dig_helper_funcs);
 		break;
 	case ENCODER_OBJECT_ID_INTERNAL_DAC1:
-		drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, DRM_MODE_ENCODER_DAC);
+		drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs,
+				 DRM_MODE_ENCODER_DAC, NULL);
 		radeon_encoder->enc_priv = radeon_atombios_set_dac_info(radeon_encoder);
 		drm_encoder_helper_add(encoder, &radeon_atom_dac_helper_funcs);
 		break;
 	case ENCODER_OBJECT_ID_INTERNAL_DAC2:
 	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1:
 	case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2:
-		drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, DRM_MODE_ENCODER_TVDAC);
+		drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs,
+				 DRM_MODE_ENCODER_TVDAC, NULL);
 		radeon_encoder->enc_priv = radeon_atombios_set_dac_info(radeon_encoder);
 		drm_encoder_helper_add(encoder, &radeon_atom_dac_helper_funcs);
 		break;
@@ -2797,13 +2801,16 @@
 	case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
 		if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
 			radeon_encoder->rmx_type = RMX_FULL;
-			drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, DRM_MODE_ENCODER_LVDS);
+			drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs,
+					 DRM_MODE_ENCODER_LVDS, NULL);
 			radeon_encoder->enc_priv = radeon_atombios_get_lvds_info(radeon_encoder);
 		} else if (radeon_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT)) {
-			drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, DRM_MODE_ENCODER_DAC);
+			drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs,
+					 DRM_MODE_ENCODER_DAC, NULL);
 			radeon_encoder->enc_priv = radeon_atombios_set_dig_info(radeon_encoder);
 		} else {
-			drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, DRM_MODE_ENCODER_TMDS);
+			drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs,
+					 DRM_MODE_ENCODER_TMDS, NULL);
 			radeon_encoder->enc_priv = radeon_atombios_set_dig_info(radeon_encoder);
 		}
 		drm_encoder_helper_add(encoder, &radeon_atom_dig_helper_funcs);
@@ -2820,11 +2827,14 @@
 		/* these are handled by the primary encoders */
 		radeon_encoder->is_ext_encoder = true;
 		if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT))
-			drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, DRM_MODE_ENCODER_LVDS);
+			drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs,
+					 DRM_MODE_ENCODER_LVDS, NULL);
 		else if (radeon_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT))
-			drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, DRM_MODE_ENCODER_DAC);
+			drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs,
+					 DRM_MODE_ENCODER_DAC, NULL);
 		else
-			drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, DRM_MODE_ENCODER_TMDS);
+			drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs,
+					 DRM_MODE_ENCODER_TMDS, NULL);
 		drm_encoder_helper_add(encoder, &radeon_atom_ext_helper_funcs);
 		break;
 	}
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index 248953d..0154db4 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -8472,7 +8472,7 @@
 	if (queue_dp)
 		schedule_work(&rdev->dp_work);
 	if (queue_hotplug)
-		schedule_work(&rdev->hotplug_work);
+		schedule_delayed_work(&rdev->hotplug_work, 0);
 	if (queue_reset) {
 		rdev->needs_reset = true;
 		wake_up_all(&rdev->fence_queue);
@@ -9630,6 +9630,9 @@
 		    (rdev->disp_priority == 2)) {
 			DRM_DEBUG_KMS("force priority to high\n");
 		}
+
+		/* Save number of lines the linebuffer leads before the scanout */
+		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
 	}
 
 	/* select wm A */
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 7f33767..2ad4628 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -2372,6 +2372,9 @@
 		c.full = dfixed_div(c, a);
 		priority_b_mark = dfixed_trunc(c);
 		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
+
+		/* Save number of lines the linebuffer leads before the scanout */
+		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
 	}
 
 	/* select wm A */
@@ -5344,7 +5347,7 @@
 	if (queue_dp)
 		schedule_work(&rdev->dp_work);
 	if (queue_hotplug)
-		schedule_work(&rdev->hotplug_work);
+		schedule_delayed_work(&rdev->hotplug_work, 0);
 	if (queue_hdmi)
 		schedule_work(&rdev->audio_work);
 	if (queue_thermal && rdev->pm.dpm_enabled)
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 238b13f..9e7e2bf 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -806,7 +806,7 @@
 		status = r100_irq_ack(rdev);
 	}
 	if (queue_hotplug)
-		schedule_work(&rdev->hotplug_work);
+		schedule_delayed_work(&rdev->hotplug_work, 0);
 	if (rdev->msi_enabled) {
 		switch (rdev->family) {
 		case CHIP_RS400:
@@ -3217,6 +3217,9 @@
 	uint32_t pixel_bytes1 = 0;
 	uint32_t pixel_bytes2 = 0;
 
+	/* Guess line buffer size to be 8192 pixels */
+	u32 lb_size = 8192;
+
 	if (!rdev->mode_info.mode_config_initialized)
 		return;
 
@@ -3631,6 +3634,13 @@
 		DRM_DEBUG_KMS("GRPH2_BUFFER_CNTL from to %x\n",
 			  (unsigned int)RREG32(RADEON_GRPH2_BUFFER_CNTL));
 	}
+
+	/* Save number of lines the linebuffer leads before the scanout */
+	if (mode1)
+	    rdev->mode_info.crtcs[0]->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode1->crtc_hdisplay);
+
+	if (mode2)
+	    rdev->mode_info.crtcs[1]->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode2->crtc_hdisplay);
 }
 
 int r100_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 4ea5b10..cc2fdf0 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -4276,7 +4276,7 @@
 		WREG32(IH_RB_RPTR, rptr);
 	}
 	if (queue_hotplug)
-		schedule_work(&rdev->hotplug_work);
+		schedule_delayed_work(&rdev->hotplug_work, 0);
 	if (queue_hdmi)
 		schedule_work(&rdev->audio_work);
 	if (queue_thermal && rdev->pm.dpm_enabled)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index b6cbd81..87db649 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -2414,7 +2414,7 @@
 	struct r600_ih ih; /* r6/700 interrupt ring */
 	struct radeon_rlc rlc;
 	struct radeon_mec mec;
-	struct work_struct hotplug_work;
+	struct delayed_work hotplug_work;
 	struct work_struct dp_work;
 	struct work_struct audio_work;
 	int num_crtc; /* number of crtcs */
diff --git a/drivers/gpu/drm/radeon/radeon_agp.c b/drivers/gpu/drm/radeon/radeon_agp.c
index fe994aa..c77d349 100644
--- a/drivers/gpu/drm/radeon/radeon_agp.c
+++ b/drivers/gpu/drm/radeon/radeon_agp.c
@@ -54,6 +54,9 @@
 	/* Intel 82855PM host bridge / Mobility 9600 M10 RV350 Needs AGPMode 1 (lp #195051) */
 	{ PCI_VENDOR_ID_INTEL, 0x3340, PCI_VENDOR_ID_ATI, 0x4e50,
 		PCI_VENDOR_ID_IBM, 0x0550, 1},
+	/* Intel 82855PM host bridge / RV250/M9 GL [Mobility FireGL 9000/Radeon 9000] needs AGPMode 1 (Thinkpad T40p) */
+	{ PCI_VENDOR_ID_INTEL, 0x3340, PCI_VENDOR_ID_ATI, 0x4c66,
+		PCI_VENDOR_ID_IBM, 0x054d, 1},
 	/* Intel 82855PM host bridge / Mobility M7 needs AGPMode 1 */
 	{ PCI_VENDOR_ID_INTEL, 0x3340, PCI_VENDOR_ID_ATI, 0x4c57,
 		PCI_VENDOR_ID_IBM, 0x0530, 1},
diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index 5a2cafb..340f3f5 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -1234,13 +1234,32 @@
 	if (r < 0)
 		return connector_status_disconnected;
 
+	if (radeon_connector->detected_hpd_without_ddc) {
+		force = true;
+		radeon_connector->detected_hpd_without_ddc = false;
+	}
+
 	if (!force && radeon_check_hpd_status_unchanged(connector)) {
 		ret = connector->status;
 		goto exit;
 	}
 
-	if (radeon_connector->ddc_bus)
+	if (radeon_connector->ddc_bus) {
 		dret = radeon_ddc_probe(radeon_connector, false);
+
+		/* Sometimes the pins required for the DDC probe on DVI
+		 * connectors don't make contact at the same time that the ones
+		 * for HPD do. If the DDC probe fails even though we had an HPD
+		 * signal, try again later */
+		if (!dret && !force &&
+		    connector->status != connector_status_connected) {
+			DRM_DEBUG_KMS("hpd detected without ddc, retrying in 1 second\n");
+			radeon_connector->detected_hpd_without_ddc = true;
+			schedule_delayed_work(&rdev->hotplug_work,
+					      msecs_to_jiffies(1000));
+			goto exit;
+		}
+	}
 	if (dret) {
 		radeon_connector->detected_by_load = false;
 		radeon_connector_free_edid(connector);
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index a8d9927..b3bb923 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -322,7 +322,9 @@
 	 * to complete in this vblank?
 	 */
 	if (update_pending &&
-	    (DRM_SCANOUTPOS_VALID & radeon_get_crtc_scanoutpos(rdev->ddev, crtc_id, 0,
+	    (DRM_SCANOUTPOS_VALID & radeon_get_crtc_scanoutpos(rdev->ddev,
+							       crtc_id,
+							       USE_REAL_VBLANKSTART,
 							       &vpos, &hpos, NULL, NULL,
 							       &rdev->mode_info.crtcs[crtc_id]->base.hwmode)) &&
 	    ((vpos >= (99 * rdev->mode_info.crtcs[crtc_id]->base.hwmode.crtc_vdisplay)/100) ||
@@ -401,6 +403,8 @@
 	struct drm_crtc *crtc = &radeon_crtc->base;
 	unsigned long flags;
 	int r;
+	int vpos, hpos, stat, min_udelay;
+	struct drm_vblank_crtc *vblank = &crtc->dev->vblank[work->crtc_id];
 
         down_read(&rdev->exclusive_lock);
 	if (work->fence) {
@@ -437,6 +441,41 @@
 	/* set the proper interrupt */
 	radeon_irq_kms_pflip_irq_get(rdev, radeon_crtc->crtc_id);
 
+	/* If this happens to execute within the "virtually extended" vblank
+	 * interval before the start of the real vblank interval then it needs
+	 * to delay programming the mmio flip until the real vblank is entered.
+	 * This prevents completing a flip too early due to the way we fudge
+	 * our vblank counter and vblank timestamps in order to work around the
+	 * problem that the hw fires vblank interrupts before actual start of
+	 * vblank (when line buffer refilling is done for a frame). It
+	 * complements the fudging logic in radeon_get_crtc_scanoutpos() for
+	 * timestamping and radeon_get_vblank_counter_kms() for vblank counts.
+	 *
+	 * In practice this won't execute very often unless on very fast
+	 * machines because the time window for this to happen is very small.
+	 */
+	for (;;) {
+		/* GET_DISTANCE_TO_VBLANKSTART returns distance to real vblank
+		 * start in hpos, and to the "fudged earlier" vblank start in
+		 * vpos.
+		 */
+		stat = radeon_get_crtc_scanoutpos(rdev->ddev, work->crtc_id,
+						  GET_DISTANCE_TO_VBLANKSTART,
+						  &vpos, &hpos, NULL, NULL,
+						  &crtc->hwmode);
+
+		if ((stat & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE)) !=
+		    (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE) ||
+		    !(vpos >= 0 && hpos <= 0))
+			break;
+
+		/* Sleep at least until estimated real start of hw vblank */
+		spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+		min_udelay = (-hpos + 1) * max(vblank->linedur_ns / 1000, 5);
+		usleep_range(min_udelay, 2 * min_udelay);
+		spin_lock_irqsave(&crtc->dev->event_lock, flags);
+	};
+
 	/* do the flip (mmio) */
 	radeon_page_flip(rdev, radeon_crtc->crtc_id, work->base);
 
@@ -1292,7 +1331,7 @@
 int
 radeon_framebuffer_init(struct drm_device *dev,
 			struct radeon_framebuffer *rfb,
-			struct drm_mode_fb_cmd2 *mode_cmd,
+			const struct drm_mode_fb_cmd2 *mode_cmd,
 			struct drm_gem_object *obj)
 {
 	int ret;
@@ -1309,7 +1348,7 @@
 static struct drm_framebuffer *
 radeon_user_framebuffer_create(struct drm_device *dev,
 			       struct drm_file *file_priv,
-			       struct drm_mode_fb_cmd2 *mode_cmd)
+			       const struct drm_mode_fb_cmd2 *mode_cmd)
 {
 	struct drm_gem_object *obj;
 	struct radeon_framebuffer *radeon_fb;
@@ -1768,6 +1807,15 @@
  * \param dev Device to query.
  * \param crtc Crtc to query.
  * \param flags Flags from caller (DRM_CALLED_FROM_VBLIRQ or 0).
+ *              For driver internal use only also supports these flags:
+ *
+ *              USE_REAL_VBLANKSTART to use the real start of vblank instead
+ *              of a fudged earlier start of vblank.
+ *
+ *              GET_DISTANCE_TO_VBLANKSTART to return distance to the
+ *              fudged earlier start of vblank in *vpos and the distance
+ *              to true start of vblank in *hpos.
+ *
  * \param *vpos Location where vertical scanout position should be stored.
  * \param *hpos Location where horizontal scanout position should go.
  * \param *stime Target location for timestamp taken immediately before
@@ -1911,10 +1959,40 @@
 		vbl_end = 0;
 	}
 
+	/* Called from driver internal vblank counter query code? */
+	if (flags & GET_DISTANCE_TO_VBLANKSTART) {
+	    /* Caller wants distance from real vbl_start in *hpos */
+	    *hpos = *vpos - vbl_start;
+	}
+
+	/* Fudge vblank to start a few scanlines earlier to handle the
+	 * problem that vblank irqs fire a few scanlines before start
+	 * of vblank. Some driver internal callers need the true vblank
+	 * start to be used and signal this via the USE_REAL_VBLANKSTART flag.
+	 *
+	 * The cause of the "early" vblank irq is that the irq is triggered
+	 * by the line buffer logic when the line buffer read position enters
+	 * the vblank, whereas our crtc scanout position naturally lags the
+	 * line buffer read position.
+	 */
+	if (!(flags & USE_REAL_VBLANKSTART))
+		vbl_start -= rdev->mode_info.crtcs[pipe]->lb_vblank_lead_lines;
+
 	/* Test scanout position against vblank region. */
 	if ((*vpos < vbl_start) && (*vpos >= vbl_end))
 		in_vbl = false;
 
+	/* In vblank? */
+	if (in_vbl)
+	    ret |= DRM_SCANOUTPOS_IN_VBLANK;
+
+	/* Called from driver internal vblank counter query code? */
+	if (flags & GET_DISTANCE_TO_VBLANKSTART) {
+		/* Caller wants distance from fudged earlier vbl_start */
+		*vpos -= vbl_start;
+		return ret;
+	}
+
 	/* Check if inside vblank area and apply corrective offsets:
 	 * vpos will then be >=0 in video scanout area, but negative
 	 * within vblank area, counting down the number of lines until
@@ -1930,31 +2008,5 @@
 	/* Correct for shifted end of vbl at vbl_end. */
 	*vpos = *vpos - vbl_end;
 
-	/* In vblank? */
-	if (in_vbl)
-		ret |= DRM_SCANOUTPOS_IN_VBLANK;
-
-	/* Is vpos outside nominal vblank area, but less than
-	 * 1/100 of a frame height away from start of vblank?
-	 * If so, assume this isn't a massively delayed vblank
-	 * interrupt, but a vblank interrupt that fired a few
-	 * microseconds before true start of vblank. Compensate
-	 * by adding a full frame duration to the final timestamp.
-	 * Happens, e.g., on ATI R500, R600.
-	 *
-	 * We only do this if DRM_CALLED_FROM_VBLIRQ.
-	 */
-	if ((flags & DRM_CALLED_FROM_VBLIRQ) && !in_vbl) {
-		vbl_start = mode->crtc_vdisplay;
-		vtotal = mode->crtc_vtotal;
-
-		if (vbl_start - *vpos < vtotal / 100) {
-			*vpos -= vtotal;
-
-			/* Signal this correction as "applied". */
-			ret |= 0x8;
-		}
-	}
-
 	return ret;
 }
diff --git a/drivers/gpu/drm/radeon/radeon_dp_mst.c b/drivers/gpu/drm/radeon/radeon_dp_mst.c
index 744f5c4..94323f5 100644
--- a/drivers/gpu/drm/radeon/radeon_dp_mst.c
+++ b/drivers/gpu/drm/radeon/radeon_dp_mst.c
@@ -641,7 +641,7 @@
 	}
 
 	drm_encoder_init(dev, &radeon_encoder->base, &radeon_dp_mst_enc_funcs,
-			 DRM_MODE_ENCODER_DPMST);
+			 DRM_MODE_ENCODER_DPMST, NULL);
 	drm_encoder_helper_add(encoder, &radeon_mst_helper_funcs);
 
 	mst_enc = radeon_encoder->enc_priv;
diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c
index 26da2f4..adc44bb 100644
--- a/drivers/gpu/drm/radeon/radeon_fb.c
+++ b/drivers/gpu/drm/radeon/radeon_fb.c
@@ -44,7 +44,6 @@
 struct radeon_fbdev {
 	struct drm_fb_helper helper;
 	struct radeon_framebuffer rfb;
-	struct list_head fbdev_list;
 	struct radeon_device *rdev;
 };
 
diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c
index 171d3e4..979f3bf 100644
--- a/drivers/gpu/drm/radeon/radeon_irq_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c
@@ -74,7 +74,7 @@
 static void radeon_hotplug_work_func(struct work_struct *work)
 {
 	struct radeon_device *rdev = container_of(work, struct radeon_device,
-						  hotplug_work);
+						  hotplug_work.work);
 	struct drm_device *dev = rdev->ddev;
 	struct drm_mode_config *mode_config = &dev->mode_config;
 	struct drm_connector *connector;
@@ -302,7 +302,7 @@
 		}
 	}
 
-	INIT_WORK(&rdev->hotplug_work, radeon_hotplug_work_func);
+	INIT_DELAYED_WORK(&rdev->hotplug_work, radeon_hotplug_work_func);
 	INIT_WORK(&rdev->dp_work, radeon_dp_work_func);
 	INIT_WORK(&rdev->audio_work, r600_audio_update_hdmi);
 
@@ -310,7 +310,7 @@
 	r = drm_irq_install(rdev->ddev, rdev->ddev->pdev->irq);
 	if (r) {
 		rdev->irq.installed = false;
-		flush_work(&rdev->hotplug_work);
+		flush_delayed_work(&rdev->hotplug_work);
 		return r;
 	}
 
@@ -333,7 +333,7 @@
 		rdev->irq.installed = false;
 		if (rdev->msi_enabled)
 			pci_disable_msi(rdev->pdev);
-		flush_work(&rdev->hotplug_work);
+		flush_delayed_work(&rdev->hotplug_work);
 	}
 }
 
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index 0ec6fcc..d290a8a 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -755,6 +755,8 @@
  */
 u32 radeon_get_vblank_counter_kms(struct drm_device *dev, int crtc)
 {
+	int vpos, hpos, stat;
+	u32 count;
 	struct radeon_device *rdev = dev->dev_private;
 
 	if (crtc < 0 || crtc >= rdev->num_crtc) {
@@ -762,7 +764,53 @@
 		return -EINVAL;
 	}
 
-	return radeon_get_vblank_counter(rdev, crtc);
+	/* The hw increments its frame counter at start of vsync, not at start
+	 * of vblank, as is required by DRM core vblank counter handling.
+	 * Cook the hw count here to make it appear to the caller as if it
+	 * incremented at start of vblank. We measure distance to start of
+	 * vblank in vpos. vpos therefore will be >= 0 between start of vblank
+	 * and start of vsync, so vpos >= 0 means to bump the hw frame counter
+	 * result by 1 to give the proper appearance to caller.
+	 */
+	if (rdev->mode_info.crtcs[crtc]) {
+		/* Repeat readout if needed to provide stable result if
+		 * we cross start of vsync during the queries.
+		 */
+		do {
+			count = radeon_get_vblank_counter(rdev, crtc);
+			/* Ask radeon_get_crtc_scanoutpos to return vpos as
+			 * distance to start of vblank, instead of regular
+			 * vertical scanout pos.
+			 */
+			stat = radeon_get_crtc_scanoutpos(
+				dev, crtc, GET_DISTANCE_TO_VBLANKSTART,
+				&vpos, &hpos, NULL, NULL,
+				&rdev->mode_info.crtcs[crtc]->base.hwmode);
+		} while (count != radeon_get_vblank_counter(rdev, crtc));
+
+		if (((stat & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE)) !=
+		    (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE))) {
+			DRM_DEBUG_VBL("Query failed! stat %d\n", stat);
+		}
+		else {
+			DRM_DEBUG_VBL("crtc %d: dist from vblank start %d\n",
+				      crtc, vpos);
+
+			/* Bump counter if we are at >= leading edge of vblank,
+			 * but before vsync where vpos would turn negative and
+			 * the hw counter really increments.
+			 */
+			if (vpos >= 0)
+				count++;
+		}
+	}
+	else {
+	    /* Fallback to use value as is. */
+	    count = radeon_get_vblank_counter(rdev, crtc);
+	    DRM_DEBUG_VBL("NULL mode info! Returned count may be wrong.\n");
+	}
+
+	return count;
 }
 
 /**
diff --git a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
index 678b438..32b338f 100644
--- a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
+++ b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
@@ -25,6 +25,7 @@
  */
 #include <drm/drmP.h>
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_fb_helper.h>
 #include <drm/radeon_drm.h>
 #include <drm/drm_fixed.h>
 #include "radeon.h"
diff --git a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c
index 30de433..88dc973 100644
--- a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c
+++ b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c
@@ -1772,7 +1772,8 @@
 	switch (radeon_encoder->encoder_id) {
 	case ENCODER_OBJECT_ID_INTERNAL_LVDS:
 		encoder->possible_crtcs = 0x1;
-		drm_encoder_init(dev, encoder, &radeon_legacy_lvds_enc_funcs, DRM_MODE_ENCODER_LVDS);
+		drm_encoder_init(dev, encoder, &radeon_legacy_lvds_enc_funcs,
+				 DRM_MODE_ENCODER_LVDS, NULL);
 		drm_encoder_helper_add(encoder, &radeon_legacy_lvds_helper_funcs);
 		if (rdev->is_atom_bios)
 			radeon_encoder->enc_priv = radeon_atombios_get_lvds_info(radeon_encoder);
@@ -1781,12 +1782,14 @@
 		radeon_encoder->rmx_type = RMX_FULL;
 		break;
 	case ENCODER_OBJECT_ID_INTERNAL_TMDS1:
-		drm_encoder_init(dev, encoder, &radeon_legacy_tmds_int_enc_funcs, DRM_MODE_ENCODER_TMDS);
+		drm_encoder_init(dev, encoder, &radeon_legacy_tmds_int_enc_funcs,
+				 DRM_MODE_ENCODER_TMDS, NULL);
 		drm_encoder_helper_add(encoder, &radeon_legacy_tmds_int_helper_funcs);
 		radeon_encoder->enc_priv = radeon_legacy_get_tmds_info(radeon_encoder);
 		break;
 	case ENCODER_OBJECT_ID_INTERNAL_DAC1:
-		drm_encoder_init(dev, encoder, &radeon_legacy_primary_dac_enc_funcs, DRM_MODE_ENCODER_DAC);
+		drm_encoder_init(dev, encoder, &radeon_legacy_primary_dac_enc_funcs,
+				 DRM_MODE_ENCODER_DAC, NULL);
 		drm_encoder_helper_add(encoder, &radeon_legacy_primary_dac_helper_funcs);
 		if (rdev->is_atom_bios)
 			radeon_encoder->enc_priv = radeon_atombios_get_primary_dac_info(radeon_encoder);
@@ -1794,7 +1797,8 @@
 			radeon_encoder->enc_priv = radeon_combios_get_primary_dac_info(radeon_encoder);
 		break;
 	case ENCODER_OBJECT_ID_INTERNAL_DAC2:
-		drm_encoder_init(dev, encoder, &radeon_legacy_tv_dac_enc_funcs, DRM_MODE_ENCODER_TVDAC);
+		drm_encoder_init(dev, encoder, &radeon_legacy_tv_dac_enc_funcs,
+				 DRM_MODE_ENCODER_TVDAC, NULL);
 		drm_encoder_helper_add(encoder, &radeon_legacy_tv_dac_helper_funcs);
 		if (rdev->is_atom_bios)
 			radeon_encoder->enc_priv = radeon_atombios_get_tv_dac_info(radeon_encoder);
@@ -1802,7 +1806,8 @@
 			radeon_encoder->enc_priv = radeon_combios_get_tv_dac_info(radeon_encoder);
 		break;
 	case ENCODER_OBJECT_ID_INTERNAL_DVO1:
-		drm_encoder_init(dev, encoder, &radeon_legacy_tmds_ext_enc_funcs, DRM_MODE_ENCODER_TMDS);
+		drm_encoder_init(dev, encoder, &radeon_legacy_tmds_ext_enc_funcs,
+				 DRM_MODE_ENCODER_TMDS, NULL);
 		drm_encoder_helper_add(encoder, &radeon_legacy_tmds_ext_helper_funcs);
 		if (!rdev->is_atom_bios)
 			radeon_encoder->enc_priv = radeon_legacy_get_ext_tmds_info(radeon_encoder);
diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h
index 830e171..cddd41b 100644
--- a/drivers/gpu/drm/radeon/radeon_mode.h
+++ b/drivers/gpu/drm/radeon/radeon_mode.h
@@ -367,6 +367,7 @@
 	u32 line_time;
 	u32 wm_low;
 	u32 wm_high;
+	u32 lb_vblank_lead_lines;
 	struct drm_display_mode hw_mode;
 	enum radeon_output_csc output_csc;
 };
@@ -553,6 +554,7 @@
 	void *con_priv;
 	bool dac_load_detect;
 	bool detected_by_load; /* if the connection status was determined by load */
+	bool detected_hpd_without_ddc; /* if an HPD signal was detected on DVI, but ddc probing failed */
 	uint16_t connector_object_id;
 	struct radeon_hpd hpd;
 	struct radeon_router router;
@@ -686,6 +688,9 @@
 	struct atom_voltage_table_entry entries[MAX_VOLTAGE_ENTRIES];
 };
 
+/* Driver internal use only flags of radeon_get_crtc_scanoutpos() */
+#define USE_REAL_VBLANKSTART 		(1 << 30)
+#define GET_DISTANCE_TO_VBLANKSTART	(1 << 31)
 
 extern void
 radeon_add_atom_connector(struct drm_device *dev,
@@ -929,7 +934,7 @@
 				     u16 *blue, int regno);
 int radeon_framebuffer_init(struct drm_device *dev,
 			     struct radeon_framebuffer *rfb,
-			     struct drm_mode_fb_cmd2 *mode_cmd,
+			     const struct drm_mode_fb_cmd2 *mode_cmd,
 			     struct drm_gem_object *obj);
 
 int radeonfb_remove(struct drm_device *dev, struct drm_framebuffer *fb);
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index d302488..84d4563 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -221,11 +221,17 @@
 	if (!(rdev->flags & RADEON_IS_PCIE))
 		bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
 
+	/* Write-combined CPU mappings of GTT cause GPU hangs with RV6xx
+	 * See https://bugs.freedesktop.org/show_bug.cgi?id=91268
+	 */
+	if (rdev->family >= CHIP_RV610 && rdev->family <= CHIP_RV635)
+		bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
+
 #ifdef CONFIG_X86_32
 	/* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit
 	 * See https://bugs.freedesktop.org/show_bug.cgi?id=84627
 	 */
-	bo->flags &= ~RADEON_GEM_GTT_WC;
+	bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
 #elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT)
 	/* Don't try to enable write-combining when it can't work, or things
 	 * may be slow
@@ -235,9 +241,10 @@
 #warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \
 	 thanks to write-combining
 
-	DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for "
-		      "better performance thanks to write-combining\n");
-	bo->flags &= ~RADEON_GEM_GTT_WC;
+	if (bo->flags & RADEON_GEM_GTT_WC)
+		DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for "
+			      "better performance thanks to write-combining\n");
+	bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
 #endif
 
 	radeon_ttm_placement_from_domain(bo, domain);
diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c
index 6d80dde..59abebd 100644
--- a/drivers/gpu/drm/radeon/radeon_pm.c
+++ b/drivers/gpu/drm/radeon/radeon_pm.c
@@ -1542,8 +1542,7 @@
 				ret = device_create_file(rdev->dev, &dev_attr_power_method);
 				if (ret)
 					DRM_ERROR("failed to create device file for power method\n");
-				if (!ret)
-					rdev->pm.sysfs_initialized = true;
+				rdev->pm.sysfs_initialized = true;
 			}
 
 			mutex_lock(&rdev->pm.mutex);
@@ -1757,7 +1756,9 @@
 	 */
 	for (crtc = 0; (crtc < rdev->num_crtc) && in_vbl; crtc++) {
 		if (rdev->pm.active_crtcs & (1 << crtc)) {
-			vbl_status = radeon_get_crtc_scanoutpos(rdev->ddev, crtc, 0,
+			vbl_status = radeon_get_crtc_scanoutpos(rdev->ddev,
+								crtc,
+								USE_REAL_VBLANKSTART,
 								&vpos, &hpos, NULL, NULL,
 								&rdev->mode_info.crtcs[crtc]->base.hwmode);
 			if ((vbl_status & DRM_SCANOUTPOS_VALID) &&
diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
index 97a9048..6244f4e 100644
--- a/drivers/gpu/drm/radeon/rs600.c
+++ b/drivers/gpu/drm/radeon/rs600.c
@@ -813,7 +813,7 @@
 		status = rs600_irq_ack(rdev);
 	}
 	if (queue_hotplug)
-		schedule_work(&rdev->hotplug_work);
+		schedule_delayed_work(&rdev->hotplug_work, 0);
 	if (queue_hdmi)
 		schedule_work(&rdev->audio_work);
 	if (rdev->msi_enabled) {
diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c
index 516ca27..6bc44c2 100644
--- a/drivers/gpu/drm/radeon/rs690.c
+++ b/drivers/gpu/drm/radeon/rs690.c
@@ -207,6 +207,9 @@
 {
 	u32 tmp;
 
+	/* Guess line buffer size to be 8192 pixels */
+	u32 lb_size = 8192;
+
 	/*
 	 * Line Buffer Setup
 	 * There is a single line buffer shared by both display controllers.
@@ -243,6 +246,13 @@
 		tmp |= V_006520_DC_LB_MEMORY_SPLIT_D1_1Q_D2_3Q;
 	}
 	WREG32(R_006520_DC_LB_MEMORY_SPLIT, tmp);
+
+	/* Save number of lines the linebuffer leads before the scanout */
+	if (mode1)
+		rdev->mode_info.crtcs[0]->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode1->crtc_hdisplay);
+
+	if (mode2)
+		rdev->mode_info.crtcs[1]->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode2->crtc_hdisplay);
 }
 
 struct rs690_watermark {
diff --git a/drivers/gpu/drm/radeon/rv730_dpm.c b/drivers/gpu/drm/radeon/rv730_dpm.c
index 3f5e1cf..d37ba2c 100644
--- a/drivers/gpu/drm/radeon/rv730_dpm.c
+++ b/drivers/gpu/drm/radeon/rv730_dpm.c
@@ -464,7 +464,7 @@
 	result = rv770_send_msg_to_smc(rdev, PPSMC_MSG_TwoLevelsDisabled);
 
 	if (result != PPSMC_Result_OK)
-		DRM_ERROR("Could not force DPM to low\n");
+		DRM_DEBUG("Could not force DPM to low\n");
 
 	WREG32_P(GENERAL_PWRMGT, 0, ~GLOBAL_PWRMGT_EN);
 
diff --git a/drivers/gpu/drm/radeon/rv770_dpm.c b/drivers/gpu/drm/radeon/rv770_dpm.c
index b9c7707..e830c89 100644
--- a/drivers/gpu/drm/radeon/rv770_dpm.c
+++ b/drivers/gpu/drm/radeon/rv770_dpm.c
@@ -193,7 +193,7 @@
 	result = rv770_send_msg_to_smc(rdev, PPSMC_MSG_TwoLevelsDisabled);
 
 	if (result != PPSMC_Result_OK)
-		DRM_ERROR("Could not force DPM to low.\n");
+		DRM_DEBUG("Could not force DPM to low.\n");
 
 	WREG32_P(GENERAL_PWRMGT, 0, ~GLOBAL_PWRMGT_EN);
 
@@ -1418,7 +1418,7 @@
 int rv770_set_sw_state(struct radeon_device *rdev)
 {
 	if (rv770_send_msg_to_smc(rdev, PPSMC_MSG_SwitchToSwState) != PPSMC_Result_OK)
-		return -EINVAL;
+		DRM_DEBUG("rv770_set_sw_state failed\n");
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index 07037e3..f878d69 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -2376,6 +2376,9 @@
 		c.full = dfixed_div(c, a);
 		priority_b_mark = dfixed_trunc(c);
 		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
+
+		/* Save number of lines the linebuffer leads before the scanout */
+		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
 	}
 
 	/* select wm A */
@@ -6848,7 +6851,7 @@
 	if (queue_dp)
 		schedule_work(&rdev->dp_work);
 	if (queue_hotplug)
-		schedule_work(&rdev->hotplug_work);
+		schedule_delayed_work(&rdev->hotplug_work, 0);
 	if (queue_thermal && rdev->pm.dpm_enabled)
 		schedule_work(&rdev->pm.dpm.thermal.work);
 	rdev->ih.rptr = rptr;
diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index e72bf46..a82b891 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -2927,7 +2927,7 @@
 	{ PCI_VENDOR_ID_ATI, 0x6810, 0x1462, 0x3036, 0, 120000 },
 	{ PCI_VENDOR_ID_ATI, 0x6811, 0x174b, 0xe271, 0, 120000 },
 	{ PCI_VENDOR_ID_ATI, 0x6810, 0x174b, 0xe271, 85000, 90000 },
-	{ PCI_VENDOR_ID_ATI, 0x6811, 0x1762, 0x2015, 0, 120000 },
+	{ PCI_VENDOR_ID_ATI, 0x6811, 0x1462, 0x2015, 0, 120000 },
 	{ PCI_VENDOR_ID_ATI, 0x6811, 0x1043, 0x2015, 0, 120000 },
 	{ 0, 0, 0, 0 },
 };
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
index 48cb199..88a4b70 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
@@ -613,7 +613,7 @@
 
 	ret = drm_crtc_init_with_planes(rcdu->ddev, crtc,
 					&rgrp->planes[index % 2].plane,
-					NULL, &crtc_funcs);
+					NULL, &crtc_funcs, NULL);
 	if (ret < 0)
 		return ret;
 
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_encoder.c b/drivers/gpu/drm/rcar-du/rcar_du_encoder.c
index d0ae1e8..c087007 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_encoder.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_encoder.c
@@ -173,7 +173,7 @@
 			goto done;
 	} else {
 		ret = drm_encoder_init(rcdu->ddev, encoder, &encoder_funcs,
-				       encoder_type);
+				       encoder_type, NULL);
 		if (ret < 0)
 			goto done;
 
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_hdmienc.c b/drivers/gpu/drm/rcar-du/rcar_du_hdmienc.c
index 81da841..11267de 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_hdmienc.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_hdmienc.c
@@ -151,7 +151,7 @@
 		goto error;
 
 	ret = drm_encoder_init(rcdu->ddev, encoder, &encoder_funcs,
-			       DRM_MODE_ENCODER_TMDS);
+			       DRM_MODE_ENCODER_TMDS, NULL);
 	if (ret < 0)
 		goto error;
 
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_kms.c b/drivers/gpu/drm/rcar-du/rcar_du_kms.c
index ca12e8c..43bce69 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_kms.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_kms.c
@@ -136,7 +136,7 @@
 
 static struct drm_framebuffer *
 rcar_du_fb_create(struct drm_device *dev, struct drm_file *file_priv,
-		  struct drm_mode_fb_cmd2 *mode_cmd)
+		  const struct drm_mode_fb_cmd2 *mode_cmd)
 {
 	struct rcar_du_device *rcdu = dev->dev_private;
 	const struct rcar_du_format_info *format;
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_plane.c b/drivers/gpu/drm/rcar-du/rcar_du_plane.c
index ffa5837..c3ed952 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_plane.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_plane.c
@@ -410,7 +410,8 @@
 
 		ret = drm_universal_plane_init(rcdu->ddev, &plane->plane, crtcs,
 					       &rcar_du_plane_funcs, formats,
-					       ARRAY_SIZE(formats), type);
+					       ARRAY_SIZE(formats), type,
+					       NULL);
 		if (ret < 0)
 			return ret;
 
diff --git a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
index 80d6fc8..525b5a8 100644
--- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
+++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
@@ -295,7 +295,7 @@
 
 	drm_encoder_helper_add(encoder, &dw_hdmi_rockchip_encoder_helper_funcs);
 	drm_encoder_init(drm, encoder, &dw_hdmi_rockchip_encoder_funcs,
-			 DRM_MODE_ENCODER_TMDS);
+			 DRM_MODE_ENCODER_TMDS, NULL);
 
 	return dw_hdmi_bind(dev, master, data, encoder, iores, irq, plat_data);
 }
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
index 002645b..b8ac591 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
@@ -72,7 +72,7 @@
 };
 
 static struct rockchip_drm_fb *
-rockchip_fb_alloc(struct drm_device *dev, struct drm_mode_fb_cmd2 *mode_cmd,
+rockchip_fb_alloc(struct drm_device *dev, const struct drm_mode_fb_cmd2 *mode_cmd,
 		  struct drm_gem_object **obj, unsigned int num_planes)
 {
 	struct rockchip_drm_fb *rockchip_fb;
@@ -102,7 +102,7 @@
 
 static struct drm_framebuffer *
 rockchip_user_fb_create(struct drm_device *dev, struct drm_file *file_priv,
-			struct drm_mode_fb_cmd2 *mode_cmd)
+			const struct drm_mode_fb_cmd2 *mode_cmd)
 {
 	struct rockchip_drm_fb *rockchip_fb;
 	struct drm_gem_object *objs[ROCKCHIP_MAX_FB_BUFFER];
@@ -173,7 +173,7 @@
 
 struct drm_framebuffer *
 rockchip_drm_framebuffer_init(struct drm_device *dev,
-			      struct drm_mode_fb_cmd2 *mode_cmd,
+			      const struct drm_mode_fb_cmd2 *mode_cmd,
 			      struct drm_gem_object *obj)
 {
 	struct rockchip_drm_fb *rockchip_fb;
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fb.h b/drivers/gpu/drm/rockchip/rockchip_drm_fb.h
index 09574d4..2fe47f1 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_fb.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_fb.h
@@ -17,7 +17,7 @@
 
 struct drm_framebuffer *
 rockchip_drm_framebuffer_init(struct drm_device *dev,
-			      struct drm_mode_fb_cmd2 *mode_cmd,
+			      const struct drm_mode_fb_cmd2 *mode_cmd,
 			      struct drm_gem_object *obj);
 void rockchip_drm_framebuffer_fini(struct drm_framebuffer *fb);
 
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
index 8caea0a..d908321 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
@@ -67,6 +67,7 @@
 	 * VM_PFNMAP flag that was set by drm_gem_mmap_obj()/drm_gem_mmap().
 	 */
 	vma->vm_flags &= ~VM_PFNMAP;
+	vma->vm_pgoff = 0;
 
 	ret = dma_mmap_attrs(drm->dev, vma, rk_obj->kvaddr, rk_obj->dma_addr,
 			     obj->size, &rk_obj->dma_attrs);
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
index 5d8ae5e..dd8e086 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
@@ -374,6 +374,7 @@
 	  .data = &rk3288_vop },
 	{},
 };
+MODULE_DEVICE_TABLE(of, vop_driver_dt_match);
 
 static inline void vop_writel(struct vop *vop, uint32_t offset, uint32_t v)
 {
@@ -959,8 +960,8 @@
 	val = (dest.y2 - dest.y1 - 1) << 16;
 	val |= (dest.x2 - dest.x1 - 1) & 0xffff;
 	VOP_WIN_SET(vop, win, dsp_info, val);
-	val = (dsp_sty - 1) << 16;
-	val |= (dsp_stx - 1) & 0xffff;
+	val = dsp_sty << 16;
+	val |= dsp_stx & 0xffff;
 	VOP_WIN_SET(vop, win, dsp_st, val);
 	VOP_WIN_SET(vop, win, rb_swap, rb_swap);
 
@@ -1289,7 +1290,7 @@
 
 	if (state->event) {
 		spin_lock_irqsave(&drm->event_lock, flags);
-		drm_send_vblank_event(drm, -1, state->event);
+		drm_crtc_send_vblank_event(crtc, state->event);
 		spin_unlock_irqrestore(&drm->event_lock, flags);
 	}
 
@@ -1477,7 +1478,7 @@
 					       0, &vop_plane_funcs,
 					       win_data->phy->data_formats,
 					       win_data->phy->nformats,
-					       win_data->type);
+					       win_data->type, NULL);
 		if (ret) {
 			DRM_ERROR("failed to initialize plane\n");
 			goto err_cleanup_planes;
@@ -1491,7 +1492,7 @@
 	}
 
 	ret = drm_crtc_init_with_planes(drm_dev, crtc, primary, cursor,
-					&vop_crtc_funcs);
+					&vop_crtc_funcs, NULL);
 	if (ret)
 		return ret;
 
@@ -1514,7 +1515,7 @@
 					       &vop_plane_funcs,
 					       win_data->phy->data_formats,
 					       win_data->phy->nformats,
-					       win_data->type);
+					       win_data->type, NULL);
 		if (ret) {
 			DRM_ERROR("failed to initialize overlay plane\n");
 			goto err_cleanup_crtc;
@@ -1575,32 +1576,25 @@
 		return PTR_ERR(vop->dclk);
 	}
 
-	ret = clk_prepare(vop->hclk);
-	if (ret < 0) {
-		dev_err(vop->dev, "failed to prepare hclk\n");
-		return ret;
-	}
-
 	ret = clk_prepare(vop->dclk);
 	if (ret < 0) {
 		dev_err(vop->dev, "failed to prepare dclk\n");
-		goto err_unprepare_hclk;
+		return ret;
 	}
 
-	ret = clk_prepare(vop->aclk);
+	/* Enable both the hclk and aclk to setup the vop */
+	ret = clk_prepare_enable(vop->hclk);
 	if (ret < 0) {
-		dev_err(vop->dev, "failed to prepare aclk\n");
+		dev_err(vop->dev, "failed to prepare/enable hclk\n");
 		goto err_unprepare_dclk;
 	}
 
-	/*
-	 * enable hclk, so that we can config vop register.
-	 */
-	ret = clk_enable(vop->hclk);
+	ret = clk_prepare_enable(vop->aclk);
 	if (ret < 0) {
-		dev_err(vop->dev, "failed to prepare aclk\n");
-		goto err_unprepare_aclk;
+		dev_err(vop->dev, "failed to prepare/enable aclk\n");
+		goto err_disable_hclk;
 	}
+
 	/*
 	 * do hclk_reset, reset all vop registers.
 	 */
@@ -1608,7 +1602,7 @@
 	if (IS_ERR(ahb_rst)) {
 		dev_err(vop->dev, "failed to get ahb reset\n");
 		ret = PTR_ERR(ahb_rst);
-		goto err_disable_hclk;
+		goto err_disable_aclk;
 	}
 	reset_control_assert(ahb_rst);
 	usleep_range(10, 20);
@@ -1634,26 +1628,25 @@
 	if (IS_ERR(vop->dclk_rst)) {
 		dev_err(vop->dev, "failed to get dclk reset\n");
 		ret = PTR_ERR(vop->dclk_rst);
-		goto err_unprepare_aclk;
+		goto err_disable_aclk;
 	}
 	reset_control_assert(vop->dclk_rst);
 	usleep_range(10, 20);
 	reset_control_deassert(vop->dclk_rst);
 
 	clk_disable(vop->hclk);
+	clk_disable(vop->aclk);
 
 	vop->is_enabled = false;
 
 	return 0;
 
+err_disable_aclk:
+	clk_disable_unprepare(vop->aclk);
 err_disable_hclk:
-	clk_disable(vop->hclk);
-err_unprepare_aclk:
-	clk_unprepare(vop->aclk);
+	clk_disable_unprepare(vop->hclk);
 err_unprepare_dclk:
 	clk_unprepare(vop->dclk);
-err_unprepare_hclk:
-	clk_unprepare(vop->hclk);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/shmobile/shmob_drm_crtc.c b/drivers/gpu/drm/shmobile/shmob_drm_crtc.c
index e9272b0..b80802f 100644
--- a/drivers/gpu/drm/shmobile/shmob_drm_crtc.c
+++ b/drivers/gpu/drm/shmobile/shmob_drm_crtc.c
@@ -613,7 +613,7 @@
 	encoder->possible_crtcs = 1;
 
 	ret = drm_encoder_init(sdev->ddev, encoder, &encoder_funcs,
-			       DRM_MODE_ENCODER_LVDS);
+			       DRM_MODE_ENCODER_LVDS, NULL);
 	if (ret < 0)
 		return ret;
 
diff --git a/drivers/gpu/drm/shmobile/shmob_drm_kms.c b/drivers/gpu/drm/shmobile/shmob_drm_kms.c
index aaf98ac..388a0fc 100644
--- a/drivers/gpu/drm/shmobile/shmob_drm_kms.c
+++ b/drivers/gpu/drm/shmobile/shmob_drm_kms.c
@@ -104,7 +104,7 @@
 
 static struct drm_framebuffer *
 shmob_drm_fb_create(struct drm_device *dev, struct drm_file *file_priv,
-		    struct drm_mode_fb_cmd2 *mode_cmd)
+		    const struct drm_mode_fb_cmd2 *mode_cmd)
 {
 	const struct shmob_drm_format_info *format;
 
diff --git a/drivers/gpu/drm/sti/sti_crtc.c b/drivers/gpu/drm/sti/sti_crtc.c
index 3ae09dc..de11c7c 100644
--- a/drivers/gpu/drm/sti/sti_crtc.c
+++ b/drivers/gpu/drm/sti/sti_crtc.c
@@ -367,7 +367,7 @@
 	int res;
 
 	res = drm_crtc_init_with_planes(drm_dev, crtc, primary, cursor,
-					&sti_crtc_funcs);
+					&sti_crtc_funcs, NULL);
 	if (res) {
 		DRM_ERROR("Can't initialze CRTC\n");
 		return -EINVAL;
diff --git a/drivers/gpu/drm/sti/sti_cursor.c b/drivers/gpu/drm/sti/sti_cursor.c
index dd10321..8078631 100644
--- a/drivers/gpu/drm/sti/sti_cursor.c
+++ b/drivers/gpu/drm/sti/sti_cursor.c
@@ -272,7 +272,7 @@
 				       &sti_plane_helpers_funcs,
 				       cursor_supported_formats,
 				       ARRAY_SIZE(cursor_supported_formats),
-				       DRM_PLANE_TYPE_CURSOR);
+				       DRM_PLANE_TYPE_CURSOR, NULL);
 	if (res) {
 		DRM_ERROR("Failed to initialize universal plane\n");
 		goto err_plane;
diff --git a/drivers/gpu/drm/sti/sti_gdp.c b/drivers/gpu/drm/sti/sti_gdp.c
index c85dc7d..f9a1d92 100644
--- a/drivers/gpu/drm/sti/sti_gdp.c
+++ b/drivers/gpu/drm/sti/sti_gdp.c
@@ -630,7 +630,7 @@
 				       &sti_plane_helpers_funcs,
 				       gdp_supported_formats,
 				       ARRAY_SIZE(gdp_supported_formats),
-				       type);
+				       type, NULL);
 	if (res) {
 		DRM_ERROR("Failed to initialize universal plane\n");
 		goto err;
diff --git a/drivers/gpu/drm/sti/sti_hda.c b/drivers/gpu/drm/sti/sti_hda.c
index d735dac..49cce83 100644
--- a/drivers/gpu/drm/sti/sti_hda.c
+++ b/drivers/gpu/drm/sti/sti_hda.c
@@ -543,8 +543,6 @@
 		count++;
 	}
 
-	drm_mode_sort(&connector->modes);
-
 	return count;
 }
 
diff --git a/drivers/gpu/drm/sti/sti_hqvdp.c b/drivers/gpu/drm/sti/sti_hqvdp.c
index ea0690b..43861b5 100644
--- a/drivers/gpu/drm/sti/sti_hqvdp.c
+++ b/drivers/gpu/drm/sti/sti_hqvdp.c
@@ -973,7 +973,7 @@
 				       &sti_plane_helpers_funcs,
 				       hqvdp_supported_formats,
 				       ARRAY_SIZE(hqvdp_supported_formats),
-				       DRM_PLANE_TYPE_OVERLAY);
+				       DRM_PLANE_TYPE_OVERLAY, NULL);
 	if (res) {
 		DRM_ERROR("Failed to initialize universal plane\n");
 		return NULL;
diff --git a/drivers/gpu/drm/sti/sti_tvout.c b/drivers/gpu/drm/sti/sti_tvout.c
index c8a4c5d..f2afcf5 100644
--- a/drivers/gpu/drm/sti/sti_tvout.c
+++ b/drivers/gpu/drm/sti/sti_tvout.c
@@ -512,7 +512,8 @@
 	drm_encoder->possible_clones = 1 << 0;
 
 	drm_encoder_init(dev, drm_encoder,
-			 &sti_tvout_encoder_funcs, DRM_MODE_ENCODER_LVDS);
+			 &sti_tvout_encoder_funcs, DRM_MODE_ENCODER_LVDS,
+			 NULL);
 
 	drm_encoder_helper_add(drm_encoder, &sti_dvo_encoder_helper_funcs);
 
@@ -564,7 +565,7 @@
 	drm_encoder->possible_clones = 1 << 0;
 
 	drm_encoder_init(dev, drm_encoder,
-			&sti_tvout_encoder_funcs, DRM_MODE_ENCODER_DAC);
+			&sti_tvout_encoder_funcs, DRM_MODE_ENCODER_DAC, NULL);
 
 	drm_encoder_helper_add(drm_encoder, &sti_hda_encoder_helper_funcs);
 
@@ -613,7 +614,7 @@
 	drm_encoder->possible_clones = 1 << 1;
 
 	drm_encoder_init(dev, drm_encoder,
-			&sti_tvout_encoder_funcs, DRM_MODE_ENCODER_TMDS);
+			&sti_tvout_encoder_funcs, DRM_MODE_ENCODER_TMDS, NULL);
 
 	drm_encoder_helper_add(drm_encoder, &sti_hdmi_encoder_helper_funcs);
 
diff --git a/drivers/gpu/drm/tegra/Kconfig b/drivers/gpu/drm/tegra/Kconfig
index 74d9d62..63ebb15 100644
--- a/drivers/gpu/drm/tegra/Kconfig
+++ b/drivers/gpu/drm/tegra/Kconfig
@@ -16,18 +16,6 @@
 
 if DRM_TEGRA
 
-config DRM_TEGRA_FBDEV
-	bool "Enable legacy fbdev support"
-	select DRM_KMS_FB_HELPER
-	select FB_SYS_FILLRECT
-	select FB_SYS_COPYAREA
-	select FB_SYS_IMAGEBLIT
-	default y
-	help
-	  Choose this option if you have a need for the legacy fbdev support.
-	  Note that this support also provides the Linux console on top of
-	  the Tegra modesetting driver.
-
 config DRM_TEGRA_DEBUG
 	bool "NVIDIA Tegra DRM debug support"
 	help
diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index e9f24a8..1f5cb68 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -660,7 +660,8 @@
 
 	err = drm_universal_plane_init(drm, &plane->base, possible_crtcs,
 				       &tegra_primary_plane_funcs, formats,
-				       num_formats, DRM_PLANE_TYPE_PRIMARY);
+				       num_formats, DRM_PLANE_TYPE_PRIMARY,
+				       NULL);
 	if (err < 0) {
 		kfree(plane);
 		return ERR_PTR(err);
@@ -827,7 +828,8 @@
 
 	err = drm_universal_plane_init(drm, &plane->base, 1 << dc->pipe,
 				       &tegra_cursor_plane_funcs, formats,
-				       num_formats, DRM_PLANE_TYPE_CURSOR);
+				       num_formats, DRM_PLANE_TYPE_CURSOR,
+				       NULL);
 	if (err < 0) {
 		kfree(plane);
 		return ERR_PTR(err);
@@ -890,7 +892,8 @@
 
 	err = drm_universal_plane_init(drm, &plane->base, 1 << dc->pipe,
 				       &tegra_overlay_plane_funcs, formats,
-				       num_formats, DRM_PLANE_TYPE_OVERLAY);
+				       num_formats, DRM_PLANE_TYPE_OVERLAY,
+				       NULL);
 	if (err < 0) {
 		kfree(plane);
 		return ERR_PTR(err);
@@ -1732,7 +1735,7 @@
 	}
 
 	err = drm_crtc_init_with_planes(drm, &dc->base, primary, cursor,
-					&tegra_crtc_funcs);
+					&tegra_crtc_funcs, NULL);
 	if (err < 0)
 		goto cleanup;
 
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 159ef51..e0f8277 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -106,7 +106,7 @@
 
 static const struct drm_mode_config_funcs tegra_drm_mode_funcs = {
 	.fb_create = tegra_fb_create,
-#ifdef CONFIG_DRM_TEGRA_FBDEV
+#ifdef CONFIG_DRM_FBDEV_EMULATION
 	.output_poll_changed = tegra_fb_output_poll_changed,
 #endif
 	.atomic_check = drm_atomic_helper_check,
@@ -260,7 +260,7 @@
 
 static void tegra_drm_lastclose(struct drm_device *drm)
 {
-#ifdef CONFIG_DRM_TEGRA_FBDEV
+#ifdef CONFIG_DRM_FBDEV_EMULATION
 	struct tegra_drm *tegra = drm->dev_private;
 
 	tegra_fbdev_restore_mode(tegra->fbdev);
diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
index ec49275..d88a2d1 100644
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -30,7 +30,7 @@
 	unsigned int num_planes;
 };
 
-#ifdef CONFIG_DRM_TEGRA_FBDEV
+#ifdef CONFIG_DRM_FBDEV_EMULATION
 struct tegra_fbdev {
 	struct drm_fb_helper base;
 	struct tegra_fb *fb;
@@ -46,7 +46,7 @@
 	struct mutex clients_lock;
 	struct list_head clients;
 
-#ifdef CONFIG_DRM_TEGRA_FBDEV
+#ifdef CONFIG_DRM_FBDEV_EMULATION
 	struct tegra_fbdev *fbdev;
 #endif
 
@@ -268,12 +268,12 @@
 			struct tegra_bo_tiling *tiling);
 struct drm_framebuffer *tegra_fb_create(struct drm_device *drm,
 					struct drm_file *file,
-					struct drm_mode_fb_cmd2 *cmd);
+					const struct drm_mode_fb_cmd2 *cmd);
 int tegra_drm_fb_prepare(struct drm_device *drm);
 void tegra_drm_fb_free(struct drm_device *drm);
 int tegra_drm_fb_init(struct drm_device *drm);
 void tegra_drm_fb_exit(struct drm_device *drm);
-#ifdef CONFIG_DRM_TEGRA_FBDEV
+#ifdef CONFIG_DRM_FBDEV_EMULATION
 void tegra_fbdev_restore_mode(struct tegra_fbdev *fbdev);
 void tegra_fb_output_poll_changed(struct drm_device *drm);
 #endif
diff --git a/drivers/gpu/drm/tegra/dsi.c b/drivers/gpu/drm/tegra/dsi.c
index f0a138e..50d46ae 100644
--- a/drivers/gpu/drm/tegra/dsi.c
+++ b/drivers/gpu/drm/tegra/dsi.c
@@ -1023,7 +1023,7 @@
 
 		drm_encoder_init(drm, &dsi->output.encoder,
 				 &tegra_dsi_encoder_funcs,
-				 DRM_MODE_ENCODER_DSI);
+				 DRM_MODE_ENCODER_DSI, NULL);
 		drm_encoder_helper_add(&dsi->output.encoder,
 				       &tegra_dsi_encoder_helper_funcs);
 
diff --git a/drivers/gpu/drm/tegra/fb.c b/drivers/gpu/drm/tegra/fb.c
index 1004075..ede9e94 100644
--- a/drivers/gpu/drm/tegra/fb.c
+++ b/drivers/gpu/drm/tegra/fb.c
@@ -18,7 +18,7 @@
 	return container_of(fb, struct tegra_fb, base);
 }
 
-#ifdef CONFIG_DRM_TEGRA_FBDEV
+#ifdef CONFIG_DRM_FBDEV_EMULATION
 static inline struct tegra_fbdev *to_tegra_fbdev(struct drm_fb_helper *helper)
 {
 	return container_of(helper, struct tegra_fbdev, base);
@@ -92,7 +92,7 @@
 };
 
 static struct tegra_fb *tegra_fb_alloc(struct drm_device *drm,
-				       struct drm_mode_fb_cmd2 *mode_cmd,
+				       const struct drm_mode_fb_cmd2 *mode_cmd,
 				       struct tegra_bo **planes,
 				       unsigned int num_planes)
 {
@@ -131,7 +131,7 @@
 
 struct drm_framebuffer *tegra_fb_create(struct drm_device *drm,
 					struct drm_file *file,
-					struct drm_mode_fb_cmd2 *cmd)
+					const struct drm_mode_fb_cmd2 *cmd)
 {
 	unsigned int hsub, vsub, i;
 	struct tegra_bo *planes[4];
@@ -181,7 +181,7 @@
 	return ERR_PTR(err);
 }
 
-#ifdef CONFIG_DRM_TEGRA_FBDEV
+#ifdef CONFIG_DRM_FBDEV_EMULATION
 static struct fb_ops tegra_fb_ops = {
 	.owner = THIS_MODULE,
 	.fb_fillrect = drm_fb_helper_sys_fillrect,
@@ -370,7 +370,7 @@
 
 int tegra_drm_fb_prepare(struct drm_device *drm)
 {
-#ifdef CONFIG_DRM_TEGRA_FBDEV
+#ifdef CONFIG_DRM_FBDEV_EMULATION
 	struct tegra_drm *tegra = drm->dev_private;
 
 	tegra->fbdev = tegra_fbdev_create(drm);
@@ -383,7 +383,7 @@
 
 void tegra_drm_fb_free(struct drm_device *drm)
 {
-#ifdef CONFIG_DRM_TEGRA_FBDEV
+#ifdef CONFIG_DRM_FBDEV_EMULATION
 	struct tegra_drm *tegra = drm->dev_private;
 
 	tegra_fbdev_free(tegra->fbdev);
@@ -392,7 +392,7 @@
 
 int tegra_drm_fb_init(struct drm_device *drm)
 {
-#ifdef CONFIG_DRM_TEGRA_FBDEV
+#ifdef CONFIG_DRM_FBDEV_EMULATION
 	struct tegra_drm *tegra = drm->dev_private;
 	int err;
 
@@ -407,7 +407,7 @@
 
 void tegra_drm_fb_exit(struct drm_device *drm)
 {
-#ifdef CONFIG_DRM_TEGRA_FBDEV
+#ifdef CONFIG_DRM_FBDEV_EMULATION
 	struct tegra_drm *tegra = drm->dev_private;
 
 	tegra_fbdev_exit(tegra->fbdev);
diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/drm/tegra/hdmi.c
index 52b32cb..b7ef492 100644
--- a/drivers/gpu/drm/tegra/hdmi.c
+++ b/drivers/gpu/drm/tegra/hdmi.c
@@ -1320,7 +1320,7 @@
 	hdmi->output.connector.dpms = DRM_MODE_DPMS_OFF;
 
 	drm_encoder_init(drm, &hdmi->output.encoder, &tegra_hdmi_encoder_funcs,
-			 DRM_MODE_ENCODER_TMDS);
+			 DRM_MODE_ENCODER_TMDS, NULL);
 	drm_encoder_helper_add(&hdmi->output.encoder,
 			       &tegra_hdmi_encoder_helper_funcs);
 
diff --git a/drivers/gpu/drm/tegra/rgb.c b/drivers/gpu/drm/tegra/rgb.c
index bc9735b..e246334 100644
--- a/drivers/gpu/drm/tegra/rgb.c
+++ b/drivers/gpu/drm/tegra/rgb.c
@@ -287,7 +287,7 @@
 	output->connector.dpms = DRM_MODE_DPMS_OFF;
 
 	drm_encoder_init(drm, &output->encoder, &tegra_rgb_encoder_funcs,
-			 DRM_MODE_ENCODER_LVDS);
+			 DRM_MODE_ENCODER_LVDS, NULL);
 	drm_encoder_helper_add(&output->encoder,
 			       &tegra_rgb_encoder_helper_funcs);
 
diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index 3eff7cf..3e012ee 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -2178,7 +2178,7 @@
 	sor->output.connector.dpms = DRM_MODE_DPMS_OFF;
 
 	drm_encoder_init(drm, &sor->output.encoder, &tegra_sor_encoder_funcs,
-			 encoder);
+			 encoder, NULL);
 	drm_encoder_helper_add(&sor->output.encoder, helpers);
 
 	drm_mode_connector_attach_encoder(&sor->output.connector,
diff --git a/drivers/gpu/drm/tilcdc/tilcdc_drv.c b/drivers/gpu/drm/tilcdc/tilcdc_drv.c
index 876cad5..4ddb21e 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_drv.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_drv.c
@@ -46,7 +46,7 @@
 static struct of_device_id tilcdc_of_match[];
 
 static struct drm_framebuffer *tilcdc_fb_create(struct drm_device *dev,
-		struct drm_file *file_priv, struct drm_mode_fb_cmd2 *mode_cmd)
+		struct drm_file *file_priv, const struct drm_mode_fb_cmd2 *mode_cmd)
 {
 	return drm_fb_cma_create(dev, file_priv, mode_cmd);
 }
diff --git a/drivers/gpu/drm/tilcdc/tilcdc_panel.c b/drivers/gpu/drm/tilcdc/tilcdc_panel.c
index 0af8bed..4dda6e2 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_panel.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_panel.c
@@ -128,7 +128,7 @@
 	encoder->possible_crtcs = 1;
 
 	ret = drm_encoder_init(dev, encoder, &panel_encoder_funcs,
-			DRM_MODE_ENCODER_LVDS);
+			DRM_MODE_ENCODER_LVDS, NULL);
 	if (ret < 0)
 		goto fail;
 
diff --git a/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c b/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c
index 354c47c..5052a8a 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c
@@ -138,7 +138,7 @@
 	encoder->possible_crtcs = 1;
 
 	ret = drm_encoder_init(dev, encoder, &tfp410_encoder_funcs,
-			DRM_MODE_ENCODER_TMDS);
+			DRM_MODE_ENCODER_TMDS, NULL);
 	if (ret < 0)
 		goto fail;
 
diff --git a/drivers/gpu/drm/udl/udl_drv.h b/drivers/gpu/drm/udl/udl_drv.h
index 80adbac..4a064ef 100644
--- a/drivers/gpu/drm/udl/udl_drv.h
+++ b/drivers/gpu/drm/udl/udl_drv.h
@@ -108,7 +108,7 @@
 struct drm_framebuffer *
 udl_fb_user_fb_create(struct drm_device *dev,
 		      struct drm_file *file,
-		      struct drm_mode_fb_cmd2 *mode_cmd);
+		      const struct drm_mode_fb_cmd2 *mode_cmd);
 
 int udl_render_hline(struct drm_device *dev, int bpp, struct urb **urb_ptr,
 		     const char *front, char **urb_buf_ptr,
diff --git a/drivers/gpu/drm/udl/udl_encoder.c b/drivers/gpu/drm/udl/udl_encoder.c
index 4052c46..a181a64 100644
--- a/drivers/gpu/drm/udl/udl_encoder.c
+++ b/drivers/gpu/drm/udl/udl_encoder.c
@@ -73,7 +73,8 @@
 	if (!encoder)
 		return NULL;
 
-	drm_encoder_init(dev, encoder, &udl_enc_funcs, DRM_MODE_ENCODER_TMDS);
+	drm_encoder_init(dev, encoder, &udl_enc_funcs, DRM_MODE_ENCODER_TMDS,
+			 NULL);
 	drm_encoder_helper_add(encoder, &udl_helper_funcs);
 	encoder->possible_crtcs = 1;
 	return encoder;
diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c
index 62c7b1d..200419d 100644
--- a/drivers/gpu/drm/udl/udl_fb.c
+++ b/drivers/gpu/drm/udl/udl_fb.c
@@ -33,7 +33,6 @@
 struct udl_fbdev {
 	struct drm_fb_helper helper;
 	struct udl_framebuffer ufb;
-	struct list_head fbdev_list;
 	int fb_count;
 };
 
@@ -456,7 +455,7 @@
 static int
 udl_framebuffer_init(struct drm_device *dev,
 		     struct udl_framebuffer *ufb,
-		     struct drm_mode_fb_cmd2 *mode_cmd,
+		     const struct drm_mode_fb_cmd2 *mode_cmd,
 		     struct udl_gem_object *obj)
 {
 	int ret;
@@ -624,7 +623,7 @@
 struct drm_framebuffer *
 udl_fb_user_fb_create(struct drm_device *dev,
 		   struct drm_file *file,
-		   struct drm_mode_fb_cmd2 *mode_cmd)
+		   const struct drm_mode_fb_cmd2 *mode_cmd)
 {
 	struct drm_gem_object *obj;
 	struct udl_framebuffer *ufb;
diff --git a/drivers/gpu/drm/vc4/Makefile b/drivers/gpu/drm/vc4/Makefile
index 32b4f9c..4c6a99f 100644
--- a/drivers/gpu/drm/vc4/Makefile
+++ b/drivers/gpu/drm/vc4/Makefile
@@ -8,10 +8,19 @@
 	vc4_crtc.o \
 	vc4_drv.o \
 	vc4_kms.o \
+	vc4_gem.o \
 	vc4_hdmi.o \
 	vc4_hvs.o \
-	vc4_plane.o
+	vc4_irq.o \
+	vc4_plane.o \
+	vc4_render_cl.o \
+	vc4_trace_points.o \
+	vc4_v3d.o \
+	vc4_validate.o \
+	vc4_validate_shaders.o
 
 vc4-$(CONFIG_DEBUG_FS) += vc4_debugfs.o
 
 obj-$(CONFIG_DRM_VC4)  += vc4.o
+
+CFLAGS_vc4_trace_points.o := -I$(src)
diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c
index ab9f510..18dfe3e 100644
--- a/drivers/gpu/drm/vc4/vc4_bo.c
+++ b/drivers/gpu/drm/vc4/vc4_bo.c
@@ -12,19 +12,236 @@
  * access to system memory with no MMU in between.  To support it, we
  * use the GEM CMA helper functions to allocate contiguous ranges of
  * physical memory for our BOs.
+ *
+ * Since the CMA allocator is very slow, we keep a cache of recently
+ * freed BOs around so that the kernel's allocation of objects for 3D
+ * rendering can return quickly.
  */
 
 #include "vc4_drv.h"
+#include "uapi/drm/vc4_drm.h"
 
-struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t size)
+static void vc4_bo_stats_dump(struct vc4_dev *vc4)
 {
+	DRM_INFO("num bos allocated: %d\n",
+		 vc4->bo_stats.num_allocated);
+	DRM_INFO("size bos allocated: %dkb\n",
+		 vc4->bo_stats.size_allocated / 1024);
+	DRM_INFO("num bos used: %d\n",
+		 vc4->bo_stats.num_allocated - vc4->bo_stats.num_cached);
+	DRM_INFO("size bos used: %dkb\n",
+		 (vc4->bo_stats.size_allocated -
+		  vc4->bo_stats.size_cached) / 1024);
+	DRM_INFO("num bos cached: %d\n",
+		 vc4->bo_stats.num_cached);
+	DRM_INFO("size bos cached: %dkb\n",
+		 vc4->bo_stats.size_cached / 1024);
+}
+
+#ifdef CONFIG_DEBUG_FS
+int vc4_bo_stats_debugfs(struct seq_file *m, void *unused)
+{
+	struct drm_info_node *node = (struct drm_info_node *)m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct vc4_bo_stats stats;
+
+	/* Take a snapshot of the current stats with the lock held. */
+	mutex_lock(&vc4->bo_lock);
+	stats = vc4->bo_stats;
+	mutex_unlock(&vc4->bo_lock);
+
+	seq_printf(m, "num bos allocated: %d\n",
+		   stats.num_allocated);
+	seq_printf(m, "size bos allocated: %dkb\n",
+		   stats.size_allocated / 1024);
+	seq_printf(m, "num bos used: %d\n",
+		   stats.num_allocated - stats.num_cached);
+	seq_printf(m, "size bos used: %dkb\n",
+		   (stats.size_allocated - stats.size_cached) / 1024);
+	seq_printf(m, "num bos cached: %d\n",
+		   stats.num_cached);
+	seq_printf(m, "size bos cached: %dkb\n",
+		   stats.size_cached / 1024);
+
+	return 0;
+}
+#endif
+
+static uint32_t bo_page_index(size_t size)
+{
+	return (size / PAGE_SIZE) - 1;
+}
+
+/* Must be called with bo_lock held. */
+static void vc4_bo_destroy(struct vc4_bo *bo)
+{
+	struct drm_gem_object *obj = &bo->base.base;
+	struct vc4_dev *vc4 = to_vc4_dev(obj->dev);
+
+	if (bo->validated_shader) {
+		kfree(bo->validated_shader->texture_samples);
+		kfree(bo->validated_shader);
+		bo->validated_shader = NULL;
+	}
+
+	vc4->bo_stats.num_allocated--;
+	vc4->bo_stats.size_allocated -= obj->size;
+	drm_gem_cma_free_object(obj);
+}
+
+/* Must be called with bo_lock held. */
+static void vc4_bo_remove_from_cache(struct vc4_bo *bo)
+{
+	struct drm_gem_object *obj = &bo->base.base;
+	struct vc4_dev *vc4 = to_vc4_dev(obj->dev);
+
+	vc4->bo_stats.num_cached--;
+	vc4->bo_stats.size_cached -= obj->size;
+
+	list_del(&bo->unref_head);
+	list_del(&bo->size_head);
+}
+
+static struct list_head *vc4_get_cache_list_for_size(struct drm_device *dev,
+						     size_t size)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	uint32_t page_index = bo_page_index(size);
+
+	if (vc4->bo_cache.size_list_size <= page_index) {
+		uint32_t new_size = max(vc4->bo_cache.size_list_size * 2,
+					page_index + 1);
+		struct list_head *new_list;
+		uint32_t i;
+
+		new_list = kmalloc_array(new_size, sizeof(struct list_head),
+					 GFP_KERNEL);
+		if (!new_list)
+			return NULL;
+
+		/* Rebase the old cached BO lists to their new list
+		 * head locations.
+		 */
+		for (i = 0; i < vc4->bo_cache.size_list_size; i++) {
+			struct list_head *old_list =
+				&vc4->bo_cache.size_list[i];
+
+			if (list_empty(old_list))
+				INIT_LIST_HEAD(&new_list[i]);
+			else
+				list_replace(old_list, &new_list[i]);
+		}
+		/* And initialize the brand new BO list heads. */
+		for (i = vc4->bo_cache.size_list_size; i < new_size; i++)
+			INIT_LIST_HEAD(&new_list[i]);
+
+		kfree(vc4->bo_cache.size_list);
+		vc4->bo_cache.size_list = new_list;
+		vc4->bo_cache.size_list_size = new_size;
+	}
+
+	return &vc4->bo_cache.size_list[page_index];
+}
+
+void vc4_bo_cache_purge(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	mutex_lock(&vc4->bo_lock);
+	while (!list_empty(&vc4->bo_cache.time_list)) {
+		struct vc4_bo *bo = list_last_entry(&vc4->bo_cache.time_list,
+						    struct vc4_bo, unref_head);
+		vc4_bo_remove_from_cache(bo);
+		vc4_bo_destroy(bo);
+	}
+	mutex_unlock(&vc4->bo_lock);
+}
+
+static struct vc4_bo *vc4_bo_get_from_cache(struct drm_device *dev,
+					    uint32_t size)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	uint32_t page_index = bo_page_index(size);
+	struct vc4_bo *bo = NULL;
+
+	size = roundup(size, PAGE_SIZE);
+
+	mutex_lock(&vc4->bo_lock);
+	if (page_index >= vc4->bo_cache.size_list_size)
+		goto out;
+
+	if (list_empty(&vc4->bo_cache.size_list[page_index]))
+		goto out;
+
+	bo = list_first_entry(&vc4->bo_cache.size_list[page_index],
+			      struct vc4_bo, size_head);
+	vc4_bo_remove_from_cache(bo);
+	kref_init(&bo->base.base.refcount);
+
+out:
+	mutex_unlock(&vc4->bo_lock);
+	return bo;
+}
+
+/**
+ * vc4_gem_create_object - Implementation of driver->gem_create_object.
+ *
+ * This lets the CMA helpers allocate object structs for us, and keep
+ * our BO stats correct.
+ */
+struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct vc4_bo *bo;
+
+	bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+	if (!bo)
+		return ERR_PTR(-ENOMEM);
+
+	mutex_lock(&vc4->bo_lock);
+	vc4->bo_stats.num_allocated++;
+	vc4->bo_stats.size_allocated += size;
+	mutex_unlock(&vc4->bo_lock);
+
+	return &bo->base.base;
+}
+
+struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size,
+			     bool from_cache)
+{
+	size_t size = roundup(unaligned_size, PAGE_SIZE);
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	struct drm_gem_cma_object *cma_obj;
 
-	cma_obj = drm_gem_cma_create(dev, size);
-	if (IS_ERR(cma_obj))
+	if (size == 0)
 		return NULL;
-	else
-		return to_vc4_bo(&cma_obj->base);
+
+	/* First, try to get a vc4_bo from the kernel BO cache. */
+	if (from_cache) {
+		struct vc4_bo *bo = vc4_bo_get_from_cache(dev, size);
+
+		if (bo)
+			return bo;
+	}
+
+	cma_obj = drm_gem_cma_create(dev, size);
+	if (IS_ERR(cma_obj)) {
+		/*
+		 * If we've run out of CMA memory, kill the cache of
+		 * CMA allocations we've got laying around and try again.
+		 */
+		vc4_bo_cache_purge(dev);
+
+		cma_obj = drm_gem_cma_create(dev, size);
+		if (IS_ERR(cma_obj)) {
+			DRM_ERROR("Failed to allocate from CMA:\n");
+			vc4_bo_stats_dump(vc4);
+			return NULL;
+		}
+	}
+
+	return to_vc4_bo(&cma_obj->base);
 }
 
 int vc4_dumb_create(struct drm_file *file_priv,
@@ -41,7 +258,7 @@
 	if (args->size < args->pitch * args->height)
 		args->size = args->pitch * args->height;
 
-	bo = vc4_bo_create(dev, roundup(args->size, PAGE_SIZE));
+	bo = vc4_bo_create(dev, args->size, false);
 	if (!bo)
 		return -ENOMEM;
 
@@ -50,3 +267,291 @@
 
 	return ret;
 }
+
+/* Must be called with bo_lock held. */
+static void vc4_bo_cache_free_old(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	unsigned long expire_time = jiffies - msecs_to_jiffies(1000);
+
+	while (!list_empty(&vc4->bo_cache.time_list)) {
+		struct vc4_bo *bo = list_last_entry(&vc4->bo_cache.time_list,
+						    struct vc4_bo, unref_head);
+		if (time_before(expire_time, bo->free_time)) {
+			mod_timer(&vc4->bo_cache.time_timer,
+				  round_jiffies_up(jiffies +
+						   msecs_to_jiffies(1000)));
+			return;
+		}
+
+		vc4_bo_remove_from_cache(bo);
+		vc4_bo_destroy(bo);
+	}
+}
+
+/* Called on the last userspace/kernel unreference of the BO.  Returns
+ * it to the BO cache if possible, otherwise frees it.
+ *
+ * Note that this is called with the struct_mutex held.
+ */
+void vc4_free_object(struct drm_gem_object *gem_bo)
+{
+	struct drm_device *dev = gem_bo->dev;
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct vc4_bo *bo = to_vc4_bo(gem_bo);
+	struct list_head *cache_list;
+
+	mutex_lock(&vc4->bo_lock);
+	/* If the object references someone else's memory, we can't cache it.
+	 */
+	if (gem_bo->import_attach) {
+		vc4_bo_destroy(bo);
+		goto out;
+	}
+
+	/* Don't cache if it was publicly named. */
+	if (gem_bo->name) {
+		vc4_bo_destroy(bo);
+		goto out;
+	}
+
+	cache_list = vc4_get_cache_list_for_size(dev, gem_bo->size);
+	if (!cache_list) {
+		vc4_bo_destroy(bo);
+		goto out;
+	}
+
+	if (bo->validated_shader) {
+		kfree(bo->validated_shader->texture_samples);
+		kfree(bo->validated_shader);
+		bo->validated_shader = NULL;
+	}
+
+	bo->free_time = jiffies;
+	list_add(&bo->size_head, cache_list);
+	list_add(&bo->unref_head, &vc4->bo_cache.time_list);
+
+	vc4->bo_stats.num_cached++;
+	vc4->bo_stats.size_cached += gem_bo->size;
+
+	vc4_bo_cache_free_old(dev);
+
+out:
+	mutex_unlock(&vc4->bo_lock);
+}
+
+static void vc4_bo_cache_time_work(struct work_struct *work)
+{
+	struct vc4_dev *vc4 =
+		container_of(work, struct vc4_dev, bo_cache.time_work);
+	struct drm_device *dev = vc4->dev;
+
+	mutex_lock(&vc4->bo_lock);
+	vc4_bo_cache_free_old(dev);
+	mutex_unlock(&vc4->bo_lock);
+}
+
+static void vc4_bo_cache_time_timer(unsigned long data)
+{
+	struct drm_device *dev = (struct drm_device *)data;
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	schedule_work(&vc4->bo_cache.time_work);
+}
+
+struct dma_buf *
+vc4_prime_export(struct drm_device *dev, struct drm_gem_object *obj, int flags)
+{
+	struct vc4_bo *bo = to_vc4_bo(obj);
+
+	if (bo->validated_shader) {
+		DRM_ERROR("Attempting to export shader BO\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	return drm_gem_prime_export(dev, obj, flags);
+}
+
+int vc4_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct drm_gem_object *gem_obj;
+	struct vc4_bo *bo;
+	int ret;
+
+	ret = drm_gem_mmap(filp, vma);
+	if (ret)
+		return ret;
+
+	gem_obj = vma->vm_private_data;
+	bo = to_vc4_bo(gem_obj);
+
+	if (bo->validated_shader && (vma->vm_flags & VM_WRITE)) {
+		DRM_ERROR("mmaping of shader BOs for writing not allowed.\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Clear the VM_PFNMAP flag that was set by drm_gem_mmap(), and set the
+	 * vm_pgoff (used as a fake buffer offset by DRM) to 0 as we want to map
+	 * the whole buffer.
+	 */
+	vma->vm_flags &= ~VM_PFNMAP;
+	vma->vm_pgoff = 0;
+
+	ret = dma_mmap_writecombine(bo->base.base.dev->dev, vma,
+				    bo->base.vaddr, bo->base.paddr,
+				    vma->vm_end - vma->vm_start);
+	if (ret)
+		drm_gem_vm_close(vma);
+
+	return ret;
+}
+
+int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+{
+	struct vc4_bo *bo = to_vc4_bo(obj);
+
+	if (bo->validated_shader && (vma->vm_flags & VM_WRITE)) {
+		DRM_ERROR("mmaping of shader BOs for writing not allowed.\n");
+		return -EINVAL;
+	}
+
+	return drm_gem_cma_prime_mmap(obj, vma);
+}
+
+void *vc4_prime_vmap(struct drm_gem_object *obj)
+{
+	struct vc4_bo *bo = to_vc4_bo(obj);
+
+	if (bo->validated_shader) {
+		DRM_ERROR("mmaping of shader BOs not allowed.\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	return drm_gem_cma_prime_vmap(obj);
+}
+
+int vc4_create_bo_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file_priv)
+{
+	struct drm_vc4_create_bo *args = data;
+	struct vc4_bo *bo = NULL;
+	int ret;
+
+	/*
+	 * We can't allocate from the BO cache, because the BOs don't
+	 * get zeroed, and that might leak data between users.
+	 */
+	bo = vc4_bo_create(dev, args->size, false);
+	if (!bo)
+		return -ENOMEM;
+
+	ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle);
+	drm_gem_object_unreference_unlocked(&bo->base.base);
+
+	return ret;
+}
+
+int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data,
+		      struct drm_file *file_priv)
+{
+	struct drm_vc4_mmap_bo *args = data;
+	struct drm_gem_object *gem_obj;
+
+	gem_obj = drm_gem_object_lookup(dev, file_priv, args->handle);
+	if (!gem_obj) {
+		DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
+		return -EINVAL;
+	}
+
+	/* The mmap offset was set up at BO allocation time. */
+	args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
+
+	drm_gem_object_unreference_unlocked(gem_obj);
+	return 0;
+}
+
+int
+vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file_priv)
+{
+	struct drm_vc4_create_shader_bo *args = data;
+	struct vc4_bo *bo = NULL;
+	int ret;
+
+	if (args->size == 0)
+		return -EINVAL;
+
+	if (args->size % sizeof(u64) != 0)
+		return -EINVAL;
+
+	if (args->flags != 0) {
+		DRM_INFO("Unknown flags set: 0x%08x\n", args->flags);
+		return -EINVAL;
+	}
+
+	if (args->pad != 0) {
+		DRM_INFO("Pad set: 0x%08x\n", args->pad);
+		return -EINVAL;
+	}
+
+	bo = vc4_bo_create(dev, args->size, true);
+	if (!bo)
+		return -ENOMEM;
+
+	ret = copy_from_user(bo->base.vaddr,
+			     (void __user *)(uintptr_t)args->data,
+			     args->size);
+	if (ret != 0)
+		goto fail;
+	/* Clear the rest of the memory from allocating from the BO
+	 * cache.
+	 */
+	memset(bo->base.vaddr + args->size, 0,
+	       bo->base.base.size - args->size);
+
+	bo->validated_shader = vc4_validate_shader(&bo->base);
+	if (!bo->validated_shader) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* We have to create the handle after validation, to avoid
+	 * races for users to do doing things like mmap the shader BO.
+	 */
+	ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle);
+
+ fail:
+	drm_gem_object_unreference_unlocked(&bo->base.base);
+
+	return ret;
+}
+
+void vc4_bo_cache_init(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	mutex_init(&vc4->bo_lock);
+
+	INIT_LIST_HEAD(&vc4->bo_cache.time_list);
+
+	INIT_WORK(&vc4->bo_cache.time_work, vc4_bo_cache_time_work);
+	setup_timer(&vc4->bo_cache.time_timer,
+		    vc4_bo_cache_time_timer,
+		    (unsigned long)dev);
+}
+
+void vc4_bo_cache_destroy(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	del_timer(&vc4->bo_cache.time_timer);
+	cancel_work_sync(&vc4->bo_cache.time_work);
+
+	vc4_bo_cache_purge(dev);
+
+	if (vc4->bo_stats.num_allocated) {
+		DRM_ERROR("Destroying BO cache while BOs still allocated:\n");
+		vc4_bo_stats_dump(vc4);
+	}
+}
diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index 7a9f476..8d0d70e 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -35,6 +35,7 @@
 #include "drm_atomic_helper.h"
 #include "drm_crtc_helper.h"
 #include "linux/clk.h"
+#include "drm_fb_cma_helper.h"
 #include "linux/component.h"
 #include "linux/of_device.h"
 #include "vc4_drv.h"
@@ -168,7 +169,7 @@
 	struct drm_connector *connector;
 
 	drm_for_each_connector(connector, crtc->dev) {
-		if (connector && connector->state->crtc == crtc) {
+		if (connector->state->crtc == crtc) {
 			struct drm_encoder *encoder = connector->encoder;
 			struct vc4_encoder *vc4_encoder =
 				to_vc4_encoder(encoder);
@@ -401,7 +402,8 @@
 		dlist_next++;
 
 		HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel),
-			  (u32 *)vc4_crtc->dlist - (u32 *)vc4->hvs->dlist);
+			  (u32 __iomem *)vc4_crtc->dlist -
+			  (u32 __iomem *)vc4->hvs->dlist);
 
 		/* Make the next display list start after ours. */
 		vc4_crtc->dlist_size -= (dlist_next - vc4_crtc->dlist);
@@ -475,10 +477,106 @@
 	return ret;
 }
 
+struct vc4_async_flip_state {
+	struct drm_crtc *crtc;
+	struct drm_framebuffer *fb;
+	struct drm_pending_vblank_event *event;
+
+	struct vc4_seqno_cb cb;
+};
+
+/* Called when the V3D execution for the BO being flipped to is done, so that
+ * we can actually update the plane's address to point to it.
+ */
+static void
+vc4_async_page_flip_complete(struct vc4_seqno_cb *cb)
+{
+	struct vc4_async_flip_state *flip_state =
+		container_of(cb, struct vc4_async_flip_state, cb);
+	struct drm_crtc *crtc = flip_state->crtc;
+	struct drm_device *dev = crtc->dev;
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct drm_plane *plane = crtc->primary;
+
+	vc4_plane_async_set_fb(plane, flip_state->fb);
+	if (flip_state->event) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&dev->event_lock, flags);
+		drm_crtc_send_vblank_event(crtc, flip_state->event);
+		spin_unlock_irqrestore(&dev->event_lock, flags);
+	}
+
+	drm_framebuffer_unreference(flip_state->fb);
+	kfree(flip_state);
+
+	up(&vc4->async_modeset);
+}
+
+/* Implements async (non-vblank-synced) page flips.
+ *
+ * The page flip ioctl needs to return immediately, so we grab the
+ * modeset semaphore on the pipe, and queue the address update for
+ * when V3D is done with the BO being flipped to.
+ */
+static int vc4_async_page_flip(struct drm_crtc *crtc,
+			       struct drm_framebuffer *fb,
+			       struct drm_pending_vblank_event *event,
+			       uint32_t flags)
+{
+	struct drm_device *dev = crtc->dev;
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct drm_plane *plane = crtc->primary;
+	int ret = 0;
+	struct vc4_async_flip_state *flip_state;
+	struct drm_gem_cma_object *cma_bo = drm_fb_cma_get_gem_obj(fb, 0);
+	struct vc4_bo *bo = to_vc4_bo(&cma_bo->base);
+
+	flip_state = kzalloc(sizeof(*flip_state), GFP_KERNEL);
+	if (!flip_state)
+		return -ENOMEM;
+
+	drm_framebuffer_reference(fb);
+	flip_state->fb = fb;
+	flip_state->crtc = crtc;
+	flip_state->event = event;
+
+	/* Make sure all other async modesetes have landed. */
+	ret = down_interruptible(&vc4->async_modeset);
+	if (ret) {
+		kfree(flip_state);
+		return ret;
+	}
+
+	/* Immediately update the plane's legacy fb pointer, so that later
+	 * modeset prep sees the state that will be present when the semaphore
+	 * is released.
+	 */
+	drm_atomic_set_fb_for_plane(plane->state, fb);
+	plane->fb = fb;
+
+	vc4_queue_seqno_cb(dev, &flip_state->cb, bo->seqno,
+			   vc4_async_page_flip_complete);
+
+	/* Driver takes ownership of state on successful async commit. */
+	return 0;
+}
+
+static int vc4_page_flip(struct drm_crtc *crtc,
+			 struct drm_framebuffer *fb,
+			 struct drm_pending_vblank_event *event,
+			 uint32_t flags)
+{
+	if (flags & DRM_MODE_PAGE_FLIP_ASYNC)
+		return vc4_async_page_flip(crtc, fb, event, flags);
+	else
+		return drm_atomic_helper_page_flip(crtc, fb, event, flags);
+}
+
 static const struct drm_crtc_funcs vc4_crtc_funcs = {
 	.set_config = drm_atomic_helper_set_config,
 	.destroy = vc4_crtc_destroy,
-	.page_flip = drm_atomic_helper_page_flip,
+	.page_flip = vc4_page_flip,
 	.set_property = NULL,
 	.cursor_set = NULL, /* handled by drm_mode_cursor_universal */
 	.cursor_move = NULL, /* handled by drm_mode_cursor_universal */
@@ -591,21 +689,21 @@
 	 * that will take too much.
 	 */
 	primary_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_PRIMARY);
-	if (!primary_plane) {
+	if (IS_ERR(primary_plane)) {
 		dev_err(dev, "failed to construct primary plane\n");
 		ret = PTR_ERR(primary_plane);
 		goto err;
 	}
 
 	cursor_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_CURSOR);
-	if (!cursor_plane) {
+	if (IS_ERR(cursor_plane)) {
 		dev_err(dev, "failed to construct cursor plane\n");
 		ret = PTR_ERR(cursor_plane);
 		goto err_primary;
 	}
 
 	drm_crtc_init_with_planes(drm, crtc, primary_plane, cursor_plane,
-				  &vc4_crtc_funcs);
+				  &vc4_crtc_funcs, NULL);
 	drm_crtc_helper_add(crtc, &vc4_crtc_helper_funcs);
 	primary_plane->crtc = crtc;
 	cursor_plane->crtc = crtc;
diff --git a/drivers/gpu/drm/vc4/vc4_debugfs.c b/drivers/gpu/drm/vc4/vc4_debugfs.c
index 4297b0a5..d76ad10 100644
--- a/drivers/gpu/drm/vc4/vc4_debugfs.c
+++ b/drivers/gpu/drm/vc4/vc4_debugfs.c
@@ -16,11 +16,14 @@
 #include "vc4_regs.h"
 
 static const struct drm_info_list vc4_debugfs_list[] = {
+	{"bo_stats", vc4_bo_stats_debugfs, 0},
 	{"hdmi_regs", vc4_hdmi_debugfs_regs, 0},
 	{"hvs_regs", vc4_hvs_debugfs_regs, 0},
 	{"crtc0_regs", vc4_crtc_debugfs_regs, 0, (void *)(uintptr_t)0},
 	{"crtc1_regs", vc4_crtc_debugfs_regs, 0, (void *)(uintptr_t)1},
 	{"crtc2_regs", vc4_crtc_debugfs_regs, 0, (void *)(uintptr_t)2},
+	{"v3d_ident", vc4_v3d_debugfs_ident, 0},
+	{"v3d_regs", vc4_v3d_debugfs_regs, 0},
 };
 
 #define VC4_DEBUGFS_ENTRIES ARRAY_SIZE(vc4_debugfs_list)
diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
index 6e73060..cbcbbb8 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -16,6 +16,7 @@
 #include <linux/platform_device.h>
 #include "drm_fb_cma_helper.h"
 
+#include "uapi/drm/vc4_drm.h"
 #include "vc4_drv.h"
 #include "vc4_regs.h"
 
@@ -63,7 +64,7 @@
 	.open = drm_open,
 	.release = drm_release,
 	.unlocked_ioctl = drm_ioctl,
-	.mmap = drm_gem_cma_mmap,
+	.mmap = vc4_mmap,
 	.poll = drm_poll,
 	.read = drm_read,
 #ifdef CONFIG_COMPAT
@@ -73,16 +74,30 @@
 };
 
 static const struct drm_ioctl_desc vc4_drm_ioctls[] = {
+	DRM_IOCTL_DEF_DRV(VC4_SUBMIT_CL, vc4_submit_cl_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(VC4_WAIT_SEQNO, vc4_wait_seqno_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(VC4_WAIT_BO, vc4_wait_bo_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(VC4_CREATE_BO, vc4_create_bo_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(VC4_MMAP_BO, vc4_mmap_bo_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(VC4_GET_HANG_STATE, vc4_get_hang_state_ioctl,
+			  DRM_ROOT_ONLY),
 };
 
 static struct drm_driver vc4_drm_driver = {
 	.driver_features = (DRIVER_MODESET |
 			    DRIVER_ATOMIC |
 			    DRIVER_GEM |
+			    DRIVER_HAVE_IRQ |
 			    DRIVER_PRIME),
 	.lastclose = vc4_lastclose,
 	.preclose = vc4_drm_preclose,
 
+	.irq_handler = vc4_irq,
+	.irq_preinstall = vc4_irq_preinstall,
+	.irq_postinstall = vc4_irq_postinstall,
+	.irq_uninstall = vc4_irq_uninstall,
+
 	.enable_vblank = vc4_enable_vblank,
 	.disable_vblank = vc4_disable_vblank,
 	.get_vblank_counter = drm_vblank_count,
@@ -92,18 +107,19 @@
 	.debugfs_cleanup = vc4_debugfs_cleanup,
 #endif
 
-	.gem_free_object = drm_gem_cma_free_object,
+	.gem_create_object = vc4_create_object,
+	.gem_free_object = vc4_free_object,
 	.gem_vm_ops = &drm_gem_cma_vm_ops,
 
 	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
 	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
 	.gem_prime_import = drm_gem_prime_import,
-	.gem_prime_export = drm_gem_prime_export,
+	.gem_prime_export = vc4_prime_export,
 	.gem_prime_get_sg_table	= drm_gem_cma_prime_get_sg_table,
 	.gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table,
-	.gem_prime_vmap = drm_gem_cma_prime_vmap,
+	.gem_prime_vmap = vc4_prime_vmap,
 	.gem_prime_vunmap = drm_gem_cma_prime_vunmap,
-	.gem_prime_mmap = drm_gem_cma_prime_mmap,
+	.gem_prime_mmap = vc4_prime_mmap,
 
 	.dumb_create = vc4_dumb_create,
 	.dumb_map_offset = drm_gem_cma_dumb_map_offset,
@@ -170,13 +186,17 @@
 
 	drm_dev_set_unique(drm, dev_name(dev));
 
+	vc4_bo_cache_init(drm);
+
 	drm_mode_config_init(drm);
 	if (ret)
 		goto unref;
 
+	vc4_gem_init(drm);
+
 	ret = component_bind_all(dev, drm);
 	if (ret)
-		goto unref;
+		goto gem_destroy;
 
 	ret = drm_dev_register(drm, 0);
 	if (ret < 0)
@@ -200,8 +220,11 @@
 	drm_dev_unregister(drm);
 unbind_all:
 	component_unbind_all(dev, drm);
+gem_destroy:
+	vc4_gem_destroy(drm);
 unref:
 	drm_dev_unref(drm);
+	vc4_bo_cache_destroy(drm);
 	return ret;
 }
 
@@ -228,6 +251,7 @@
 	&vc4_hdmi_driver,
 	&vc4_crtc_driver,
 	&vc4_hvs_driver,
+	&vc4_v3d_driver,
 };
 
 static int vc4_platform_drm_probe(struct platform_device *pdev)
@@ -259,7 +283,6 @@
 	.remove		= vc4_platform_drm_remove,
 	.driver		= {
 		.name	= "vc4-drm",
-		.owner	= THIS_MODULE,
 		.of_match_table = vc4_of_match,
 	},
 };
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index fd8319f..080865e 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -15,8 +15,89 @@
 	struct vc4_hdmi *hdmi;
 	struct vc4_hvs *hvs;
 	struct vc4_crtc *crtc[3];
+	struct vc4_v3d *v3d;
 
 	struct drm_fbdev_cma *fbdev;
+
+	struct vc4_hang_state *hang_state;
+
+	/* The kernel-space BO cache.  Tracks buffers that have been
+	 * unreferenced by all other users (refcounts of 0!) but not
+	 * yet freed, so we can do cheap allocations.
+	 */
+	struct vc4_bo_cache {
+		/* Array of list heads for entries in the BO cache,
+		 * based on number of pages, so we can do O(1) lookups
+		 * in the cache when allocating.
+		 */
+		struct list_head *size_list;
+		uint32_t size_list_size;
+
+		/* List of all BOs in the cache, ordered by age, so we
+		 * can do O(1) lookups when trying to free old
+		 * buffers.
+		 */
+		struct list_head time_list;
+		struct work_struct time_work;
+		struct timer_list time_timer;
+	} bo_cache;
+
+	struct vc4_bo_stats {
+		u32 num_allocated;
+		u32 size_allocated;
+		u32 num_cached;
+		u32 size_cached;
+	} bo_stats;
+
+	/* Protects bo_cache and the BO stats. */
+	struct mutex bo_lock;
+
+	/* Sequence number for the last job queued in job_list.
+	 * Starts at 0 (no jobs emitted).
+	 */
+	uint64_t emit_seqno;
+
+	/* Sequence number for the last completed job on the GPU.
+	 * Starts at 0 (no jobs completed).
+	 */
+	uint64_t finished_seqno;
+
+	/* List of all struct vc4_exec_info for jobs to be executed.
+	 * The first job in the list is the one currently programmed
+	 * into ct0ca/ct1ca for execution.
+	 */
+	struct list_head job_list;
+	/* List of the finished vc4_exec_infos waiting to be freed by
+	 * job_done_work.
+	 */
+	struct list_head job_done_list;
+	/* Spinlock used to synchronize the job_list and seqno
+	 * accesses between the IRQ handler and GEM ioctls.
+	 */
+	spinlock_t job_lock;
+	wait_queue_head_t job_wait_queue;
+	struct work_struct job_done_work;
+
+	/* List of struct vc4_seqno_cb for callbacks to be made from a
+	 * workqueue when the given seqno is passed.
+	 */
+	struct list_head seqno_cb_list;
+
+	/* The binner overflow memory that's currently set up in
+	 * BPOA/BPOS registers.  When overflow occurs and a new one is
+	 * allocated, the previous one will be moved to
+	 * vc4->current_exec's free list.
+	 */
+	struct vc4_bo *overflow_mem;
+	struct work_struct overflow_mem_work;
+
+	struct {
+		uint32_t last_ct0ca, last_ct1ca;
+		struct timer_list timer;
+		struct work_struct reset_work;
+	} hangcheck;
+
+	struct semaphore async_modeset;
 };
 
 static inline struct vc4_dev *
@@ -27,6 +108,25 @@
 
 struct vc4_bo {
 	struct drm_gem_cma_object base;
+
+	/* seqno of the last job to render to this BO. */
+	uint64_t seqno;
+
+	/* List entry for the BO's position in either
+	 * vc4_exec_info->unref_list or vc4_dev->bo_cache.time_list
+	 */
+	struct list_head unref_head;
+
+	/* Time in jiffies when the BO was put in vc4->bo_cache. */
+	unsigned long free_time;
+
+	/* List entry for the BO's position in vc4_dev->bo_cache.size_list */
+	struct list_head size_head;
+
+	/* Struct for shader validation state, if created by
+	 * DRM_IOCTL_VC4_CREATE_SHADER_BO.
+	 */
+	struct vc4_validated_shader_info *validated_shader;
 };
 
 static inline struct vc4_bo *
@@ -35,6 +135,17 @@
 	return (struct vc4_bo *)bo;
 }
 
+struct vc4_seqno_cb {
+	struct work_struct work;
+	uint64_t seqno;
+	void (*func)(struct vc4_seqno_cb *cb);
+};
+
+struct vc4_v3d {
+	struct platform_device *pdev;
+	void __iomem *regs;
+};
+
 struct vc4_hvs {
 	struct platform_device *pdev;
 	void __iomem *regs;
@@ -72,9 +183,142 @@
 	return container_of(encoder, struct vc4_encoder, base);
 }
 
+#define V3D_READ(offset) readl(vc4->v3d->regs + offset)
+#define V3D_WRITE(offset, val) writel(val, vc4->v3d->regs + offset)
 #define HVS_READ(offset) readl(vc4->hvs->regs + offset)
 #define HVS_WRITE(offset, val) writel(val, vc4->hvs->regs + offset)
 
+struct vc4_exec_info {
+	/* Sequence number for this bin/render job. */
+	uint64_t seqno;
+
+	/* Kernel-space copy of the ioctl arguments */
+	struct drm_vc4_submit_cl *args;
+
+	/* This is the array of BOs that were looked up at the start of exec.
+	 * Command validation will use indices into this array.
+	 */
+	struct drm_gem_cma_object **bo;
+	uint32_t bo_count;
+
+	/* Pointers for our position in vc4->job_list */
+	struct list_head head;
+
+	/* List of other BOs used in the job that need to be released
+	 * once the job is complete.
+	 */
+	struct list_head unref_list;
+
+	/* Current unvalidated indices into @bo loaded by the non-hardware
+	 * VC4_PACKET_GEM_HANDLES.
+	 */
+	uint32_t bo_index[2];
+
+	/* This is the BO where we store the validated command lists, shader
+	 * records, and uniforms.
+	 */
+	struct drm_gem_cma_object *exec_bo;
+
+	/**
+	 * This tracks the per-shader-record state (packet 64) that
+	 * determines the length of the shader record and the offset
+	 * it's expected to be found at.  It gets read in from the
+	 * command lists.
+	 */
+	struct vc4_shader_state {
+		uint32_t addr;
+		/* Maximum vertex index referenced by any primitive using this
+		 * shader state.
+		 */
+		uint32_t max_index;
+	} *shader_state;
+
+	/** How many shader states the user declared they were using. */
+	uint32_t shader_state_size;
+	/** How many shader state records the validator has seen. */
+	uint32_t shader_state_count;
+
+	bool found_tile_binning_mode_config_packet;
+	bool found_start_tile_binning_packet;
+	bool found_increment_semaphore_packet;
+	bool found_flush;
+	uint8_t bin_tiles_x, bin_tiles_y;
+	struct drm_gem_cma_object *tile_bo;
+	uint32_t tile_alloc_offset;
+
+	/**
+	 * Computed addresses pointing into exec_bo where we start the
+	 * bin thread (ct0) and render thread (ct1).
+	 */
+	uint32_t ct0ca, ct0ea;
+	uint32_t ct1ca, ct1ea;
+
+	/* Pointer to the unvalidated bin CL (if present). */
+	void *bin_u;
+
+	/* Pointers to the shader recs.  These paddr gets incremented as CL
+	 * packets are relocated in validate_gl_shader_state, and the vaddrs
+	 * (u and v) get incremented and size decremented as the shader recs
+	 * themselves are validated.
+	 */
+	void *shader_rec_u;
+	void *shader_rec_v;
+	uint32_t shader_rec_p;
+	uint32_t shader_rec_size;
+
+	/* Pointers to the uniform data.  These pointers are incremented, and
+	 * size decremented, as each batch of uniforms is uploaded.
+	 */
+	void *uniforms_u;
+	void *uniforms_v;
+	uint32_t uniforms_p;
+	uint32_t uniforms_size;
+};
+
+static inline struct vc4_exec_info *
+vc4_first_job(struct vc4_dev *vc4)
+{
+	if (list_empty(&vc4->job_list))
+		return NULL;
+	return list_first_entry(&vc4->job_list, struct vc4_exec_info, head);
+}
+
+/**
+ * struct vc4_texture_sample_info - saves the offsets into the UBO for texture
+ * setup parameters.
+ *
+ * This will be used at draw time to relocate the reference to the texture
+ * contents in p0, and validate that the offset combined with
+ * width/height/stride/etc. from p1 and p2/p3 doesn't sample outside the BO.
+ * Note that the hardware treats unprovided config parameters as 0, so not all
+ * of them need to be set up for every texure sample, and we'll store ~0 as
+ * the offset to mark the unused ones.
+ *
+ * See the VC4 3D architecture guide page 41 ("Texture and Memory Lookup Unit
+ * Setup") for definitions of the texture parameters.
+ */
+struct vc4_texture_sample_info {
+	bool is_direct;
+	uint32_t p_offset[4];
+};
+
+/**
+ * struct vc4_validated_shader_info - information about validated shaders that
+ * needs to be used from command list validation.
+ *
+ * For a given shader, each time a shader state record references it, we need
+ * to verify that the shader doesn't read more uniforms than the shader state
+ * record's uniform BO pointer can provide, and we need to apply relocations
+ * and validate the shader state record's uniforms that define the texture
+ * samples.
+ */
+struct vc4_validated_shader_info {
+	uint32_t uniforms_size;
+	uint32_t uniforms_src_size;
+	uint32_t num_texture_samples;
+	struct vc4_texture_sample_info *texture_samples;
+};
+
 /**
  * _wait_for - magic (register) wait macro
  *
@@ -104,13 +348,29 @@
 #define wait_for(COND, MS) _wait_for(COND, MS, 1)
 
 /* vc4_bo.c */
+struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size);
 void vc4_free_object(struct drm_gem_object *gem_obj);
-struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t size);
+struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t size,
+			     bool from_cache);
 int vc4_dumb_create(struct drm_file *file_priv,
 		    struct drm_device *dev,
 		    struct drm_mode_create_dumb *args);
 struct dma_buf *vc4_prime_export(struct drm_device *dev,
 				 struct drm_gem_object *obj, int flags);
+int vc4_create_bo_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file_priv);
+int vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file_priv);
+int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data,
+		      struct drm_file *file_priv);
+int vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file_priv);
+int vc4_mmap(struct file *filp, struct vm_area_struct *vma);
+int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
+void *vc4_prime_vmap(struct drm_gem_object *obj);
+void vc4_bo_cache_init(struct drm_device *dev);
+void vc4_bo_cache_destroy(struct drm_device *dev);
+int vc4_bo_stats_debugfs(struct seq_file *m, void *arg);
 
 /* vc4_crtc.c */
 extern struct platform_driver vc4_crtc_driver;
@@ -126,10 +386,34 @@
 /* vc4_drv.c */
 void __iomem *vc4_ioremap_regs(struct platform_device *dev, int index);
 
+/* vc4_gem.c */
+void vc4_gem_init(struct drm_device *dev);
+void vc4_gem_destroy(struct drm_device *dev);
+int vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file_priv);
+int vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
+			 struct drm_file *file_priv);
+int vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
+		      struct drm_file *file_priv);
+void vc4_submit_next_job(struct drm_device *dev);
+int vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno,
+		       uint64_t timeout_ns, bool interruptible);
+void vc4_job_handle_completed(struct vc4_dev *vc4);
+int vc4_queue_seqno_cb(struct drm_device *dev,
+		       struct vc4_seqno_cb *cb, uint64_t seqno,
+		       void (*func)(struct vc4_seqno_cb *cb));
+
 /* vc4_hdmi.c */
 extern struct platform_driver vc4_hdmi_driver;
 int vc4_hdmi_debugfs_regs(struct seq_file *m, void *unused);
 
+/* vc4_irq.c */
+irqreturn_t vc4_irq(int irq, void *arg);
+void vc4_irq_preinstall(struct drm_device *dev);
+int vc4_irq_postinstall(struct drm_device *dev);
+void vc4_irq_uninstall(struct drm_device *dev);
+void vc4_irq_reset(struct drm_device *dev);
+
 /* vc4_hvs.c */
 extern struct platform_driver vc4_hvs_driver;
 void vc4_hvs_dump_state(struct drm_device *dev);
@@ -143,3 +427,35 @@
 				 enum drm_plane_type type);
 u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist);
 u32 vc4_plane_dlist_size(struct drm_plane_state *state);
+void vc4_plane_async_set_fb(struct drm_plane *plane,
+			    struct drm_framebuffer *fb);
+
+/* vc4_v3d.c */
+extern struct platform_driver vc4_v3d_driver;
+int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused);
+int vc4_v3d_debugfs_regs(struct seq_file *m, void *unused);
+int vc4_v3d_set_power(struct vc4_dev *vc4, bool on);
+
+/* vc4_validate.c */
+int
+vc4_validate_bin_cl(struct drm_device *dev,
+		    void *validated,
+		    void *unvalidated,
+		    struct vc4_exec_info *exec);
+
+int
+vc4_validate_shader_recs(struct drm_device *dev, struct vc4_exec_info *exec);
+
+struct drm_gem_cma_object *vc4_use_bo(struct vc4_exec_info *exec,
+				      uint32_t hindex);
+
+int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec);
+
+bool vc4_check_tex_size(struct vc4_exec_info *exec,
+			struct drm_gem_cma_object *fbo,
+			uint32_t offset, uint8_t tiling_format,
+			uint32_t width, uint32_t height, uint8_t cpp);
+
+/* vc4_validate_shader.c */
+struct vc4_validated_shader_info *
+vc4_validate_shader(struct drm_gem_cma_object *shader_obj);
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
new file mode 100644
index 0000000..39f29e7
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -0,0 +1,867 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/io.h>
+
+#include "uapi/drm/vc4_drm.h"
+#include "vc4_drv.h"
+#include "vc4_regs.h"
+#include "vc4_trace.h"
+
+static void
+vc4_queue_hangcheck(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	mod_timer(&vc4->hangcheck.timer,
+		  round_jiffies_up(jiffies + msecs_to_jiffies(100)));
+}
+
+struct vc4_hang_state {
+	struct drm_vc4_get_hang_state user_state;
+
+	u32 bo_count;
+	struct drm_gem_object **bo;
+};
+
+static void
+vc4_free_hang_state(struct drm_device *dev, struct vc4_hang_state *state)
+{
+	unsigned int i;
+
+	mutex_lock(&dev->struct_mutex);
+	for (i = 0; i < state->user_state.bo_count; i++)
+		drm_gem_object_unreference(state->bo[i]);
+	mutex_unlock(&dev->struct_mutex);
+
+	kfree(state);
+}
+
+int
+vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
+			 struct drm_file *file_priv)
+{
+	struct drm_vc4_get_hang_state *get_state = data;
+	struct drm_vc4_get_hang_state_bo *bo_state;
+	struct vc4_hang_state *kernel_state;
+	struct drm_vc4_get_hang_state *state;
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	unsigned long irqflags;
+	u32 i;
+	int ret;
+
+	spin_lock_irqsave(&vc4->job_lock, irqflags);
+	kernel_state = vc4->hang_state;
+	if (!kernel_state) {
+		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+		return -ENOENT;
+	}
+	state = &kernel_state->user_state;
+
+	/* If the user's array isn't big enough, just return the
+	 * required array size.
+	 */
+	if (get_state->bo_count < state->bo_count) {
+		get_state->bo_count = state->bo_count;
+		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+		return 0;
+	}
+
+	vc4->hang_state = NULL;
+	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+
+	/* Save the user's BO pointer, so we don't stomp it with the memcpy. */
+	state->bo = get_state->bo;
+	memcpy(get_state, state, sizeof(*state));
+
+	bo_state = kcalloc(state->bo_count, sizeof(*bo_state), GFP_KERNEL);
+	if (!bo_state) {
+		ret = -ENOMEM;
+		goto err_free;
+	}
+
+	for (i = 0; i < state->bo_count; i++) {
+		struct vc4_bo *vc4_bo = to_vc4_bo(kernel_state->bo[i]);
+		u32 handle;
+
+		ret = drm_gem_handle_create(file_priv, kernel_state->bo[i],
+					    &handle);
+
+		if (ret) {
+			state->bo_count = i - 1;
+			goto err;
+		}
+		bo_state[i].handle = handle;
+		bo_state[i].paddr = vc4_bo->base.paddr;
+		bo_state[i].size = vc4_bo->base.base.size;
+	}
+
+	ret = copy_to_user((void __user *)(uintptr_t)get_state->bo,
+			   bo_state,
+			   state->bo_count * sizeof(*bo_state));
+	kfree(bo_state);
+
+err_free:
+
+	vc4_free_hang_state(dev, kernel_state);
+
+err:
+	return ret;
+}
+
+static void
+vc4_save_hang_state(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct drm_vc4_get_hang_state *state;
+	struct vc4_hang_state *kernel_state;
+	struct vc4_exec_info *exec;
+	struct vc4_bo *bo;
+	unsigned long irqflags;
+	unsigned int i, unref_list_count;
+
+	kernel_state = kcalloc(1, sizeof(*state), GFP_KERNEL);
+	if (!kernel_state)
+		return;
+
+	state = &kernel_state->user_state;
+
+	spin_lock_irqsave(&vc4->job_lock, irqflags);
+	exec = vc4_first_job(vc4);
+	if (!exec) {
+		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+		return;
+	}
+
+	unref_list_count = 0;
+	list_for_each_entry(bo, &exec->unref_list, unref_head)
+		unref_list_count++;
+
+	state->bo_count = exec->bo_count + unref_list_count;
+	kernel_state->bo = kcalloc(state->bo_count, sizeof(*kernel_state->bo),
+				   GFP_ATOMIC);
+	if (!kernel_state->bo) {
+		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+		return;
+	}
+
+	for (i = 0; i < exec->bo_count; i++) {
+		drm_gem_object_reference(&exec->bo[i]->base);
+		kernel_state->bo[i] = &exec->bo[i]->base;
+	}
+
+	list_for_each_entry(bo, &exec->unref_list, unref_head) {
+		drm_gem_object_reference(&bo->base.base);
+		kernel_state->bo[i] = &bo->base.base;
+		i++;
+	}
+
+	state->start_bin = exec->ct0ca;
+	state->start_render = exec->ct1ca;
+
+	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+
+	state->ct0ca = V3D_READ(V3D_CTNCA(0));
+	state->ct0ea = V3D_READ(V3D_CTNEA(0));
+
+	state->ct1ca = V3D_READ(V3D_CTNCA(1));
+	state->ct1ea = V3D_READ(V3D_CTNEA(1));
+
+	state->ct0cs = V3D_READ(V3D_CTNCS(0));
+	state->ct1cs = V3D_READ(V3D_CTNCS(1));
+
+	state->ct0ra0 = V3D_READ(V3D_CT00RA0);
+	state->ct1ra0 = V3D_READ(V3D_CT01RA0);
+
+	state->bpca = V3D_READ(V3D_BPCA);
+	state->bpcs = V3D_READ(V3D_BPCS);
+	state->bpoa = V3D_READ(V3D_BPOA);
+	state->bpos = V3D_READ(V3D_BPOS);
+
+	state->vpmbase = V3D_READ(V3D_VPMBASE);
+
+	state->dbge = V3D_READ(V3D_DBGE);
+	state->fdbgo = V3D_READ(V3D_FDBGO);
+	state->fdbgb = V3D_READ(V3D_FDBGB);
+	state->fdbgr = V3D_READ(V3D_FDBGR);
+	state->fdbgs = V3D_READ(V3D_FDBGS);
+	state->errstat = V3D_READ(V3D_ERRSTAT);
+
+	spin_lock_irqsave(&vc4->job_lock, irqflags);
+	if (vc4->hang_state) {
+		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+		vc4_free_hang_state(dev, kernel_state);
+	} else {
+		vc4->hang_state = kernel_state;
+		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+	}
+}
+
+static void
+vc4_reset(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	DRM_INFO("Resetting GPU.\n");
+	vc4_v3d_set_power(vc4, false);
+	vc4_v3d_set_power(vc4, true);
+
+	vc4_irq_reset(dev);
+
+	/* Rearm the hangcheck -- another job might have been waiting
+	 * for our hung one to get kicked off, and vc4_irq_reset()
+	 * would have started it.
+	 */
+	vc4_queue_hangcheck(dev);
+}
+
+static void
+vc4_reset_work(struct work_struct *work)
+{
+	struct vc4_dev *vc4 =
+		container_of(work, struct vc4_dev, hangcheck.reset_work);
+
+	vc4_save_hang_state(vc4->dev);
+
+	vc4_reset(vc4->dev);
+}
+
+static void
+vc4_hangcheck_elapsed(unsigned long data)
+{
+	struct drm_device *dev = (struct drm_device *)data;
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	uint32_t ct0ca, ct1ca;
+
+	/* If idle, we can stop watching for hangs. */
+	if (list_empty(&vc4->job_list))
+		return;
+
+	ct0ca = V3D_READ(V3D_CTNCA(0));
+	ct1ca = V3D_READ(V3D_CTNCA(1));
+
+	/* If we've made any progress in execution, rearm the timer
+	 * and wait.
+	 */
+	if (ct0ca != vc4->hangcheck.last_ct0ca ||
+	    ct1ca != vc4->hangcheck.last_ct1ca) {
+		vc4->hangcheck.last_ct0ca = ct0ca;
+		vc4->hangcheck.last_ct1ca = ct1ca;
+		vc4_queue_hangcheck(dev);
+		return;
+	}
+
+	/* We've gone too long with no progress, reset.  This has to
+	 * be done from a work struct, since resetting can sleep and
+	 * this timer hook isn't allowed to.
+	 */
+	schedule_work(&vc4->hangcheck.reset_work);
+}
+
+static void
+submit_cl(struct drm_device *dev, uint32_t thread, uint32_t start, uint32_t end)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	/* Set the current and end address of the control list.
+	 * Writing the end register is what starts the job.
+	 */
+	V3D_WRITE(V3D_CTNCA(thread), start);
+	V3D_WRITE(V3D_CTNEA(thread), end);
+}
+
+int
+vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, uint64_t timeout_ns,
+		   bool interruptible)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	int ret = 0;
+	unsigned long timeout_expire;
+	DEFINE_WAIT(wait);
+
+	if (vc4->finished_seqno >= seqno)
+		return 0;
+
+	if (timeout_ns == 0)
+		return -ETIME;
+
+	timeout_expire = jiffies + nsecs_to_jiffies(timeout_ns);
+
+	trace_vc4_wait_for_seqno_begin(dev, seqno, timeout_ns);
+	for (;;) {
+		prepare_to_wait(&vc4->job_wait_queue, &wait,
+				interruptible ? TASK_INTERRUPTIBLE :
+				TASK_UNINTERRUPTIBLE);
+
+		if (interruptible && signal_pending(current)) {
+			ret = -ERESTARTSYS;
+			break;
+		}
+
+		if (vc4->finished_seqno >= seqno)
+			break;
+
+		if (timeout_ns != ~0ull) {
+			if (time_after_eq(jiffies, timeout_expire)) {
+				ret = -ETIME;
+				break;
+			}
+			schedule_timeout(timeout_expire - jiffies);
+		} else {
+			schedule();
+		}
+	}
+
+	finish_wait(&vc4->job_wait_queue, &wait);
+	trace_vc4_wait_for_seqno_end(dev, seqno);
+
+	if (ret && ret != -ERESTARTSYS) {
+		DRM_ERROR("timeout waiting for render thread idle\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static void
+vc4_flush_caches(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	/* Flush the GPU L2 caches.  These caches sit on top of system
+	 * L3 (the 128kb or so shared with the CPU), and are
+	 * non-allocating in the L3.
+	 */
+	V3D_WRITE(V3D_L2CACTL,
+		  V3D_L2CACTL_L2CCLR);
+
+	V3D_WRITE(V3D_SLCACTL,
+		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) |
+		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC) |
+		  VC4_SET_FIELD(0xf, V3D_SLCACTL_UCC) |
+		  VC4_SET_FIELD(0xf, V3D_SLCACTL_ICC));
+}
+
+/* Sets the registers for the next job to be actually be executed in
+ * the hardware.
+ *
+ * The job_lock should be held during this.
+ */
+void
+vc4_submit_next_job(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct vc4_exec_info *exec = vc4_first_job(vc4);
+
+	if (!exec)
+		return;
+
+	vc4_flush_caches(dev);
+
+	/* Disable the binner's pre-loaded overflow memory address */
+	V3D_WRITE(V3D_BPOA, 0);
+	V3D_WRITE(V3D_BPOS, 0);
+
+	if (exec->ct0ca != exec->ct0ea)
+		submit_cl(dev, 0, exec->ct0ca, exec->ct0ea);
+	submit_cl(dev, 1, exec->ct1ca, exec->ct1ea);
+}
+
+static void
+vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
+{
+	struct vc4_bo *bo;
+	unsigned i;
+
+	for (i = 0; i < exec->bo_count; i++) {
+		bo = to_vc4_bo(&exec->bo[i]->base);
+		bo->seqno = seqno;
+	}
+
+	list_for_each_entry(bo, &exec->unref_list, unref_head) {
+		bo->seqno = seqno;
+	}
+}
+
+/* Queues a struct vc4_exec_info for execution.  If no job is
+ * currently executing, then submits it.
+ *
+ * Unlike most GPUs, our hardware only handles one command list at a
+ * time.  To queue multiple jobs at once, we'd need to edit the
+ * previous command list to have a jump to the new one at the end, and
+ * then bump the end address.  That's a change for a later date,
+ * though.
+ */
+static void
+vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	uint64_t seqno;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&vc4->job_lock, irqflags);
+
+	seqno = ++vc4->emit_seqno;
+	exec->seqno = seqno;
+	vc4_update_bo_seqnos(exec, seqno);
+
+	list_add_tail(&exec->head, &vc4->job_list);
+
+	/* If no job was executing, kick ours off.  Otherwise, it'll
+	 * get started when the previous job's frame done interrupt
+	 * occurs.
+	 */
+	if (vc4_first_job(vc4) == exec) {
+		vc4_submit_next_job(dev);
+		vc4_queue_hangcheck(dev);
+	}
+
+	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+}
+
+/**
+ * Looks up a bunch of GEM handles for BOs and stores the array for
+ * use in the command validator that actually writes relocated
+ * addresses pointing to them.
+ */
+static int
+vc4_cl_lookup_bos(struct drm_device *dev,
+		  struct drm_file *file_priv,
+		  struct vc4_exec_info *exec)
+{
+	struct drm_vc4_submit_cl *args = exec->args;
+	uint32_t *handles;
+	int ret = 0;
+	int i;
+
+	exec->bo_count = args->bo_handle_count;
+
+	if (!exec->bo_count) {
+		/* See comment on bo_index for why we have to check
+		 * this.
+		 */
+		DRM_ERROR("Rendering requires BOs to validate\n");
+		return -EINVAL;
+	}
+
+	exec->bo = kcalloc(exec->bo_count, sizeof(struct drm_gem_cma_object *),
+			   GFP_KERNEL);
+	if (!exec->bo) {
+		DRM_ERROR("Failed to allocate validated BO pointers\n");
+		return -ENOMEM;
+	}
+
+	handles = drm_malloc_ab(exec->bo_count, sizeof(uint32_t));
+	if (!handles) {
+		DRM_ERROR("Failed to allocate incoming GEM handles\n");
+		goto fail;
+	}
+
+	ret = copy_from_user(handles,
+			     (void __user *)(uintptr_t)args->bo_handles,
+			     exec->bo_count * sizeof(uint32_t));
+	if (ret) {
+		DRM_ERROR("Failed to copy in GEM handles\n");
+		goto fail;
+	}
+
+	spin_lock(&file_priv->table_lock);
+	for (i = 0; i < exec->bo_count; i++) {
+		struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
+						     handles[i]);
+		if (!bo) {
+			DRM_ERROR("Failed to look up GEM BO %d: %d\n",
+				  i, handles[i]);
+			ret = -EINVAL;
+			spin_unlock(&file_priv->table_lock);
+			goto fail;
+		}
+		drm_gem_object_reference(bo);
+		exec->bo[i] = (struct drm_gem_cma_object *)bo;
+	}
+	spin_unlock(&file_priv->table_lock);
+
+fail:
+	kfree(handles);
+	return 0;
+}
+
+static int
+vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
+{
+	struct drm_vc4_submit_cl *args = exec->args;
+	void *temp = NULL;
+	void *bin;
+	int ret = 0;
+	uint32_t bin_offset = 0;
+	uint32_t shader_rec_offset = roundup(bin_offset + args->bin_cl_size,
+					     16);
+	uint32_t uniforms_offset = shader_rec_offset + args->shader_rec_size;
+	uint32_t exec_size = uniforms_offset + args->uniforms_size;
+	uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) *
+					  args->shader_rec_count);
+	struct vc4_bo *bo;
+
+	if (uniforms_offset < shader_rec_offset ||
+	    exec_size < uniforms_offset ||
+	    args->shader_rec_count >= (UINT_MAX /
+					  sizeof(struct vc4_shader_state)) ||
+	    temp_size < exec_size) {
+		DRM_ERROR("overflow in exec arguments\n");
+		goto fail;
+	}
+
+	/* Allocate space where we'll store the copied in user command lists
+	 * and shader records.
+	 *
+	 * We don't just copy directly into the BOs because we need to
+	 * read the contents back for validation, and I think the
+	 * bo->vaddr is uncached access.
+	 */
+	temp = kmalloc(temp_size, GFP_KERNEL);
+	if (!temp) {
+		DRM_ERROR("Failed to allocate storage for copying "
+			  "in bin/render CLs.\n");
+		ret = -ENOMEM;
+		goto fail;
+	}
+	bin = temp + bin_offset;
+	exec->shader_rec_u = temp + shader_rec_offset;
+	exec->uniforms_u = temp + uniforms_offset;
+	exec->shader_state = temp + exec_size;
+	exec->shader_state_size = args->shader_rec_count;
+
+	ret = copy_from_user(bin,
+			     (void __user *)(uintptr_t)args->bin_cl,
+			     args->bin_cl_size);
+	if (ret) {
+		DRM_ERROR("Failed to copy in bin cl\n");
+		goto fail;
+	}
+
+	ret = copy_from_user(exec->shader_rec_u,
+			     (void __user *)(uintptr_t)args->shader_rec,
+			     args->shader_rec_size);
+	if (ret) {
+		DRM_ERROR("Failed to copy in shader recs\n");
+		goto fail;
+	}
+
+	ret = copy_from_user(exec->uniforms_u,
+			     (void __user *)(uintptr_t)args->uniforms,
+			     args->uniforms_size);
+	if (ret) {
+		DRM_ERROR("Failed to copy in uniforms cl\n");
+		goto fail;
+	}
+
+	bo = vc4_bo_create(dev, exec_size, true);
+	if (!bo) {
+		DRM_ERROR("Couldn't allocate BO for binning\n");
+		ret = PTR_ERR(exec->exec_bo);
+		goto fail;
+	}
+	exec->exec_bo = &bo->base;
+
+	list_add_tail(&to_vc4_bo(&exec->exec_bo->base)->unref_head,
+		      &exec->unref_list);
+
+	exec->ct0ca = exec->exec_bo->paddr + bin_offset;
+
+	exec->bin_u = bin;
+
+	exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset;
+	exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset;
+	exec->shader_rec_size = args->shader_rec_size;
+
+	exec->uniforms_v = exec->exec_bo->vaddr + uniforms_offset;
+	exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset;
+	exec->uniforms_size = args->uniforms_size;
+
+	ret = vc4_validate_bin_cl(dev,
+				  exec->exec_bo->vaddr + bin_offset,
+				  bin,
+				  exec);
+	if (ret)
+		goto fail;
+
+	ret = vc4_validate_shader_recs(dev, exec);
+
+fail:
+	kfree(temp);
+	return ret;
+}
+
+static void
+vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
+{
+	unsigned i;
+
+	/* Need the struct lock for drm_gem_object_unreference(). */
+	mutex_lock(&dev->struct_mutex);
+	if (exec->bo) {
+		for (i = 0; i < exec->bo_count; i++)
+			drm_gem_object_unreference(&exec->bo[i]->base);
+		kfree(exec->bo);
+	}
+
+	while (!list_empty(&exec->unref_list)) {
+		struct vc4_bo *bo = list_first_entry(&exec->unref_list,
+						     struct vc4_bo, unref_head);
+		list_del(&bo->unref_head);
+		drm_gem_object_unreference(&bo->base.base);
+	}
+	mutex_unlock(&dev->struct_mutex);
+
+	kfree(exec);
+}
+
+void
+vc4_job_handle_completed(struct vc4_dev *vc4)
+{
+	unsigned long irqflags;
+	struct vc4_seqno_cb *cb, *cb_temp;
+
+	spin_lock_irqsave(&vc4->job_lock, irqflags);
+	while (!list_empty(&vc4->job_done_list)) {
+		struct vc4_exec_info *exec =
+			list_first_entry(&vc4->job_done_list,
+					 struct vc4_exec_info, head);
+		list_del(&exec->head);
+
+		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+		vc4_complete_exec(vc4->dev, exec);
+		spin_lock_irqsave(&vc4->job_lock, irqflags);
+	}
+
+	list_for_each_entry_safe(cb, cb_temp, &vc4->seqno_cb_list, work.entry) {
+		if (cb->seqno <= vc4->finished_seqno) {
+			list_del_init(&cb->work.entry);
+			schedule_work(&cb->work);
+		}
+	}
+
+	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+}
+
+static void vc4_seqno_cb_work(struct work_struct *work)
+{
+	struct vc4_seqno_cb *cb = container_of(work, struct vc4_seqno_cb, work);
+
+	cb->func(cb);
+}
+
+int vc4_queue_seqno_cb(struct drm_device *dev,
+		       struct vc4_seqno_cb *cb, uint64_t seqno,
+		       void (*func)(struct vc4_seqno_cb *cb))
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	int ret = 0;
+	unsigned long irqflags;
+
+	cb->func = func;
+	INIT_WORK(&cb->work, vc4_seqno_cb_work);
+
+	spin_lock_irqsave(&vc4->job_lock, irqflags);
+	if (seqno > vc4->finished_seqno) {
+		cb->seqno = seqno;
+		list_add_tail(&cb->work.entry, &vc4->seqno_cb_list);
+	} else {
+		schedule_work(&cb->work);
+	}
+	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+
+	return ret;
+}
+
+/* Scheduled when any job has been completed, this walks the list of
+ * jobs that had completed and unrefs their BOs and frees their exec
+ * structs.
+ */
+static void
+vc4_job_done_work(struct work_struct *work)
+{
+	struct vc4_dev *vc4 =
+		container_of(work, struct vc4_dev, job_done_work);
+
+	vc4_job_handle_completed(vc4);
+}
+
+static int
+vc4_wait_for_seqno_ioctl_helper(struct drm_device *dev,
+				uint64_t seqno,
+				uint64_t *timeout_ns)
+{
+	unsigned long start = jiffies;
+	int ret = vc4_wait_for_seqno(dev, seqno, *timeout_ns, true);
+
+	if ((ret == -EINTR || ret == -ERESTARTSYS) && *timeout_ns != ~0ull) {
+		uint64_t delta = jiffies_to_nsecs(jiffies - start);
+
+		if (*timeout_ns >= delta)
+			*timeout_ns -= delta;
+	}
+
+	return ret;
+}
+
+int
+vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
+		     struct drm_file *file_priv)
+{
+	struct drm_vc4_wait_seqno *args = data;
+
+	return vc4_wait_for_seqno_ioctl_helper(dev, args->seqno,
+					       &args->timeout_ns);
+}
+
+int
+vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
+		  struct drm_file *file_priv)
+{
+	int ret;
+	struct drm_vc4_wait_bo *args = data;
+	struct drm_gem_object *gem_obj;
+	struct vc4_bo *bo;
+
+	gem_obj = drm_gem_object_lookup(dev, file_priv, args->handle);
+	if (!gem_obj) {
+		DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
+		return -EINVAL;
+	}
+	bo = to_vc4_bo(gem_obj);
+
+	ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno,
+					      &args->timeout_ns);
+
+	drm_gem_object_unreference_unlocked(gem_obj);
+	return ret;
+}
+
+/**
+ * Submits a command list to the VC4.
+ *
+ * This is what is called batchbuffer emitting on other hardware.
+ */
+int
+vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
+		    struct drm_file *file_priv)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct drm_vc4_submit_cl *args = data;
+	struct vc4_exec_info *exec;
+	int ret;
+
+	if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) {
+		DRM_ERROR("Unknown flags: 0x%02x\n", args->flags);
+		return -EINVAL;
+	}
+
+	exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
+	if (!exec) {
+		DRM_ERROR("malloc failure on exec struct\n");
+		return -ENOMEM;
+	}
+
+	exec->args = args;
+	INIT_LIST_HEAD(&exec->unref_list);
+
+	ret = vc4_cl_lookup_bos(dev, file_priv, exec);
+	if (ret)
+		goto fail;
+
+	if (exec->args->bin_cl_size != 0) {
+		ret = vc4_get_bcl(dev, exec);
+		if (ret)
+			goto fail;
+	} else {
+		exec->ct0ca = 0;
+		exec->ct0ea = 0;
+	}
+
+	ret = vc4_get_rcl(dev, exec);
+	if (ret)
+		goto fail;
+
+	/* Clear this out of the struct we'll be putting in the queue,
+	 * since it's part of our stack.
+	 */
+	exec->args = NULL;
+
+	vc4_queue_submit(dev, exec);
+
+	/* Return the seqno for our job. */
+	args->seqno = vc4->emit_seqno;
+
+	return 0;
+
+fail:
+	vc4_complete_exec(vc4->dev, exec);
+
+	return ret;
+}
+
+void
+vc4_gem_init(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	INIT_LIST_HEAD(&vc4->job_list);
+	INIT_LIST_HEAD(&vc4->job_done_list);
+	INIT_LIST_HEAD(&vc4->seqno_cb_list);
+	spin_lock_init(&vc4->job_lock);
+
+	INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work);
+	setup_timer(&vc4->hangcheck.timer,
+		    vc4_hangcheck_elapsed,
+		    (unsigned long)dev);
+
+	INIT_WORK(&vc4->job_done_work, vc4_job_done_work);
+}
+
+void
+vc4_gem_destroy(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	/* Waiting for exec to finish would need to be done before
+	 * unregistering V3D.
+	 */
+	WARN_ON(vc4->emit_seqno != vc4->finished_seqno);
+
+	/* V3D should already have disabled its interrupt and cleared
+	 * the overflow allocation registers.  Now free the object.
+	 */
+	if (vc4->overflow_mem) {
+		drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base);
+		vc4->overflow_mem = NULL;
+	}
+
+	vc4_bo_cache_destroy(dev);
+
+	if (vc4->hang_state)
+		vc4_free_hang_state(dev, vc4->hang_state);
+}
diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c
index da9a36d..c69c046 100644
--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
+++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
@@ -519,7 +519,7 @@
 	WARN_ON_ONCE((HD_READ(VC4_HD_M_CTL) & VC4_HD_M_ENABLE) == 0);
 
 	drm_encoder_init(drm, hdmi->encoder, &vc4_hdmi_encoder_funcs,
-			 DRM_MODE_ENCODER_TMDS);
+			 DRM_MODE_ENCODER_TMDS, NULL);
 	drm_encoder_helper_add(hdmi->encoder, &vc4_hdmi_encoder_helper_funcs);
 
 	hdmi->connector = vc4_hdmi_connector_init(drm, hdmi->encoder);
diff --git a/drivers/gpu/drm/vc4/vc4_hvs.c b/drivers/gpu/drm/vc4/vc4_hvs.c
index ab1673f..8098c5b 100644
--- a/drivers/gpu/drm/vc4/vc4_hvs.c
+++ b/drivers/gpu/drm/vc4/vc4_hvs.c
@@ -75,10 +75,10 @@
 	for (i = 0; i < 64; i += 4) {
 		DRM_INFO("0x%08x (%s): 0x%08x 0x%08x 0x%08x 0x%08x\n",
 			 i * 4, i < HVS_BOOTLOADER_DLIST_END ? "B" : "D",
-			 ((uint32_t *)vc4->hvs->dlist)[i + 0],
-			 ((uint32_t *)vc4->hvs->dlist)[i + 1],
-			 ((uint32_t *)vc4->hvs->dlist)[i + 2],
-			 ((uint32_t *)vc4->hvs->dlist)[i + 3]);
+			 readl((u32 __iomem *)vc4->hvs->dlist + i + 0),
+			 readl((u32 __iomem *)vc4->hvs->dlist + i + 1),
+			 readl((u32 __iomem *)vc4->hvs->dlist + i + 2),
+			 readl((u32 __iomem *)vc4->hvs->dlist + i + 3));
 	}
 }
 
diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c
new file mode 100644
index 0000000..b68060e
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_irq.c
@@ -0,0 +1,210 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** DOC: Interrupt management for the V3D engine.
+ *
+ * We have an interrupt status register (V3D_INTCTL) which reports
+ * interrupts, and where writing 1 bits clears those interrupts.
+ * There are also a pair of interrupt registers
+ * (V3D_INTENA/V3D_INTDIS) where writing a 1 to their bits enables or
+ * disables that specific interrupt, and 0s written are ignored
+ * (reading either one returns the set of enabled interrupts).
+ *
+ * When we take a render frame interrupt, we need to wake the
+ * processes waiting for some frame to be done, and get the next frame
+ * submitted ASAP (so the hardware doesn't sit idle when there's work
+ * to do).
+ *
+ * When we take the binner out of memory interrupt, we need to
+ * allocate some new memory and pass it to the binner so that the
+ * current job can make progress.
+ */
+
+#include "vc4_drv.h"
+#include "vc4_regs.h"
+
+#define V3D_DRIVER_IRQS (V3D_INT_OUTOMEM | \
+			 V3D_INT_FRDONE)
+
+DECLARE_WAIT_QUEUE_HEAD(render_wait);
+
+static void
+vc4_overflow_mem_work(struct work_struct *work)
+{
+	struct vc4_dev *vc4 =
+		container_of(work, struct vc4_dev, overflow_mem_work);
+	struct drm_device *dev = vc4->dev;
+	struct vc4_bo *bo;
+
+	bo = vc4_bo_create(dev, 256 * 1024, true);
+	if (!bo) {
+		DRM_ERROR("Couldn't allocate binner overflow mem\n");
+		return;
+	}
+
+	/* If there's a job executing currently, then our previous
+	 * overflow allocation is getting used in that job and we need
+	 * to queue it to be released when the job is done.  But if no
+	 * job is executing at all, then we can free the old overflow
+	 * object direcctly.
+	 *
+	 * No lock necessary for this pointer since we're the only
+	 * ones that update the pointer, and our workqueue won't
+	 * reenter.
+	 */
+	if (vc4->overflow_mem) {
+		struct vc4_exec_info *current_exec;
+		unsigned long irqflags;
+
+		spin_lock_irqsave(&vc4->job_lock, irqflags);
+		current_exec = vc4_first_job(vc4);
+		if (current_exec) {
+			vc4->overflow_mem->seqno = vc4->finished_seqno + 1;
+			list_add_tail(&vc4->overflow_mem->unref_head,
+				      &current_exec->unref_list);
+			vc4->overflow_mem = NULL;
+		}
+		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+	}
+
+	if (vc4->overflow_mem)
+		drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base);
+	vc4->overflow_mem = bo;
+
+	V3D_WRITE(V3D_BPOA, bo->base.paddr);
+	V3D_WRITE(V3D_BPOS, bo->base.base.size);
+	V3D_WRITE(V3D_INTCTL, V3D_INT_OUTOMEM);
+	V3D_WRITE(V3D_INTENA, V3D_INT_OUTOMEM);
+}
+
+static void
+vc4_irq_finish_job(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct vc4_exec_info *exec = vc4_first_job(vc4);
+
+	if (!exec)
+		return;
+
+	vc4->finished_seqno++;
+	list_move_tail(&exec->head, &vc4->job_done_list);
+	vc4_submit_next_job(dev);
+
+	wake_up_all(&vc4->job_wait_queue);
+	schedule_work(&vc4->job_done_work);
+}
+
+irqreturn_t
+vc4_irq(int irq, void *arg)
+{
+	struct drm_device *dev = arg;
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	uint32_t intctl;
+	irqreturn_t status = IRQ_NONE;
+
+	barrier();
+	intctl = V3D_READ(V3D_INTCTL);
+
+	/* Acknowledge the interrupts we're handling here. The render
+	 * frame done interrupt will be cleared, while OUTOMEM will
+	 * stay high until the underlying cause is cleared.
+	 */
+	V3D_WRITE(V3D_INTCTL, intctl);
+
+	if (intctl & V3D_INT_OUTOMEM) {
+		/* Disable OUTOMEM until the work is done. */
+		V3D_WRITE(V3D_INTDIS, V3D_INT_OUTOMEM);
+		schedule_work(&vc4->overflow_mem_work);
+		status = IRQ_HANDLED;
+	}
+
+	if (intctl & V3D_INT_FRDONE) {
+		spin_lock(&vc4->job_lock);
+		vc4_irq_finish_job(dev);
+		spin_unlock(&vc4->job_lock);
+		status = IRQ_HANDLED;
+	}
+
+	return status;
+}
+
+void
+vc4_irq_preinstall(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	init_waitqueue_head(&vc4->job_wait_queue);
+	INIT_WORK(&vc4->overflow_mem_work, vc4_overflow_mem_work);
+
+	/* Clear any pending interrupts someone might have left around
+	 * for us.
+	 */
+	V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS);
+}
+
+int
+vc4_irq_postinstall(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	/* Enable both the render done and out of memory interrupts. */
+	V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS);
+
+	return 0;
+}
+
+void
+vc4_irq_uninstall(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	/* Disable sending interrupts for our driver's IRQs. */
+	V3D_WRITE(V3D_INTDIS, V3D_DRIVER_IRQS);
+
+	/* Clear any pending interrupts we might have left. */
+	V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS);
+
+	cancel_work_sync(&vc4->overflow_mem_work);
+}
+
+/** Reinitializes interrupt registers when a GPU reset is performed. */
+void vc4_irq_reset(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	unsigned long irqflags;
+
+	/* Acknowledge any stale IRQs. */
+	V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS);
+
+	/*
+	 * Turn all our interrupts on.  Binner out of memory is the
+	 * only one we expect to trigger at this point, since we've
+	 * just come from poweron and haven't supplied any overflow
+	 * memory yet.
+	 */
+	V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS);
+
+	spin_lock_irqsave(&vc4->job_lock, irqflags);
+	vc4_irq_finish_job(dev);
+	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+}
diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c
index 2e5597d..f95f2df 100644
--- a/drivers/gpu/drm/vc4/vc4_kms.c
+++ b/drivers/gpu/drm/vc4/vc4_kms.c
@@ -15,6 +15,7 @@
  */
 
 #include "drm_crtc.h"
+#include "drm_atomic.h"
 #include "drm_atomic_helper.h"
 #include "drm_crtc_helper.h"
 #include "drm_plane_helper.h"
@@ -29,10 +30,152 @@
 		drm_fbdev_cma_hotplug_event(vc4->fbdev);
 }
 
+struct vc4_commit {
+	struct drm_device *dev;
+	struct drm_atomic_state *state;
+	struct vc4_seqno_cb cb;
+};
+
+static void
+vc4_atomic_complete_commit(struct vc4_commit *c)
+{
+	struct drm_atomic_state *state = c->state;
+	struct drm_device *dev = state->dev;
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	drm_atomic_helper_commit_modeset_disables(dev, state);
+
+	drm_atomic_helper_commit_planes(dev, state, false);
+
+	drm_atomic_helper_commit_modeset_enables(dev, state);
+
+	drm_atomic_helper_wait_for_vblanks(dev, state);
+
+	drm_atomic_helper_cleanup_planes(dev, state);
+
+	drm_atomic_state_free(state);
+
+	up(&vc4->async_modeset);
+
+	kfree(c);
+}
+
+static void
+vc4_atomic_complete_commit_seqno_cb(struct vc4_seqno_cb *cb)
+{
+	struct vc4_commit *c = container_of(cb, struct vc4_commit, cb);
+
+	vc4_atomic_complete_commit(c);
+}
+
+static struct vc4_commit *commit_init(struct drm_atomic_state *state)
+{
+	struct vc4_commit *c = kzalloc(sizeof(*c), GFP_KERNEL);
+
+	if (!c)
+		return NULL;
+	c->dev = state->dev;
+	c->state = state;
+
+	return c;
+}
+
+/**
+ * vc4_atomic_commit - commit validated state object
+ * @dev: DRM device
+ * @state: the driver state object
+ * @async: asynchronous commit
+ *
+ * This function commits a with drm_atomic_helper_check() pre-validated state
+ * object. This can still fail when e.g. the framebuffer reservation fails. For
+ * now this doesn't implement asynchronous commits.
+ *
+ * RETURNS
+ * Zero for success or -errno.
+ */
+static int vc4_atomic_commit(struct drm_device *dev,
+			     struct drm_atomic_state *state,
+			     bool async)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	int ret;
+	int i;
+	uint64_t wait_seqno = 0;
+	struct vc4_commit *c;
+
+	c = commit_init(state);
+	if (!c)
+		return -ENOMEM;
+
+	/* Make sure that any outstanding modesets have finished. */
+	ret = down_interruptible(&vc4->async_modeset);
+	if (ret) {
+		kfree(c);
+		return ret;
+	}
+
+	ret = drm_atomic_helper_prepare_planes(dev, state);
+	if (ret) {
+		kfree(c);
+		up(&vc4->async_modeset);
+		return ret;
+	}
+
+	for (i = 0; i < dev->mode_config.num_total_plane; i++) {
+		struct drm_plane *plane = state->planes[i];
+		struct drm_plane_state *new_state = state->plane_states[i];
+
+		if (!plane)
+			continue;
+
+		if ((plane->state->fb != new_state->fb) && new_state->fb) {
+			struct drm_gem_cma_object *cma_bo =
+				drm_fb_cma_get_gem_obj(new_state->fb, 0);
+			struct vc4_bo *bo = to_vc4_bo(&cma_bo->base);
+
+			wait_seqno = max(bo->seqno, wait_seqno);
+		}
+	}
+
+	/*
+	 * This is the point of no return - everything below never fails except
+	 * when the hw goes bonghits. Which means we can commit the new state on
+	 * the software side now.
+	 */
+
+	drm_atomic_helper_swap_state(dev, state);
+
+	/*
+	 * Everything below can be run asynchronously without the need to grab
+	 * any modeset locks at all under one condition: It must be guaranteed
+	 * that the asynchronous work has either been cancelled (if the driver
+	 * supports it, which at least requires that the framebuffers get
+	 * cleaned up with drm_atomic_helper_cleanup_planes()) or completed
+	 * before the new state gets committed on the software side with
+	 * drm_atomic_helper_swap_state().
+	 *
+	 * This scheme allows new atomic state updates to be prepared and
+	 * checked in parallel to the asynchronous completion of the previous
+	 * update. Which is important since compositors need to figure out the
+	 * composition of the next frame right after having submitted the
+	 * current layout.
+	 */
+
+	if (async) {
+		vc4_queue_seqno_cb(dev, &c->cb, wait_seqno,
+				   vc4_atomic_complete_commit_seqno_cb);
+	} else {
+		vc4_wait_for_seqno(dev, wait_seqno, ~0ull, false);
+		vc4_atomic_complete_commit(c);
+	}
+
+	return 0;
+}
+
 static const struct drm_mode_config_funcs vc4_mode_funcs = {
 	.output_poll_changed = vc4_output_poll_changed,
 	.atomic_check = drm_atomic_helper_check,
-	.atomic_commit = drm_atomic_helper_commit,
+	.atomic_commit = vc4_atomic_commit,
 	.fb_create = drm_fb_cma_create,
 };
 
@@ -41,6 +184,8 @@
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	int ret;
 
+	sema_init(&vc4->async_modeset, 1);
+
 	ret = drm_vblank_init(dev, dev->mode_config.num_crtc);
 	if (ret < 0) {
 		dev_err(dev->dev, "failed to initialize vblank\n");
@@ -51,6 +196,8 @@
 	dev->mode_config.max_height = 2048;
 	dev->mode_config.funcs = &vc4_mode_funcs;
 	dev->mode_config.preferred_depth = 24;
+	dev->mode_config.async_page_flip = true;
+
 	dev->vblank_disable_allowed = true;
 
 	drm_mode_config_reset(dev);
diff --git a/drivers/gpu/drm/vc4/vc4_packet.h b/drivers/gpu/drm/vc4/vc4_packet.h
new file mode 100644
index 0000000..0f31cc0
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_packet.h
@@ -0,0 +1,399 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef VC4_PACKET_H
+#define VC4_PACKET_H
+
+#include "vc4_regs.h" /* for VC4_MASK, VC4_GET_FIELD, VC4_SET_FIELD */
+
+enum vc4_packet {
+	VC4_PACKET_HALT = 0,
+	VC4_PACKET_NOP = 1,
+
+	VC4_PACKET_FLUSH = 4,
+	VC4_PACKET_FLUSH_ALL = 5,
+	VC4_PACKET_START_TILE_BINNING = 6,
+	VC4_PACKET_INCREMENT_SEMAPHORE = 7,
+	VC4_PACKET_WAIT_ON_SEMAPHORE = 8,
+
+	VC4_PACKET_BRANCH = 16,
+	VC4_PACKET_BRANCH_TO_SUB_LIST = 17,
+
+	VC4_PACKET_STORE_MS_TILE_BUFFER = 24,
+	VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF = 25,
+	VC4_PACKET_STORE_FULL_RES_TILE_BUFFER = 26,
+	VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER = 27,
+	VC4_PACKET_STORE_TILE_BUFFER_GENERAL = 28,
+	VC4_PACKET_LOAD_TILE_BUFFER_GENERAL = 29,
+
+	VC4_PACKET_GL_INDEXED_PRIMITIVE = 32,
+	VC4_PACKET_GL_ARRAY_PRIMITIVE = 33,
+
+	VC4_PACKET_COMPRESSED_PRIMITIVE = 48,
+	VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE = 49,
+
+	VC4_PACKET_PRIMITIVE_LIST_FORMAT = 56,
+
+	VC4_PACKET_GL_SHADER_STATE = 64,
+	VC4_PACKET_NV_SHADER_STATE = 65,
+	VC4_PACKET_VG_SHADER_STATE = 66,
+
+	VC4_PACKET_CONFIGURATION_BITS = 96,
+	VC4_PACKET_FLAT_SHADE_FLAGS = 97,
+	VC4_PACKET_POINT_SIZE = 98,
+	VC4_PACKET_LINE_WIDTH = 99,
+	VC4_PACKET_RHT_X_BOUNDARY = 100,
+	VC4_PACKET_DEPTH_OFFSET = 101,
+	VC4_PACKET_CLIP_WINDOW = 102,
+	VC4_PACKET_VIEWPORT_OFFSET = 103,
+	VC4_PACKET_Z_CLIPPING = 104,
+	VC4_PACKET_CLIPPER_XY_SCALING = 105,
+	VC4_PACKET_CLIPPER_Z_SCALING = 106,
+
+	VC4_PACKET_TILE_BINNING_MODE_CONFIG = 112,
+	VC4_PACKET_TILE_RENDERING_MODE_CONFIG = 113,
+	VC4_PACKET_CLEAR_COLORS = 114,
+	VC4_PACKET_TILE_COORDINATES = 115,
+
+	/* Not an actual hardware packet -- this is what we use to put
+	 * references to GEM bos in the command stream, since we need the u32
+	 * int the actual address packet in order to store the offset from the
+	 * start of the BO.
+	 */
+	VC4_PACKET_GEM_HANDLES = 254,
+} __attribute__ ((__packed__));
+
+#define VC4_PACKET_HALT_SIZE						1
+#define VC4_PACKET_NOP_SIZE						1
+#define VC4_PACKET_FLUSH_SIZE						1
+#define VC4_PACKET_FLUSH_ALL_SIZE					1
+#define VC4_PACKET_START_TILE_BINNING_SIZE				1
+#define VC4_PACKET_INCREMENT_SEMAPHORE_SIZE				1
+#define VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE				1
+#define VC4_PACKET_BRANCH_SIZE						5
+#define VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE				5
+#define VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE				1
+#define VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF_SIZE			1
+#define VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE			5
+#define VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE			5
+#define VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE			7
+#define VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE			7
+#define VC4_PACKET_GL_INDEXED_PRIMITIVE_SIZE				14
+#define VC4_PACKET_GL_ARRAY_PRIMITIVE_SIZE				10
+#define VC4_PACKET_COMPRESSED_PRIMITIVE_SIZE				1
+#define VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE_SIZE			1
+#define VC4_PACKET_PRIMITIVE_LIST_FORMAT_SIZE				2
+#define VC4_PACKET_GL_SHADER_STATE_SIZE					5
+#define VC4_PACKET_NV_SHADER_STATE_SIZE					5
+#define VC4_PACKET_VG_SHADER_STATE_SIZE					5
+#define VC4_PACKET_CONFIGURATION_BITS_SIZE				4
+#define VC4_PACKET_FLAT_SHADE_FLAGS_SIZE				5
+#define VC4_PACKET_POINT_SIZE_SIZE					5
+#define VC4_PACKET_LINE_WIDTH_SIZE					5
+#define VC4_PACKET_RHT_X_BOUNDARY_SIZE					3
+#define VC4_PACKET_DEPTH_OFFSET_SIZE					5
+#define VC4_PACKET_CLIP_WINDOW_SIZE					9
+#define VC4_PACKET_VIEWPORT_OFFSET_SIZE					5
+#define VC4_PACKET_Z_CLIPPING_SIZE					9
+#define VC4_PACKET_CLIPPER_XY_SCALING_SIZE				9
+#define VC4_PACKET_CLIPPER_Z_SCALING_SIZE				9
+#define VC4_PACKET_TILE_BINNING_MODE_CONFIG_SIZE			16
+#define VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE			11
+#define VC4_PACKET_CLEAR_COLORS_SIZE					14
+#define VC4_PACKET_TILE_COORDINATES_SIZE				3
+#define VC4_PACKET_GEM_HANDLES_SIZE					9
+
+/* Number of multisamples supported. */
+#define VC4_MAX_SAMPLES							4
+/* Size of a full resolution color or Z tile buffer load/store. */
+#define VC4_TILE_BUFFER_SIZE			(64 * 64 * 4)
+
+/** @{
+ * Bits used by packets like VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
+ * VC4_PACKET_TILE_RENDERING_MODE_CONFIG.
+*/
+#define VC4_TILING_FORMAT_LINEAR    0
+#define VC4_TILING_FORMAT_T         1
+#define VC4_TILING_FORMAT_LT        2
+/** @} */
+
+/** @{
+ *
+ * low bits of VC4_PACKET_STORE_FULL_RES_TILE_BUFFER and
+ * VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER.
+ */
+#define VC4_LOADSTORE_FULL_RES_EOF                     BIT(3)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL       BIT(2)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_ZS              BIT(1)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_COLOR           BIT(0)
+
+/** @{
+ *
+ * low bits of VC4_PACKET_STORE_FULL_RES_TILE_BUFFER and
+ * VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER.
+ */
+#define VC4_LOADSTORE_FULL_RES_EOF                     BIT(3)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL       BIT(2)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_ZS              BIT(1)
+#define VC4_LOADSTORE_FULL_RES_DISABLE_COLOR           BIT(0)
+
+/** @{
+ *
+ * byte 2 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
+ * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL (low bits of the address)
+ */
+
+#define VC4_LOADSTORE_TILE_BUFFER_EOF                  BIT(3)
+#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_VG_MASK BIT(2)
+#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_ZS      BIT(1)
+#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_COLOR   BIT(0)
+
+/** @} */
+
+/** @{
+ *
+ * byte 0-1 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
+ * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL
+ */
+#define VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR BIT(15)
+#define VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR     BIT(14)
+#define VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR  BIT(13)
+#define VC4_STORE_TILE_BUFFER_DISABLE_SWAP         BIT(12)
+
+#define VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK      VC4_MASK(9, 8)
+#define VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT     8
+#define VC4_LOADSTORE_TILE_BUFFER_RGBA8888         0
+#define VC4_LOADSTORE_TILE_BUFFER_BGR565_DITHER    1
+#define VC4_LOADSTORE_TILE_BUFFER_BGR565           2
+/** @} */
+
+/** @{
+ *
+ * byte 0 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
+ * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL
+ */
+#define VC4_STORE_TILE_BUFFER_MODE_MASK            VC4_MASK(7, 6)
+#define VC4_STORE_TILE_BUFFER_MODE_SHIFT           6
+#define VC4_STORE_TILE_BUFFER_MODE_SAMPLE0         (0 << 6)
+#define VC4_STORE_TILE_BUFFER_MODE_DECIMATE_X4     (1 << 6)
+#define VC4_STORE_TILE_BUFFER_MODE_DECIMATE_X16    (2 << 6)
+
+/** The values of the field are VC4_TILING_FORMAT_* */
+#define VC4_LOADSTORE_TILE_BUFFER_TILING_MASK      VC4_MASK(5, 4)
+#define VC4_LOADSTORE_TILE_BUFFER_TILING_SHIFT     4
+
+#define VC4_LOADSTORE_TILE_BUFFER_BUFFER_MASK      VC4_MASK(2, 0)
+#define VC4_LOADSTORE_TILE_BUFFER_BUFFER_SHIFT     0
+#define VC4_LOADSTORE_TILE_BUFFER_NONE             0
+#define VC4_LOADSTORE_TILE_BUFFER_COLOR            1
+#define VC4_LOADSTORE_TILE_BUFFER_ZS               2
+#define VC4_LOADSTORE_TILE_BUFFER_Z                3
+#define VC4_LOADSTORE_TILE_BUFFER_VG_MASK          4
+#define VC4_LOADSTORE_TILE_BUFFER_FULL             5
+/** @} */
+
+#define VC4_INDEX_BUFFER_U8                        (0 << 4)
+#define VC4_INDEX_BUFFER_U16                       (1 << 4)
+
+/* This flag is only present in NV shader state. */
+#define VC4_SHADER_FLAG_SHADED_CLIP_COORDS         BIT(3)
+#define VC4_SHADER_FLAG_ENABLE_CLIPPING            BIT(2)
+#define VC4_SHADER_FLAG_VS_POINT_SIZE              BIT(1)
+#define VC4_SHADER_FLAG_FS_SINGLE_THREAD           BIT(0)
+
+/** @{ byte 2 of config bits. */
+#define VC4_CONFIG_BITS_EARLY_Z_UPDATE             BIT(1)
+#define VC4_CONFIG_BITS_EARLY_Z                    BIT(0)
+/** @} */
+
+/** @{ byte 1 of config bits. */
+#define VC4_CONFIG_BITS_Z_UPDATE                   BIT(7)
+/** same values in this 3-bit field as PIPE_FUNC_* */
+#define VC4_CONFIG_BITS_DEPTH_FUNC_SHIFT           4
+#define VC4_CONFIG_BITS_COVERAGE_READ_LEAVE        BIT(3)
+
+#define VC4_CONFIG_BITS_COVERAGE_UPDATE_NONZERO    (0 << 1)
+#define VC4_CONFIG_BITS_COVERAGE_UPDATE_ODD        (1 << 1)
+#define VC4_CONFIG_BITS_COVERAGE_UPDATE_OR         (2 << 1)
+#define VC4_CONFIG_BITS_COVERAGE_UPDATE_ZERO       (3 << 1)
+
+#define VC4_CONFIG_BITS_COVERAGE_PIPE_SELECT       BIT(0)
+/** @} */
+
+/** @{ byte 0 of config bits. */
+#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_NONE (0 << 6)
+#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_4X   (1 << 6)
+#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_16X  (2 << 6)
+
+#define VC4_CONFIG_BITS_AA_POINTS_AND_LINES        BIT(4)
+#define VC4_CONFIG_BITS_ENABLE_DEPTH_OFFSET        BIT(3)
+#define VC4_CONFIG_BITS_CW_PRIMITIVES              BIT(2)
+#define VC4_CONFIG_BITS_ENABLE_PRIM_BACK           BIT(1)
+#define VC4_CONFIG_BITS_ENABLE_PRIM_FRONT          BIT(0)
+/** @} */
+
+/** @{ bits in the last u8 of VC4_PACKET_TILE_BINNING_MODE_CONFIG */
+#define VC4_BIN_CONFIG_DB_NON_MS                   BIT(7)
+
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK       VC4_MASK(6, 5)
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_SHIFT      5
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_32         0
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_64         1
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128        2
+#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_256        3
+
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK  VC4_MASK(4, 3)
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_SHIFT 3
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32    0
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_64    1
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_128   2
+#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_256   3
+
+#define VC4_BIN_CONFIG_AUTO_INIT_TSDA              BIT(2)
+#define VC4_BIN_CONFIG_TILE_BUFFER_64BIT           BIT(1)
+#define VC4_BIN_CONFIG_MS_MODE_4X                  BIT(0)
+/** @} */
+
+/** @{ bits in the last u16 of VC4_PACKET_TILE_RENDERING_MODE_CONFIG */
+#define VC4_RENDER_CONFIG_DB_NON_MS                BIT(12)
+#define VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE BIT(11)
+#define VC4_RENDER_CONFIG_EARLY_Z_DIRECTION_G      BIT(10)
+#define VC4_RENDER_CONFIG_COVERAGE_MODE            BIT(9)
+#define VC4_RENDER_CONFIG_ENABLE_VG_MASK           BIT(8)
+
+/** The values of the field are VC4_TILING_FORMAT_* */
+#define VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK       VC4_MASK(7, 6)
+#define VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT      6
+
+#define VC4_RENDER_CONFIG_DECIMATE_MODE_1X         (0 << 4)
+#define VC4_RENDER_CONFIG_DECIMATE_MODE_4X         (1 << 4)
+#define VC4_RENDER_CONFIG_DECIMATE_MODE_16X        (2 << 4)
+
+#define VC4_RENDER_CONFIG_FORMAT_MASK              VC4_MASK(3, 2)
+#define VC4_RENDER_CONFIG_FORMAT_SHIFT             2
+#define VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED   0
+#define VC4_RENDER_CONFIG_FORMAT_RGBA8888          1
+#define VC4_RENDER_CONFIG_FORMAT_BGR565            2
+
+#define VC4_RENDER_CONFIG_TILE_BUFFER_64BIT        BIT(1)
+#define VC4_RENDER_CONFIG_MS_MODE_4X               BIT(0)
+
+#define VC4_PRIMITIVE_LIST_FORMAT_16_INDEX         (1 << 4)
+#define VC4_PRIMITIVE_LIST_FORMAT_32_XY            (3 << 4)
+#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_POINTS      (0 << 0)
+#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_LINES       (1 << 0)
+#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES   (2 << 0)
+#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_RHT         (3 << 0)
+
+enum vc4_texture_data_type {
+	VC4_TEXTURE_TYPE_RGBA8888 = 0,
+	VC4_TEXTURE_TYPE_RGBX8888 = 1,
+	VC4_TEXTURE_TYPE_RGBA4444 = 2,
+	VC4_TEXTURE_TYPE_RGBA5551 = 3,
+	VC4_TEXTURE_TYPE_RGB565 = 4,
+	VC4_TEXTURE_TYPE_LUMINANCE = 5,
+	VC4_TEXTURE_TYPE_ALPHA = 6,
+	VC4_TEXTURE_TYPE_LUMALPHA = 7,
+	VC4_TEXTURE_TYPE_ETC1 = 8,
+	VC4_TEXTURE_TYPE_S16F = 9,
+	VC4_TEXTURE_TYPE_S8 = 10,
+	VC4_TEXTURE_TYPE_S16 = 11,
+	VC4_TEXTURE_TYPE_BW1 = 12,
+	VC4_TEXTURE_TYPE_A4 = 13,
+	VC4_TEXTURE_TYPE_A1 = 14,
+	VC4_TEXTURE_TYPE_RGBA64 = 15,
+	VC4_TEXTURE_TYPE_RGBA32R = 16,
+	VC4_TEXTURE_TYPE_YUV422R = 17,
+};
+
+#define VC4_TEX_P0_OFFSET_MASK                     VC4_MASK(31, 12)
+#define VC4_TEX_P0_OFFSET_SHIFT                    12
+#define VC4_TEX_P0_CSWIZ_MASK                      VC4_MASK(11, 10)
+#define VC4_TEX_P0_CSWIZ_SHIFT                     10
+#define VC4_TEX_P0_CMMODE_MASK                     VC4_MASK(9, 9)
+#define VC4_TEX_P0_CMMODE_SHIFT                    9
+#define VC4_TEX_P0_FLIPY_MASK                      VC4_MASK(8, 8)
+#define VC4_TEX_P0_FLIPY_SHIFT                     8
+#define VC4_TEX_P0_TYPE_MASK                       VC4_MASK(7, 4)
+#define VC4_TEX_P0_TYPE_SHIFT                      4
+#define VC4_TEX_P0_MIPLVLS_MASK                    VC4_MASK(3, 0)
+#define VC4_TEX_P0_MIPLVLS_SHIFT                   0
+
+#define VC4_TEX_P1_TYPE4_MASK                      VC4_MASK(31, 31)
+#define VC4_TEX_P1_TYPE4_SHIFT                     31
+#define VC4_TEX_P1_HEIGHT_MASK                     VC4_MASK(30, 20)
+#define VC4_TEX_P1_HEIGHT_SHIFT                    20
+#define VC4_TEX_P1_ETCFLIP_MASK                    VC4_MASK(19, 19)
+#define VC4_TEX_P1_ETCFLIP_SHIFT                   19
+#define VC4_TEX_P1_WIDTH_MASK                      VC4_MASK(18, 8)
+#define VC4_TEX_P1_WIDTH_SHIFT                     8
+
+#define VC4_TEX_P1_MAGFILT_MASK                    VC4_MASK(7, 7)
+#define VC4_TEX_P1_MAGFILT_SHIFT                   7
+# define VC4_TEX_P1_MAGFILT_LINEAR                 0
+# define VC4_TEX_P1_MAGFILT_NEAREST                1
+
+#define VC4_TEX_P1_MINFILT_MASK                    VC4_MASK(6, 4)
+#define VC4_TEX_P1_MINFILT_SHIFT                   4
+# define VC4_TEX_P1_MINFILT_LINEAR                 0
+# define VC4_TEX_P1_MINFILT_NEAREST                1
+# define VC4_TEX_P1_MINFILT_NEAR_MIP_NEAR          2
+# define VC4_TEX_P1_MINFILT_NEAR_MIP_LIN           3
+# define VC4_TEX_P1_MINFILT_LIN_MIP_NEAR           4
+# define VC4_TEX_P1_MINFILT_LIN_MIP_LIN            5
+
+#define VC4_TEX_P1_WRAP_T_MASK                     VC4_MASK(3, 2)
+#define VC4_TEX_P1_WRAP_T_SHIFT                    2
+#define VC4_TEX_P1_WRAP_S_MASK                     VC4_MASK(1, 0)
+#define VC4_TEX_P1_WRAP_S_SHIFT                    0
+# define VC4_TEX_P1_WRAP_REPEAT                    0
+# define VC4_TEX_P1_WRAP_CLAMP                     1
+# define VC4_TEX_P1_WRAP_MIRROR                    2
+# define VC4_TEX_P1_WRAP_BORDER                    3
+
+#define VC4_TEX_P2_PTYPE_MASK                      VC4_MASK(31, 30)
+#define VC4_TEX_P2_PTYPE_SHIFT                     30
+# define VC4_TEX_P2_PTYPE_IGNORED                  0
+# define VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE          1
+# define VC4_TEX_P2_PTYPE_CHILD_IMAGE_DIMENSIONS   2
+# define VC4_TEX_P2_PTYPE_CHILD_IMAGE_OFFSETS      3
+
+/* VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE bits */
+#define VC4_TEX_P2_CMST_MASK                       VC4_MASK(29, 12)
+#define VC4_TEX_P2_CMST_SHIFT                      12
+#define VC4_TEX_P2_BSLOD_MASK                      VC4_MASK(0, 0)
+#define VC4_TEX_P2_BSLOD_SHIFT                     0
+
+/* VC4_TEX_P2_PTYPE_CHILD_IMAGE_DIMENSIONS */
+#define VC4_TEX_P2_CHEIGHT_MASK                    VC4_MASK(22, 12)
+#define VC4_TEX_P2_CHEIGHT_SHIFT                   12
+#define VC4_TEX_P2_CWIDTH_MASK                     VC4_MASK(10, 0)
+#define VC4_TEX_P2_CWIDTH_SHIFT                    0
+
+/* VC4_TEX_P2_PTYPE_CHILD_IMAGE_OFFSETS */
+#define VC4_TEX_P2_CYOFF_MASK                      VC4_MASK(22, 12)
+#define VC4_TEX_P2_CYOFF_SHIFT                     12
+#define VC4_TEX_P2_CXOFF_MASK                      VC4_MASK(10, 0)
+#define VC4_TEX_P2_CXOFF_SHIFT                     0
+
+#endif /* VC4_PACKET_H */
diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index cdd8b10..0addbad 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -29,6 +29,14 @@
 	u32 *dlist;
 	u32 dlist_size; /* Number of dwords in allocated for the display list */
 	u32 dlist_count; /* Number of used dwords in the display list. */
+
+	/* Offset in the dlist to pointer word 0. */
+	u32 pw0_offset;
+
+	/* Offset where the plane's dlist was last stored in the
+	   hardware at vc4_crtc_atomic_flush() time.
+	*/
+	u32 *hw_dlist;
 };
 
 static inline struct vc4_plane_state *
@@ -70,7 +78,7 @@
 	return state->fb && state->crtc;
 }
 
-struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane)
+static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane)
 {
 	struct vc4_plane_state *vc4_state;
 
@@ -97,8 +105,8 @@
 	return &vc4_state->base;
 }
 
-void vc4_plane_destroy_state(struct drm_plane *plane,
-			     struct drm_plane_state *state)
+static void vc4_plane_destroy_state(struct drm_plane *plane,
+				    struct drm_plane_state *state)
 {
 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
 
@@ -108,7 +116,7 @@
 }
 
 /* Called during init to allocate the plane's atomic state. */
-void vc4_plane_reset(struct drm_plane *plane)
+static void vc4_plane_reset(struct drm_plane *plane)
 {
 	struct vc4_plane_state *vc4_state;
 
@@ -157,6 +165,16 @@
 	int crtc_w = state->crtc_w;
 	int crtc_h = state->crtc_h;
 
+	if (state->crtc_w << 16 != state->src_w ||
+	    state->crtc_h << 16 != state->src_h) {
+		/* We don't support scaling yet, which involves
+		 * allocating the LBM memory for scaling temporary
+		 * storage, and putting filter kernels in the HVS
+		 * context.
+		 */
+		return -EINVAL;
+	}
+
 	if (crtc_x < 0) {
 		offset += drm_format_plane_cpp(fb->pixel_format, 0) * -crtc_x;
 		crtc_w += crtc_x;
@@ -197,6 +215,8 @@
 	/* Position Word 3: Context.  Written by the HVS. */
 	vc4_dlist_write(vc4_state, 0xc0c0c0c0);
 
+	vc4_state->pw0_offset = vc4_state->dlist_count;
+
 	/* Pointer Word 0: RGB / Y Pointer */
 	vc4_dlist_write(vc4_state, bo->paddr + offset);
 
@@ -248,6 +268,8 @@
 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
 	int i;
 
+	vc4_state->hw_dlist = dlist;
+
 	/* Can't memcpy_toio() because it needs to be 32-bit writes. */
 	for (i = 0; i < vc4_state->dlist_count; i++)
 		writel(vc4_state->dlist[i], &dlist[i]);
@@ -262,6 +284,34 @@
 	return vc4_state->dlist_count;
 }
 
+/* Updates the plane to immediately (well, once the FIFO needs
+ * refilling) scan out from at a new framebuffer.
+ */
+void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb)
+{
+	struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
+	struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0);
+	uint32_t addr;
+
+	/* We're skipping the address adjustment for negative origin,
+	 * because this is only called on the primary plane.
+	 */
+	WARN_ON_ONCE(plane->state->crtc_x < 0 || plane->state->crtc_y < 0);
+	addr = bo->paddr + fb->offsets[0];
+
+	/* Write the new address into the hardware immediately.  The
+	 * scanout will start from this address as soon as the FIFO
+	 * needs to refill with pixels.
+	 */
+	writel(addr, &vc4_state->hw_dlist[vc4_state->pw0_offset]);
+
+	/* Also update the CPU-side dlist copy, so that any later
+	 * atomic updates that don't do a new modeset on our plane
+	 * also use our updated address.
+	 */
+	vc4_state->dlist[vc4_state->pw0_offset] = addr;
+}
+
 static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = {
 	.prepare_fb = NULL,
 	.cleanup_fb = NULL,
@@ -307,7 +357,7 @@
 	ret = drm_universal_plane_init(dev, plane, 0xff,
 				       &vc4_plane_funcs,
 				       formats, ARRAY_SIZE(formats),
-				       type);
+				       type, NULL);
 
 	drm_plane_helper_add(plane, &vc4_plane_helper_funcs);
 
diff --git a/drivers/gpu/drm/vc4/vc4_qpu_defines.h b/drivers/gpu/drm/vc4/vc4_qpu_defines.h
new file mode 100644
index 0000000..d5c2f3c
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_qpu_defines.h
@@ -0,0 +1,264 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef VC4_QPU_DEFINES_H
+#define VC4_QPU_DEFINES_H
+
+enum qpu_op_add {
+	QPU_A_NOP,
+	QPU_A_FADD,
+	QPU_A_FSUB,
+	QPU_A_FMIN,
+	QPU_A_FMAX,
+	QPU_A_FMINABS,
+	QPU_A_FMAXABS,
+	QPU_A_FTOI,
+	QPU_A_ITOF,
+	QPU_A_ADD = 12,
+	QPU_A_SUB,
+	QPU_A_SHR,
+	QPU_A_ASR,
+	QPU_A_ROR,
+	QPU_A_SHL,
+	QPU_A_MIN,
+	QPU_A_MAX,
+	QPU_A_AND,
+	QPU_A_OR,
+	QPU_A_XOR,
+	QPU_A_NOT,
+	QPU_A_CLZ,
+	QPU_A_V8ADDS = 30,
+	QPU_A_V8SUBS = 31,
+};
+
+enum qpu_op_mul {
+	QPU_M_NOP,
+	QPU_M_FMUL,
+	QPU_M_MUL24,
+	QPU_M_V8MULD,
+	QPU_M_V8MIN,
+	QPU_M_V8MAX,
+	QPU_M_V8ADDS,
+	QPU_M_V8SUBS,
+};
+
+enum qpu_raddr {
+	QPU_R_FRAG_PAYLOAD_ZW = 15, /* W for A file, Z for B file */
+	/* 0-31 are the plain regfile a or b fields */
+	QPU_R_UNIF = 32,
+	QPU_R_VARY = 35,
+	QPU_R_ELEM_QPU = 38,
+	QPU_R_NOP,
+	QPU_R_XY_PIXEL_COORD = 41,
+	QPU_R_MS_REV_FLAGS = 41,
+	QPU_R_VPM = 48,
+	QPU_R_VPM_LD_BUSY,
+	QPU_R_VPM_LD_WAIT,
+	QPU_R_MUTEX_ACQUIRE,
+};
+
+enum qpu_waddr {
+	/* 0-31 are the plain regfile a or b fields */
+	QPU_W_ACC0 = 32, /* aka r0 */
+	QPU_W_ACC1,
+	QPU_W_ACC2,
+	QPU_W_ACC3,
+	QPU_W_TMU_NOSWAP,
+	QPU_W_ACC5,
+	QPU_W_HOST_INT,
+	QPU_W_NOP,
+	QPU_W_UNIFORMS_ADDRESS,
+	QPU_W_QUAD_XY, /* X for regfile a, Y for regfile b */
+	QPU_W_MS_FLAGS = 42,
+	QPU_W_REV_FLAG = 42,
+	QPU_W_TLB_STENCIL_SETUP = 43,
+	QPU_W_TLB_Z,
+	QPU_W_TLB_COLOR_MS,
+	QPU_W_TLB_COLOR_ALL,
+	QPU_W_TLB_ALPHA_MASK,
+	QPU_W_VPM,
+	QPU_W_VPMVCD_SETUP, /* LD for regfile a, ST for regfile b */
+	QPU_W_VPM_ADDR, /* LD for regfile a, ST for regfile b */
+	QPU_W_MUTEX_RELEASE,
+	QPU_W_SFU_RECIP,
+	QPU_W_SFU_RECIPSQRT,
+	QPU_W_SFU_EXP,
+	QPU_W_SFU_LOG,
+	QPU_W_TMU0_S,
+	QPU_W_TMU0_T,
+	QPU_W_TMU0_R,
+	QPU_W_TMU0_B,
+	QPU_W_TMU1_S,
+	QPU_W_TMU1_T,
+	QPU_W_TMU1_R,
+	QPU_W_TMU1_B,
+};
+
+enum qpu_sig_bits {
+	QPU_SIG_SW_BREAKPOINT,
+	QPU_SIG_NONE,
+	QPU_SIG_THREAD_SWITCH,
+	QPU_SIG_PROG_END,
+	QPU_SIG_WAIT_FOR_SCOREBOARD,
+	QPU_SIG_SCOREBOARD_UNLOCK,
+	QPU_SIG_LAST_THREAD_SWITCH,
+	QPU_SIG_COVERAGE_LOAD,
+	QPU_SIG_COLOR_LOAD,
+	QPU_SIG_COLOR_LOAD_END,
+	QPU_SIG_LOAD_TMU0,
+	QPU_SIG_LOAD_TMU1,
+	QPU_SIG_ALPHA_MASK_LOAD,
+	QPU_SIG_SMALL_IMM,
+	QPU_SIG_LOAD_IMM,
+	QPU_SIG_BRANCH
+};
+
+enum qpu_mux {
+	/* hardware mux values */
+	QPU_MUX_R0,
+	QPU_MUX_R1,
+	QPU_MUX_R2,
+	QPU_MUX_R3,
+	QPU_MUX_R4,
+	QPU_MUX_R5,
+	QPU_MUX_A,
+	QPU_MUX_B,
+
+	/* non-hardware mux values */
+	QPU_MUX_IMM,
+};
+
+enum qpu_cond {
+	QPU_COND_NEVER,
+	QPU_COND_ALWAYS,
+	QPU_COND_ZS,
+	QPU_COND_ZC,
+	QPU_COND_NS,
+	QPU_COND_NC,
+	QPU_COND_CS,
+	QPU_COND_CC,
+};
+
+enum qpu_pack_mul {
+	QPU_PACK_MUL_NOP,
+	/* replicated to each 8 bits of the 32-bit dst. */
+	QPU_PACK_MUL_8888 = 3,
+	QPU_PACK_MUL_8A,
+	QPU_PACK_MUL_8B,
+	QPU_PACK_MUL_8C,
+	QPU_PACK_MUL_8D,
+};
+
+enum qpu_pack_a {
+	QPU_PACK_A_NOP,
+	/* convert to 16 bit float if float input, or to int16. */
+	QPU_PACK_A_16A,
+	QPU_PACK_A_16B,
+	/* replicated to each 8 bits of the 32-bit dst. */
+	QPU_PACK_A_8888,
+	/* Convert to 8-bit unsigned int. */
+	QPU_PACK_A_8A,
+	QPU_PACK_A_8B,
+	QPU_PACK_A_8C,
+	QPU_PACK_A_8D,
+
+	/* Saturating variants of the previous instructions. */
+	QPU_PACK_A_32_SAT, /* int-only */
+	QPU_PACK_A_16A_SAT, /* int or float */
+	QPU_PACK_A_16B_SAT,
+	QPU_PACK_A_8888_SAT,
+	QPU_PACK_A_8A_SAT,
+	QPU_PACK_A_8B_SAT,
+	QPU_PACK_A_8C_SAT,
+	QPU_PACK_A_8D_SAT,
+};
+
+enum qpu_unpack_r4 {
+	QPU_UNPACK_R4_NOP,
+	QPU_UNPACK_R4_F16A_TO_F32,
+	QPU_UNPACK_R4_F16B_TO_F32,
+	QPU_UNPACK_R4_8D_REP,
+	QPU_UNPACK_R4_8A,
+	QPU_UNPACK_R4_8B,
+	QPU_UNPACK_R4_8C,
+	QPU_UNPACK_R4_8D,
+};
+
+#define QPU_MASK(high, low) \
+	((((uint64_t)1 << ((high) - (low) + 1)) - 1) << (low))
+
+#define QPU_GET_FIELD(word, field) \
+	((uint32_t)(((word)  & field ## _MASK) >> field ## _SHIFT))
+
+#define QPU_SIG_SHIFT                   60
+#define QPU_SIG_MASK                    QPU_MASK(63, 60)
+
+#define QPU_UNPACK_SHIFT                57
+#define QPU_UNPACK_MASK                 QPU_MASK(59, 57)
+
+/**
+ * If set, the pack field means PACK_MUL or R4 packing, instead of normal
+ * regfile a packing.
+ */
+#define QPU_PM                          ((uint64_t)1 << 56)
+
+#define QPU_PACK_SHIFT                  52
+#define QPU_PACK_MASK                   QPU_MASK(55, 52)
+
+#define QPU_COND_ADD_SHIFT              49
+#define QPU_COND_ADD_MASK               QPU_MASK(51, 49)
+#define QPU_COND_MUL_SHIFT              46
+#define QPU_COND_MUL_MASK               QPU_MASK(48, 46)
+
+#define QPU_SF                          ((uint64_t)1 << 45)
+
+#define QPU_WADDR_ADD_SHIFT             38
+#define QPU_WADDR_ADD_MASK              QPU_MASK(43, 38)
+#define QPU_WADDR_MUL_SHIFT             32
+#define QPU_WADDR_MUL_MASK              QPU_MASK(37, 32)
+
+#define QPU_OP_MUL_SHIFT                29
+#define QPU_OP_MUL_MASK                 QPU_MASK(31, 29)
+
+#define QPU_RADDR_A_SHIFT               18
+#define QPU_RADDR_A_MASK                QPU_MASK(23, 18)
+#define QPU_RADDR_B_SHIFT               12
+#define QPU_RADDR_B_MASK                QPU_MASK(17, 12)
+#define QPU_SMALL_IMM_SHIFT             12
+#define QPU_SMALL_IMM_MASK              QPU_MASK(17, 12)
+
+#define QPU_ADD_A_SHIFT                 9
+#define QPU_ADD_A_MASK                  QPU_MASK(11, 9)
+#define QPU_ADD_B_SHIFT                 6
+#define QPU_ADD_B_MASK                  QPU_MASK(8, 6)
+#define QPU_MUL_A_SHIFT                 3
+#define QPU_MUL_A_MASK                  QPU_MASK(5, 3)
+#define QPU_MUL_B_SHIFT                 0
+#define QPU_MUL_B_MASK                  QPU_MASK(2, 0)
+
+#define QPU_WS                          ((uint64_t)1 << 44)
+
+#define QPU_OP_ADD_SHIFT                24
+#define QPU_OP_ADD_MASK                 QPU_MASK(28, 24)
+
+#endif /* VC4_QPU_DEFINES_H */
diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h
index 9e4e904..4e52a0a 100644
--- a/drivers/gpu/drm/vc4/vc4_regs.h
+++ b/drivers/gpu/drm/vc4/vc4_regs.h
@@ -154,7 +154,7 @@
 #define V3D_PCTRS14  0x006f4
 #define V3D_PCTR15   0x006f8
 #define V3D_PCTRS15  0x006fc
-#define V3D_BGE      0x00f00
+#define V3D_DBGE     0x00f00
 #define V3D_FDBGO    0x00f04
 #define V3D_FDBGB    0x00f08
 #define V3D_FDBGR    0x00f0c
diff --git a/drivers/gpu/drm/vc4/vc4_render_cl.c b/drivers/gpu/drm/vc4/vc4_render_cl.c
new file mode 100644
index 0000000..8a2a312
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_render_cl.c
@@ -0,0 +1,634 @@
+/*
+ * Copyright © 2014-2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * DOC: Render command list generation
+ *
+ * In the VC4 driver, render command list generation is performed by the
+ * kernel instead of userspace.  We do this because validating a
+ * user-submitted command list is hard to get right and has high CPU overhead,
+ * while the number of valid configurations for render command lists is
+ * actually fairly low.
+ */
+
+#include "uapi/drm/vc4_drm.h"
+#include "vc4_drv.h"
+#include "vc4_packet.h"
+
+struct vc4_rcl_setup {
+	struct drm_gem_cma_object *color_read;
+	struct drm_gem_cma_object *color_write;
+	struct drm_gem_cma_object *zs_read;
+	struct drm_gem_cma_object *zs_write;
+	struct drm_gem_cma_object *msaa_color_write;
+	struct drm_gem_cma_object *msaa_zs_write;
+
+	struct drm_gem_cma_object *rcl;
+	u32 next_offset;
+};
+
+static inline void rcl_u8(struct vc4_rcl_setup *setup, u8 val)
+{
+	*(u8 *)(setup->rcl->vaddr + setup->next_offset) = val;
+	setup->next_offset += 1;
+}
+
+static inline void rcl_u16(struct vc4_rcl_setup *setup, u16 val)
+{
+	*(u16 *)(setup->rcl->vaddr + setup->next_offset) = val;
+	setup->next_offset += 2;
+}
+
+static inline void rcl_u32(struct vc4_rcl_setup *setup, u32 val)
+{
+	*(u32 *)(setup->rcl->vaddr + setup->next_offset) = val;
+	setup->next_offset += 4;
+}
+
+/*
+ * Emits a no-op STORE_TILE_BUFFER_GENERAL.
+ *
+ * If we emit a PACKET_TILE_COORDINATES, it must be followed by a store of
+ * some sort before another load is triggered.
+ */
+static void vc4_store_before_load(struct vc4_rcl_setup *setup)
+{
+	rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
+	rcl_u16(setup,
+		VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_NONE,
+			      VC4_LOADSTORE_TILE_BUFFER_BUFFER) |
+		VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR |
+		VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR |
+		VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR);
+	rcl_u32(setup, 0); /* no address, since we're in None mode */
+}
+
+/*
+ * Calculates the physical address of the start of a tile in a RCL surface.
+ *
+ * Unlike the other load/store packets,
+ * VC4_PACKET_LOAD/STORE_FULL_RES_TILE_BUFFER don't look at the tile
+ * coordinates packet, and instead just store to the address given.
+ */
+static uint32_t vc4_full_res_offset(struct vc4_exec_info *exec,
+				    struct drm_gem_cma_object *bo,
+				    struct drm_vc4_submit_rcl_surface *surf,
+				    uint8_t x, uint8_t y)
+{
+	return bo->paddr + surf->offset + VC4_TILE_BUFFER_SIZE *
+		(DIV_ROUND_UP(exec->args->width, 32) * y + x);
+}
+
+/*
+ * Emits a PACKET_TILE_COORDINATES if one isn't already pending.
+ *
+ * The tile coordinates packet triggers a pending load if there is one, are
+ * used for clipping during rendering, and determine where loads/stores happen
+ * relative to their base address.
+ */
+static void vc4_tile_coordinates(struct vc4_rcl_setup *setup,
+				 uint32_t x, uint32_t y)
+{
+	rcl_u8(setup, VC4_PACKET_TILE_COORDINATES);
+	rcl_u8(setup, x);
+	rcl_u8(setup, y);
+}
+
+static void emit_tile(struct vc4_exec_info *exec,
+		      struct vc4_rcl_setup *setup,
+		      uint8_t x, uint8_t y, bool first, bool last)
+{
+	struct drm_vc4_submit_cl *args = exec->args;
+	bool has_bin = args->bin_cl_size != 0;
+
+	/* Note that the load doesn't actually occur until the
+	 * tile coords packet is processed, and only one load
+	 * may be outstanding at a time.
+	 */
+	if (setup->color_read) {
+		if (args->color_read.flags &
+		    VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
+			rcl_u8(setup, VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER);
+			rcl_u32(setup,
+				vc4_full_res_offset(exec, setup->color_read,
+						    &args->color_read, x, y) |
+				VC4_LOADSTORE_FULL_RES_DISABLE_ZS);
+		} else {
+			rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
+			rcl_u16(setup, args->color_read.bits);
+			rcl_u32(setup, setup->color_read->paddr +
+				args->color_read.offset);
+		}
+	}
+
+	if (setup->zs_read) {
+		if (args->zs_read.flags &
+		    VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
+			rcl_u8(setup, VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER);
+			rcl_u32(setup,
+				vc4_full_res_offset(exec, setup->zs_read,
+						    &args->zs_read, x, y) |
+				VC4_LOADSTORE_FULL_RES_DISABLE_COLOR);
+		} else {
+			if (setup->color_read) {
+				/* Exec previous load. */
+				vc4_tile_coordinates(setup, x, y);
+				vc4_store_before_load(setup);
+			}
+
+			rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
+			rcl_u16(setup, args->zs_read.bits);
+			rcl_u32(setup, setup->zs_read->paddr +
+				args->zs_read.offset);
+		}
+	}
+
+	/* Clipping depends on tile coordinates having been
+	 * emitted, so we always need one here.
+	 */
+	vc4_tile_coordinates(setup, x, y);
+
+	/* Wait for the binner before jumping to the first
+	 * tile's lists.
+	 */
+	if (first && has_bin)
+		rcl_u8(setup, VC4_PACKET_WAIT_ON_SEMAPHORE);
+
+	if (has_bin) {
+		rcl_u8(setup, VC4_PACKET_BRANCH_TO_SUB_LIST);
+		rcl_u32(setup, (exec->tile_bo->paddr +
+				exec->tile_alloc_offset +
+				(y * exec->bin_tiles_x + x) * 32));
+	}
+
+	if (setup->msaa_color_write) {
+		bool last_tile_write = (!setup->msaa_zs_write &&
+					!setup->zs_write &&
+					!setup->color_write);
+		uint32_t bits = VC4_LOADSTORE_FULL_RES_DISABLE_ZS;
+
+		if (!last_tile_write)
+			bits |= VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL;
+		else if (last)
+			bits |= VC4_LOADSTORE_FULL_RES_EOF;
+		rcl_u8(setup, VC4_PACKET_STORE_FULL_RES_TILE_BUFFER);
+		rcl_u32(setup,
+			vc4_full_res_offset(exec, setup->msaa_color_write,
+					    &args->msaa_color_write, x, y) |
+			bits);
+	}
+
+	if (setup->msaa_zs_write) {
+		bool last_tile_write = (!setup->zs_write &&
+					!setup->color_write);
+		uint32_t bits = VC4_LOADSTORE_FULL_RES_DISABLE_COLOR;
+
+		if (setup->msaa_color_write)
+			vc4_tile_coordinates(setup, x, y);
+		if (!last_tile_write)
+			bits |= VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL;
+		else if (last)
+			bits |= VC4_LOADSTORE_FULL_RES_EOF;
+		rcl_u8(setup, VC4_PACKET_STORE_FULL_RES_TILE_BUFFER);
+		rcl_u32(setup,
+			vc4_full_res_offset(exec, setup->msaa_zs_write,
+					    &args->msaa_zs_write, x, y) |
+			bits);
+	}
+
+	if (setup->zs_write) {
+		bool last_tile_write = !setup->color_write;
+
+		if (setup->msaa_color_write || setup->msaa_zs_write)
+			vc4_tile_coordinates(setup, x, y);
+
+		rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
+		rcl_u16(setup, args->zs_write.bits |
+			(last_tile_write ?
+			 0 : VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR));
+		rcl_u32(setup,
+			(setup->zs_write->paddr + args->zs_write.offset) |
+			((last && last_tile_write) ?
+			 VC4_LOADSTORE_TILE_BUFFER_EOF : 0));
+	}
+
+	if (setup->color_write) {
+		if (setup->msaa_color_write || setup->msaa_zs_write ||
+		    setup->zs_write) {
+			vc4_tile_coordinates(setup, x, y);
+		}
+
+		if (last)
+			rcl_u8(setup, VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF);
+		else
+			rcl_u8(setup, VC4_PACKET_STORE_MS_TILE_BUFFER);
+	}
+}
+
+static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
+			     struct vc4_rcl_setup *setup)
+{
+	struct drm_vc4_submit_cl *args = exec->args;
+	bool has_bin = args->bin_cl_size != 0;
+	uint8_t min_x_tile = args->min_x_tile;
+	uint8_t min_y_tile = args->min_y_tile;
+	uint8_t max_x_tile = args->max_x_tile;
+	uint8_t max_y_tile = args->max_y_tile;
+	uint8_t xtiles = max_x_tile - min_x_tile + 1;
+	uint8_t ytiles = max_y_tile - min_y_tile + 1;
+	uint8_t x, y;
+	uint32_t size, loop_body_size;
+
+	size = VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE;
+	loop_body_size = VC4_PACKET_TILE_COORDINATES_SIZE;
+
+	if (args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) {
+		size += VC4_PACKET_CLEAR_COLORS_SIZE +
+			VC4_PACKET_TILE_COORDINATES_SIZE +
+			VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
+	}
+
+	if (setup->color_read) {
+		if (args->color_read.flags &
+		    VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
+			loop_body_size += VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE;
+		} else {
+			loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE;
+		}
+	}
+	if (setup->zs_read) {
+		if (args->zs_read.flags &
+		    VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
+			loop_body_size += VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE;
+		} else {
+			if (setup->color_read &&
+			    !(args->color_read.flags &
+			      VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES)) {
+				loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE;
+				loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
+			}
+			loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE;
+		}
+	}
+
+	if (has_bin) {
+		size += VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE;
+		loop_body_size += VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE;
+	}
+
+	if (setup->msaa_color_write)
+		loop_body_size += VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE;
+	if (setup->msaa_zs_write)
+		loop_body_size += VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE;
+
+	if (setup->zs_write)
+		loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
+	if (setup->color_write)
+		loop_body_size += VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE;
+
+	/* We need a VC4_PACKET_TILE_COORDINATES in between each store. */
+	loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE *
+		((setup->msaa_color_write != NULL) +
+		 (setup->msaa_zs_write != NULL) +
+		 (setup->color_write != NULL) +
+		 (setup->zs_write != NULL) - 1);
+
+	size += xtiles * ytiles * loop_body_size;
+
+	setup->rcl = &vc4_bo_create(dev, size, true)->base;
+	if (!setup->rcl)
+		return -ENOMEM;
+	list_add_tail(&to_vc4_bo(&setup->rcl->base)->unref_head,
+		      &exec->unref_list);
+
+	rcl_u8(setup, VC4_PACKET_TILE_RENDERING_MODE_CONFIG);
+	rcl_u32(setup,
+		(setup->color_write ? (setup->color_write->paddr +
+				       args->color_write.offset) :
+		 0));
+	rcl_u16(setup, args->width);
+	rcl_u16(setup, args->height);
+	rcl_u16(setup, args->color_write.bits);
+
+	/* The tile buffer gets cleared when the previous tile is stored.  If
+	 * the clear values changed between frames, then the tile buffer has
+	 * stale clear values in it, so we have to do a store in None mode (no
+	 * writes) so that we trigger the tile buffer clear.
+	 */
+	if (args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) {
+		rcl_u8(setup, VC4_PACKET_CLEAR_COLORS);
+		rcl_u32(setup, args->clear_color[0]);
+		rcl_u32(setup, args->clear_color[1]);
+		rcl_u32(setup, args->clear_z);
+		rcl_u8(setup, args->clear_s);
+
+		vc4_tile_coordinates(setup, 0, 0);
+
+		rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
+		rcl_u16(setup, VC4_LOADSTORE_TILE_BUFFER_NONE);
+		rcl_u32(setup, 0); /* no address, since we're in None mode */
+	}
+
+	for (y = min_y_tile; y <= max_y_tile; y++) {
+		for (x = min_x_tile; x <= max_x_tile; x++) {
+			bool first = (x == min_x_tile && y == min_y_tile);
+			bool last = (x == max_x_tile && y == max_y_tile);
+
+			emit_tile(exec, setup, x, y, first, last);
+		}
+	}
+
+	BUG_ON(setup->next_offset != size);
+	exec->ct1ca = setup->rcl->paddr;
+	exec->ct1ea = setup->rcl->paddr + setup->next_offset;
+
+	return 0;
+}
+
+static int vc4_full_res_bounds_check(struct vc4_exec_info *exec,
+				     struct drm_gem_cma_object *obj,
+				     struct drm_vc4_submit_rcl_surface *surf)
+{
+	struct drm_vc4_submit_cl *args = exec->args;
+	u32 render_tiles_stride = DIV_ROUND_UP(exec->args->width, 32);
+
+	if (surf->offset > obj->base.size) {
+		DRM_ERROR("surface offset %d > BO size %zd\n",
+			  surf->offset, obj->base.size);
+		return -EINVAL;
+	}
+
+	if ((obj->base.size - surf->offset) / VC4_TILE_BUFFER_SIZE <
+	    render_tiles_stride * args->max_y_tile + args->max_x_tile) {
+		DRM_ERROR("MSAA tile %d, %d out of bounds "
+			  "(bo size %zd, offset %d).\n",
+			  args->max_x_tile, args->max_y_tile,
+			  obj->base.size,
+			  surf->offset);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int vc4_rcl_msaa_surface_setup(struct vc4_exec_info *exec,
+				      struct drm_gem_cma_object **obj,
+				      struct drm_vc4_submit_rcl_surface *surf)
+{
+	if (surf->flags != 0 || surf->bits != 0) {
+		DRM_ERROR("MSAA surface had nonzero flags/bits\n");
+		return -EINVAL;
+	}
+
+	if (surf->hindex == ~0)
+		return 0;
+
+	*obj = vc4_use_bo(exec, surf->hindex);
+	if (!*obj)
+		return -EINVAL;
+
+	if (surf->offset & 0xf) {
+		DRM_ERROR("MSAA write must be 16b aligned.\n");
+		return -EINVAL;
+	}
+
+	return vc4_full_res_bounds_check(exec, *obj, surf);
+}
+
+static int vc4_rcl_surface_setup(struct vc4_exec_info *exec,
+				 struct drm_gem_cma_object **obj,
+				 struct drm_vc4_submit_rcl_surface *surf)
+{
+	uint8_t tiling = VC4_GET_FIELD(surf->bits,
+				       VC4_LOADSTORE_TILE_BUFFER_TILING);
+	uint8_t buffer = VC4_GET_FIELD(surf->bits,
+				       VC4_LOADSTORE_TILE_BUFFER_BUFFER);
+	uint8_t format = VC4_GET_FIELD(surf->bits,
+				       VC4_LOADSTORE_TILE_BUFFER_FORMAT);
+	int cpp;
+	int ret;
+
+	if (surf->flags & ~VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
+		DRM_ERROR("Extra flags set\n");
+		return -EINVAL;
+	}
+
+	if (surf->hindex == ~0)
+		return 0;
+
+	*obj = vc4_use_bo(exec, surf->hindex);
+	if (!*obj)
+		return -EINVAL;
+
+	if (surf->flags & VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
+		if (surf == &exec->args->zs_write) {
+			DRM_ERROR("general zs write may not be a full-res.\n");
+			return -EINVAL;
+		}
+
+		if (surf->bits != 0) {
+			DRM_ERROR("load/store general bits set with "
+				  "full res load/store.\n");
+			return -EINVAL;
+		}
+
+		ret = vc4_full_res_bounds_check(exec, *obj, surf);
+		if (!ret)
+			return ret;
+
+		return 0;
+	}
+
+	if (surf->bits & ~(VC4_LOADSTORE_TILE_BUFFER_TILING_MASK |
+			   VC4_LOADSTORE_TILE_BUFFER_BUFFER_MASK |
+			   VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK)) {
+		DRM_ERROR("Unknown bits in load/store: 0x%04x\n",
+			  surf->bits);
+		return -EINVAL;
+	}
+
+	if (tiling > VC4_TILING_FORMAT_LT) {
+		DRM_ERROR("Bad tiling format\n");
+		return -EINVAL;
+	}
+
+	if (buffer == VC4_LOADSTORE_TILE_BUFFER_ZS) {
+		if (format != 0) {
+			DRM_ERROR("No color format should be set for ZS\n");
+			return -EINVAL;
+		}
+		cpp = 4;
+	} else if (buffer == VC4_LOADSTORE_TILE_BUFFER_COLOR) {
+		switch (format) {
+		case VC4_LOADSTORE_TILE_BUFFER_BGR565:
+		case VC4_LOADSTORE_TILE_BUFFER_BGR565_DITHER:
+			cpp = 2;
+			break;
+		case VC4_LOADSTORE_TILE_BUFFER_RGBA8888:
+			cpp = 4;
+			break;
+		default:
+			DRM_ERROR("Bad tile buffer format\n");
+			return -EINVAL;
+		}
+	} else {
+		DRM_ERROR("Bad load/store buffer %d.\n", buffer);
+		return -EINVAL;
+	}
+
+	if (surf->offset & 0xf) {
+		DRM_ERROR("load/store buffer must be 16b aligned.\n");
+		return -EINVAL;
+	}
+
+	if (!vc4_check_tex_size(exec, *obj, surf->offset, tiling,
+				exec->args->width, exec->args->height, cpp)) {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+vc4_rcl_render_config_surface_setup(struct vc4_exec_info *exec,
+				    struct vc4_rcl_setup *setup,
+				    struct drm_gem_cma_object **obj,
+				    struct drm_vc4_submit_rcl_surface *surf)
+{
+	uint8_t tiling = VC4_GET_FIELD(surf->bits,
+				       VC4_RENDER_CONFIG_MEMORY_FORMAT);
+	uint8_t format = VC4_GET_FIELD(surf->bits,
+				       VC4_RENDER_CONFIG_FORMAT);
+	int cpp;
+
+	if (surf->flags != 0) {
+		DRM_ERROR("No flags supported on render config.\n");
+		return -EINVAL;
+	}
+
+	if (surf->bits & ~(VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK |
+			   VC4_RENDER_CONFIG_FORMAT_MASK |
+			   VC4_RENDER_CONFIG_MS_MODE_4X |
+			   VC4_RENDER_CONFIG_DECIMATE_MODE_4X)) {
+		DRM_ERROR("Unknown bits in render config: 0x%04x\n",
+			  surf->bits);
+		return -EINVAL;
+	}
+
+	if (surf->hindex == ~0)
+		return 0;
+
+	*obj = vc4_use_bo(exec, surf->hindex);
+	if (!*obj)
+		return -EINVAL;
+
+	if (tiling > VC4_TILING_FORMAT_LT) {
+		DRM_ERROR("Bad tiling format\n");
+		return -EINVAL;
+	}
+
+	switch (format) {
+	case VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED:
+	case VC4_RENDER_CONFIG_FORMAT_BGR565:
+		cpp = 2;
+		break;
+	case VC4_RENDER_CONFIG_FORMAT_RGBA8888:
+		cpp = 4;
+		break;
+	default:
+		DRM_ERROR("Bad tile buffer format\n");
+		return -EINVAL;
+	}
+
+	if (!vc4_check_tex_size(exec, *obj, surf->offset, tiling,
+				exec->args->width, exec->args->height, cpp)) {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec)
+{
+	struct vc4_rcl_setup setup = {0};
+	struct drm_vc4_submit_cl *args = exec->args;
+	bool has_bin = args->bin_cl_size != 0;
+	int ret;
+
+	if (args->min_x_tile > args->max_x_tile ||
+	    args->min_y_tile > args->max_y_tile) {
+		DRM_ERROR("Bad render tile set (%d,%d)-(%d,%d)\n",
+			  args->min_x_tile, args->min_y_tile,
+			  args->max_x_tile, args->max_y_tile);
+		return -EINVAL;
+	}
+
+	if (has_bin &&
+	    (args->max_x_tile > exec->bin_tiles_x ||
+	     args->max_y_tile > exec->bin_tiles_y)) {
+		DRM_ERROR("Render tiles (%d,%d) outside of bin config "
+			  "(%d,%d)\n",
+			  args->max_x_tile, args->max_y_tile,
+			  exec->bin_tiles_x, exec->bin_tiles_y);
+		return -EINVAL;
+	}
+
+	ret = vc4_rcl_render_config_surface_setup(exec, &setup,
+						  &setup.color_write,
+						  &args->color_write);
+	if (ret)
+		return ret;
+
+	ret = vc4_rcl_surface_setup(exec, &setup.color_read, &args->color_read);
+	if (ret)
+		return ret;
+
+	ret = vc4_rcl_surface_setup(exec, &setup.zs_read, &args->zs_read);
+	if (ret)
+		return ret;
+
+	ret = vc4_rcl_surface_setup(exec, &setup.zs_write, &args->zs_write);
+	if (ret)
+		return ret;
+
+	ret = vc4_rcl_msaa_surface_setup(exec, &setup.msaa_color_write,
+					 &args->msaa_color_write);
+	if (ret)
+		return ret;
+
+	ret = vc4_rcl_msaa_surface_setup(exec, &setup.msaa_zs_write,
+					 &args->msaa_zs_write);
+	if (ret)
+		return ret;
+
+	/* We shouldn't even have the job submitted to us if there's no
+	 * surface to write out.
+	 */
+	if (!setup.color_write && !setup.zs_write &&
+	    !setup.msaa_color_write && !setup.msaa_zs_write) {
+		DRM_ERROR("RCL requires color or Z/S write\n");
+		return -EINVAL;
+	}
+
+	return vc4_create_rcl_bo(dev, exec, &setup);
+}
diff --git a/drivers/gpu/drm/vc4/vc4_trace.h b/drivers/gpu/drm/vc4/vc4_trace.h
new file mode 100644
index 0000000..ad7b1ea
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_trace.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2015 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#if !defined(_VC4_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _VC4_TRACE_H_
+
+#include <linux/stringify.h>
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM vc4
+#define TRACE_INCLUDE_FILE vc4_trace
+
+TRACE_EVENT(vc4_wait_for_seqno_begin,
+	    TP_PROTO(struct drm_device *dev, uint64_t seqno, uint64_t timeout),
+	    TP_ARGS(dev, seqno, timeout),
+
+	    TP_STRUCT__entry(
+			     __field(u32, dev)
+			     __field(u64, seqno)
+			     __field(u64, timeout)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = dev->primary->index;
+			   __entry->seqno = seqno;
+			   __entry->timeout = timeout;
+			   ),
+
+	    TP_printk("dev=%u, seqno=%llu, timeout=%llu",
+		      __entry->dev, __entry->seqno, __entry->timeout)
+);
+
+TRACE_EVENT(vc4_wait_for_seqno_end,
+	    TP_PROTO(struct drm_device *dev, uint64_t seqno),
+	    TP_ARGS(dev, seqno),
+
+	    TP_STRUCT__entry(
+			     __field(u32, dev)
+			     __field(u64, seqno)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = dev->primary->index;
+			   __entry->seqno = seqno;
+			   ),
+
+	    TP_printk("dev=%u, seqno=%llu",
+		      __entry->dev, __entry->seqno)
+);
+
+#endif /* _VC4_TRACE_H_ */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#include <trace/define_trace.h>
diff --git a/drivers/gpu/drm/vc4/vc4_trace_points.c b/drivers/gpu/drm/vc4/vc4_trace_points.c
new file mode 100644
index 0000000..e6278f2
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_trace_points.c
@@ -0,0 +1,14 @@
+/*
+ * Copyright (C) 2015 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "vc4_drv.h"
+
+#ifndef __CHECKER__
+#define CREATE_TRACE_POINTS
+#include "vc4_trace.h"
+#endif
diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c
new file mode 100644
index 0000000..424d515
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_v3d.c
@@ -0,0 +1,262 @@
+/*
+ * Copyright (c) 2014 The Linux Foundation. All rights reserved.
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "linux/component.h"
+#include "vc4_drv.h"
+#include "vc4_regs.h"
+
+#ifdef CONFIG_DEBUG_FS
+#define REGDEF(reg) { reg, #reg }
+static const struct {
+	uint32_t reg;
+	const char *name;
+} vc4_reg_defs[] = {
+	REGDEF(V3D_IDENT0),
+	REGDEF(V3D_IDENT1),
+	REGDEF(V3D_IDENT2),
+	REGDEF(V3D_SCRATCH),
+	REGDEF(V3D_L2CACTL),
+	REGDEF(V3D_SLCACTL),
+	REGDEF(V3D_INTCTL),
+	REGDEF(V3D_INTENA),
+	REGDEF(V3D_INTDIS),
+	REGDEF(V3D_CT0CS),
+	REGDEF(V3D_CT1CS),
+	REGDEF(V3D_CT0EA),
+	REGDEF(V3D_CT1EA),
+	REGDEF(V3D_CT0CA),
+	REGDEF(V3D_CT1CA),
+	REGDEF(V3D_CT00RA0),
+	REGDEF(V3D_CT01RA0),
+	REGDEF(V3D_CT0LC),
+	REGDEF(V3D_CT1LC),
+	REGDEF(V3D_CT0PC),
+	REGDEF(V3D_CT1PC),
+	REGDEF(V3D_PCS),
+	REGDEF(V3D_BFC),
+	REGDEF(V3D_RFC),
+	REGDEF(V3D_BPCA),
+	REGDEF(V3D_BPCS),
+	REGDEF(V3D_BPOA),
+	REGDEF(V3D_BPOS),
+	REGDEF(V3D_BXCF),
+	REGDEF(V3D_SQRSV0),
+	REGDEF(V3D_SQRSV1),
+	REGDEF(V3D_SQCNTL),
+	REGDEF(V3D_SRQPC),
+	REGDEF(V3D_SRQUA),
+	REGDEF(V3D_SRQUL),
+	REGDEF(V3D_SRQCS),
+	REGDEF(V3D_VPACNTL),
+	REGDEF(V3D_VPMBASE),
+	REGDEF(V3D_PCTRC),
+	REGDEF(V3D_PCTRE),
+	REGDEF(V3D_PCTR0),
+	REGDEF(V3D_PCTRS0),
+	REGDEF(V3D_PCTR1),
+	REGDEF(V3D_PCTRS1),
+	REGDEF(V3D_PCTR2),
+	REGDEF(V3D_PCTRS2),
+	REGDEF(V3D_PCTR3),
+	REGDEF(V3D_PCTRS3),
+	REGDEF(V3D_PCTR4),
+	REGDEF(V3D_PCTRS4),
+	REGDEF(V3D_PCTR5),
+	REGDEF(V3D_PCTRS5),
+	REGDEF(V3D_PCTR6),
+	REGDEF(V3D_PCTRS6),
+	REGDEF(V3D_PCTR7),
+	REGDEF(V3D_PCTRS7),
+	REGDEF(V3D_PCTR8),
+	REGDEF(V3D_PCTRS8),
+	REGDEF(V3D_PCTR9),
+	REGDEF(V3D_PCTRS9),
+	REGDEF(V3D_PCTR10),
+	REGDEF(V3D_PCTRS10),
+	REGDEF(V3D_PCTR11),
+	REGDEF(V3D_PCTRS11),
+	REGDEF(V3D_PCTR12),
+	REGDEF(V3D_PCTRS12),
+	REGDEF(V3D_PCTR13),
+	REGDEF(V3D_PCTRS13),
+	REGDEF(V3D_PCTR14),
+	REGDEF(V3D_PCTRS14),
+	REGDEF(V3D_PCTR15),
+	REGDEF(V3D_PCTRS15),
+	REGDEF(V3D_DBGE),
+	REGDEF(V3D_FDBGO),
+	REGDEF(V3D_FDBGB),
+	REGDEF(V3D_FDBGR),
+	REGDEF(V3D_FDBGS),
+	REGDEF(V3D_ERRSTAT),
+};
+
+int vc4_v3d_debugfs_regs(struct seq_file *m, void *unused)
+{
+	struct drm_info_node *node = (struct drm_info_node *)m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(vc4_reg_defs); i++) {
+		seq_printf(m, "%s (0x%04x): 0x%08x\n",
+			   vc4_reg_defs[i].name, vc4_reg_defs[i].reg,
+			   V3D_READ(vc4_reg_defs[i].reg));
+	}
+
+	return 0;
+}
+
+int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused)
+{
+	struct drm_info_node *node = (struct drm_info_node *)m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	uint32_t ident1 = V3D_READ(V3D_IDENT1);
+	uint32_t nslc = VC4_GET_FIELD(ident1, V3D_IDENT1_NSLC);
+	uint32_t tups = VC4_GET_FIELD(ident1, V3D_IDENT1_TUPS);
+	uint32_t qups = VC4_GET_FIELD(ident1, V3D_IDENT1_QUPS);
+
+	seq_printf(m, "Revision:   %d\n",
+		   VC4_GET_FIELD(ident1, V3D_IDENT1_REV));
+	seq_printf(m, "Slices:     %d\n", nslc);
+	seq_printf(m, "TMUs:       %d\n", nslc * tups);
+	seq_printf(m, "QPUs:       %d\n", nslc * qups);
+	seq_printf(m, "Semaphores: %d\n",
+		   VC4_GET_FIELD(ident1, V3D_IDENT1_NSEM));
+
+	return 0;
+}
+#endif /* CONFIG_DEBUG_FS */
+
+/*
+ * Asks the firmware to turn on power to the V3D engine.
+ *
+ * This may be doable with just the clocks interface, though this
+ * packet does some other register setup from the firmware, too.
+ */
+int
+vc4_v3d_set_power(struct vc4_dev *vc4, bool on)
+{
+	if (on)
+		return pm_generic_poweroff(&vc4->v3d->pdev->dev);
+	else
+		return pm_generic_resume(&vc4->v3d->pdev->dev);
+}
+
+static void vc4_v3d_init_hw(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	/* Take all the memory that would have been reserved for user
+	 * QPU programs, since we don't have an interface for running
+	 * them, anyway.
+	 */
+	V3D_WRITE(V3D_VPMBASE, 0);
+}
+
+static int vc4_v3d_bind(struct device *dev, struct device *master, void *data)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct drm_device *drm = dev_get_drvdata(master);
+	struct vc4_dev *vc4 = to_vc4_dev(drm);
+	struct vc4_v3d *v3d = NULL;
+	int ret;
+
+	v3d = devm_kzalloc(&pdev->dev, sizeof(*v3d), GFP_KERNEL);
+	if (!v3d)
+		return -ENOMEM;
+
+	v3d->pdev = pdev;
+
+	v3d->regs = vc4_ioremap_regs(pdev, 0);
+	if (IS_ERR(v3d->regs))
+		return PTR_ERR(v3d->regs);
+
+	vc4->v3d = v3d;
+
+	if (V3D_READ(V3D_IDENT0) != V3D_EXPECTED_IDENT0) {
+		DRM_ERROR("V3D_IDENT0 read 0x%08x instead of 0x%08x\n",
+			  V3D_READ(V3D_IDENT0), V3D_EXPECTED_IDENT0);
+		return -EINVAL;
+	}
+
+	/* Reset the binner overflow address/size at setup, to be sure
+	 * we don't reuse an old one.
+	 */
+	V3D_WRITE(V3D_BPOA, 0);
+	V3D_WRITE(V3D_BPOS, 0);
+
+	vc4_v3d_init_hw(drm);
+
+	ret = drm_irq_install(drm, platform_get_irq(pdev, 0));
+	if (ret) {
+		DRM_ERROR("Failed to install IRQ handler\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static void vc4_v3d_unbind(struct device *dev, struct device *master,
+			   void *data)
+{
+	struct drm_device *drm = dev_get_drvdata(master);
+	struct vc4_dev *vc4 = to_vc4_dev(drm);
+
+	drm_irq_uninstall(drm);
+
+	/* Disable the binner's overflow memory address, so the next
+	 * driver probe (if any) doesn't try to reuse our old
+	 * allocation.
+	 */
+	V3D_WRITE(V3D_BPOA, 0);
+	V3D_WRITE(V3D_BPOS, 0);
+
+	vc4->v3d = NULL;
+}
+
+static const struct component_ops vc4_v3d_ops = {
+	.bind   = vc4_v3d_bind,
+	.unbind = vc4_v3d_unbind,
+};
+
+static int vc4_v3d_dev_probe(struct platform_device *pdev)
+{
+	return component_add(&pdev->dev, &vc4_v3d_ops);
+}
+
+static int vc4_v3d_dev_remove(struct platform_device *pdev)
+{
+	component_del(&pdev->dev, &vc4_v3d_ops);
+	return 0;
+}
+
+static const struct of_device_id vc4_v3d_dt_match[] = {
+	{ .compatible = "brcm,vc4-v3d" },
+	{}
+};
+
+struct platform_driver vc4_v3d_driver = {
+	.probe = vc4_v3d_dev_probe,
+	.remove = vc4_v3d_dev_remove,
+	.driver = {
+		.name = "vc4_v3d",
+		.of_match_table = vc4_v3d_dt_match,
+	},
+};
diff --git a/drivers/gpu/drm/vc4/vc4_validate.c b/drivers/gpu/drm/vc4/vc4_validate.c
new file mode 100644
index 0000000..0fb5b99
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_validate.c
@@ -0,0 +1,900 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * Command list validator for VC4.
+ *
+ * The VC4 has no IOMMU between it and system memory.  So, a user with
+ * access to execute command lists could escalate privilege by
+ * overwriting system memory (drawing to it as a framebuffer) or
+ * reading system memory it shouldn't (reading it as a texture, or
+ * uniform data, or vertex data).
+ *
+ * This validates command lists to ensure that all accesses are within
+ * the bounds of the GEM objects referenced.  It explicitly whitelists
+ * packets, and looks at the offsets in any address fields to make
+ * sure they're constrained within the BOs they reference.
+ *
+ * Note that because of the validation that's happening anyway, this
+ * is where GEM relocation processing happens.
+ */
+
+#include "uapi/drm/vc4_drm.h"
+#include "vc4_drv.h"
+#include "vc4_packet.h"
+
+#define VALIDATE_ARGS \
+	struct vc4_exec_info *exec,			\
+	void *validated,				\
+	void *untrusted
+
+/** Return the width in pixels of a 64-byte microtile. */
+static uint32_t
+utile_width(int cpp)
+{
+	switch (cpp) {
+	case 1:
+	case 2:
+		return 8;
+	case 4:
+		return 4;
+	case 8:
+		return 2;
+	default:
+		DRM_ERROR("unknown cpp: %d\n", cpp);
+		return 1;
+	}
+}
+
+/** Return the height in pixels of a 64-byte microtile. */
+static uint32_t
+utile_height(int cpp)
+{
+	switch (cpp) {
+	case 1:
+		return 8;
+	case 2:
+	case 4:
+	case 8:
+		return 4;
+	default:
+		DRM_ERROR("unknown cpp: %d\n", cpp);
+		return 1;
+	}
+}
+
+/**
+ * The texture unit decides what tiling format a particular miplevel is using
+ * this function, so we lay out our miptrees accordingly.
+ */
+static bool
+size_is_lt(uint32_t width, uint32_t height, int cpp)
+{
+	return (width <= 4 * utile_width(cpp) ||
+		height <= 4 * utile_height(cpp));
+}
+
+struct drm_gem_cma_object *
+vc4_use_bo(struct vc4_exec_info *exec, uint32_t hindex)
+{
+	struct drm_gem_cma_object *obj;
+	struct vc4_bo *bo;
+
+	if (hindex >= exec->bo_count) {
+		DRM_ERROR("BO index %d greater than BO count %d\n",
+			  hindex, exec->bo_count);
+		return NULL;
+	}
+	obj = exec->bo[hindex];
+	bo = to_vc4_bo(&obj->base);
+
+	if (bo->validated_shader) {
+		DRM_ERROR("Trying to use shader BO as something other than "
+			  "a shader\n");
+		return NULL;
+	}
+
+	return obj;
+}
+
+static struct drm_gem_cma_object *
+vc4_use_handle(struct vc4_exec_info *exec, uint32_t gem_handles_packet_index)
+{
+	return vc4_use_bo(exec, exec->bo_index[gem_handles_packet_index]);
+}
+
+static bool
+validate_bin_pos(struct vc4_exec_info *exec, void *untrusted, uint32_t pos)
+{
+	/* Note that the untrusted pointer passed to these functions is
+	 * incremented past the packet byte.
+	 */
+	return (untrusted - 1 == exec->bin_u + pos);
+}
+
+static uint32_t
+gl_shader_rec_size(uint32_t pointer_bits)
+{
+	uint32_t attribute_count = pointer_bits & 7;
+	bool extended = pointer_bits & 8;
+
+	if (attribute_count == 0)
+		attribute_count = 8;
+
+	if (extended)
+		return 100 + attribute_count * 4;
+	else
+		return 36 + attribute_count * 8;
+}
+
+bool
+vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo,
+		   uint32_t offset, uint8_t tiling_format,
+		   uint32_t width, uint32_t height, uint8_t cpp)
+{
+	uint32_t aligned_width, aligned_height, stride, size;
+	uint32_t utile_w = utile_width(cpp);
+	uint32_t utile_h = utile_height(cpp);
+
+	/* The shaded vertex format stores signed 12.4 fixed point
+	 * (-2048,2047) offsets from the viewport center, so we should
+	 * never have a render target larger than 4096.  The texture
+	 * unit can only sample from 2048x2048, so it's even more
+	 * restricted.  This lets us avoid worrying about overflow in
+	 * our math.
+	 */
+	if (width > 4096 || height > 4096) {
+		DRM_ERROR("Surface dimesions (%d,%d) too large", width, height);
+		return false;
+	}
+
+	switch (tiling_format) {
+	case VC4_TILING_FORMAT_LINEAR:
+		aligned_width = round_up(width, utile_w);
+		aligned_height = height;
+		break;
+	case VC4_TILING_FORMAT_T:
+		aligned_width = round_up(width, utile_w * 8);
+		aligned_height = round_up(height, utile_h * 8);
+		break;
+	case VC4_TILING_FORMAT_LT:
+		aligned_width = round_up(width, utile_w);
+		aligned_height = round_up(height, utile_h);
+		break;
+	default:
+		DRM_ERROR("buffer tiling %d unsupported\n", tiling_format);
+		return false;
+	}
+
+	stride = aligned_width * cpp;
+	size = stride * aligned_height;
+
+	if (size + offset < size ||
+	    size + offset > fbo->base.size) {
+		DRM_ERROR("Overflow in %dx%d (%dx%d) fbo size (%d + %d > %zd)\n",
+			  width, height,
+			  aligned_width, aligned_height,
+			  size, offset, fbo->base.size);
+		return false;
+	}
+
+	return true;
+}
+
+static int
+validate_flush(VALIDATE_ARGS)
+{
+	if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 1)) {
+		DRM_ERROR("Bin CL must end with VC4_PACKET_FLUSH\n");
+		return -EINVAL;
+	}
+	exec->found_flush = true;
+
+	return 0;
+}
+
+static int
+validate_start_tile_binning(VALIDATE_ARGS)
+{
+	if (exec->found_start_tile_binning_packet) {
+		DRM_ERROR("Duplicate VC4_PACKET_START_TILE_BINNING\n");
+		return -EINVAL;
+	}
+	exec->found_start_tile_binning_packet = true;
+
+	if (!exec->found_tile_binning_mode_config_packet) {
+		DRM_ERROR("missing VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+validate_increment_semaphore(VALIDATE_ARGS)
+{
+	if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 2)) {
+		DRM_ERROR("Bin CL must end with "
+			  "VC4_PACKET_INCREMENT_SEMAPHORE\n");
+		return -EINVAL;
+	}
+	exec->found_increment_semaphore_packet = true;
+
+	return 0;
+}
+
+static int
+validate_indexed_prim_list(VALIDATE_ARGS)
+{
+	struct drm_gem_cma_object *ib;
+	uint32_t length = *(uint32_t *)(untrusted + 1);
+	uint32_t offset = *(uint32_t *)(untrusted + 5);
+	uint32_t max_index = *(uint32_t *)(untrusted + 9);
+	uint32_t index_size = (*(uint8_t *)(untrusted + 0) >> 4) ? 2 : 1;
+	struct vc4_shader_state *shader_state;
+
+	/* Check overflow condition */
+	if (exec->shader_state_count == 0) {
+		DRM_ERROR("shader state must precede primitives\n");
+		return -EINVAL;
+	}
+	shader_state = &exec->shader_state[exec->shader_state_count - 1];
+
+	if (max_index > shader_state->max_index)
+		shader_state->max_index = max_index;
+
+	ib = vc4_use_handle(exec, 0);
+	if (!ib)
+		return -EINVAL;
+
+	if (offset > ib->base.size ||
+	    (ib->base.size - offset) / index_size < length) {
+		DRM_ERROR("IB access overflow (%d + %d*%d > %zd)\n",
+			  offset, length, index_size, ib->base.size);
+		return -EINVAL;
+	}
+
+	*(uint32_t *)(validated + 5) = ib->paddr + offset;
+
+	return 0;
+}
+
+static int
+validate_gl_array_primitive(VALIDATE_ARGS)
+{
+	uint32_t length = *(uint32_t *)(untrusted + 1);
+	uint32_t base_index = *(uint32_t *)(untrusted + 5);
+	uint32_t max_index;
+	struct vc4_shader_state *shader_state;
+
+	/* Check overflow condition */
+	if (exec->shader_state_count == 0) {
+		DRM_ERROR("shader state must precede primitives\n");
+		return -EINVAL;
+	}
+	shader_state = &exec->shader_state[exec->shader_state_count - 1];
+
+	if (length + base_index < length) {
+		DRM_ERROR("primitive vertex count overflow\n");
+		return -EINVAL;
+	}
+	max_index = length + base_index - 1;
+
+	if (max_index > shader_state->max_index)
+		shader_state->max_index = max_index;
+
+	return 0;
+}
+
+static int
+validate_gl_shader_state(VALIDATE_ARGS)
+{
+	uint32_t i = exec->shader_state_count++;
+
+	if (i >= exec->shader_state_size) {
+		DRM_ERROR("More requests for shader states than declared\n");
+		return -EINVAL;
+	}
+
+	exec->shader_state[i].addr = *(uint32_t *)untrusted;
+	exec->shader_state[i].max_index = 0;
+
+	if (exec->shader_state[i].addr & ~0xf) {
+		DRM_ERROR("high bits set in GL shader rec reference\n");
+		return -EINVAL;
+	}
+
+	*(uint32_t *)validated = (exec->shader_rec_p +
+				  exec->shader_state[i].addr);
+
+	exec->shader_rec_p +=
+		roundup(gl_shader_rec_size(exec->shader_state[i].addr), 16);
+
+	return 0;
+}
+
+static int
+validate_tile_binning_config(VALIDATE_ARGS)
+{
+	struct drm_device *dev = exec->exec_bo->base.dev;
+	struct vc4_bo *tile_bo;
+	uint8_t flags;
+	uint32_t tile_state_size, tile_alloc_size;
+	uint32_t tile_count;
+
+	if (exec->found_tile_binning_mode_config_packet) {
+		DRM_ERROR("Duplicate VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
+		return -EINVAL;
+	}
+	exec->found_tile_binning_mode_config_packet = true;
+
+	exec->bin_tiles_x = *(uint8_t *)(untrusted + 12);
+	exec->bin_tiles_y = *(uint8_t *)(untrusted + 13);
+	tile_count = exec->bin_tiles_x * exec->bin_tiles_y;
+	flags = *(uint8_t *)(untrusted + 14);
+
+	if (exec->bin_tiles_x == 0 ||
+	    exec->bin_tiles_y == 0) {
+		DRM_ERROR("Tile binning config of %dx%d too small\n",
+			  exec->bin_tiles_x, exec->bin_tiles_y);
+		return -EINVAL;
+	}
+
+	if (flags & (VC4_BIN_CONFIG_DB_NON_MS |
+		     VC4_BIN_CONFIG_TILE_BUFFER_64BIT)) {
+		DRM_ERROR("unsupported binning config flags 0x%02x\n", flags);
+		return -EINVAL;
+	}
+
+	/* The tile state data array is 48 bytes per tile, and we put it at
+	 * the start of a BO containing both it and the tile alloc.
+	 */
+	tile_state_size = 48 * tile_count;
+
+	/* Since the tile alloc array will follow us, align. */
+	exec->tile_alloc_offset = roundup(tile_state_size, 4096);
+
+	*(uint8_t *)(validated + 14) =
+		((flags & ~(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK |
+			    VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK)) |
+		 VC4_BIN_CONFIG_AUTO_INIT_TSDA |
+		 VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32,
+			       VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE) |
+		 VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128,
+			       VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE));
+
+	/* Initial block size. */
+	tile_alloc_size = 32 * tile_count;
+
+	/*
+	 * The initial allocation gets rounded to the next 256 bytes before
+	 * the hardware starts fulfilling further allocations.
+	 */
+	tile_alloc_size = roundup(tile_alloc_size, 256);
+
+	/* Add space for the extra allocations.  This is what gets used first,
+	 * before overflow memory.  It must have at least 4096 bytes, but we
+	 * want to avoid overflow memory usage if possible.
+	 */
+	tile_alloc_size += 1024 * 1024;
+
+	tile_bo = vc4_bo_create(dev, exec->tile_alloc_offset + tile_alloc_size,
+				true);
+	exec->tile_bo = &tile_bo->base;
+	if (!exec->tile_bo)
+		return -ENOMEM;
+	list_add_tail(&tile_bo->unref_head, &exec->unref_list);
+
+	/* tile alloc address. */
+	*(uint32_t *)(validated + 0) = (exec->tile_bo->paddr +
+					exec->tile_alloc_offset);
+	/* tile alloc size. */
+	*(uint32_t *)(validated + 4) = tile_alloc_size;
+	/* tile state address. */
+	*(uint32_t *)(validated + 8) = exec->tile_bo->paddr;
+
+	return 0;
+}
+
+static int
+validate_gem_handles(VALIDATE_ARGS)
+{
+	memcpy(exec->bo_index, untrusted, sizeof(exec->bo_index));
+	return 0;
+}
+
+#define VC4_DEFINE_PACKET(packet, func) \
+	[packet] = { packet ## _SIZE, #packet, func }
+
+static const struct cmd_info {
+	uint16_t len;
+	const char *name;
+	int (*func)(struct vc4_exec_info *exec, void *validated,
+		    void *untrusted);
+} cmd_info[] = {
+	VC4_DEFINE_PACKET(VC4_PACKET_HALT, NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_NOP, NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, validate_flush),
+	VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING,
+			  validate_start_tile_binning),
+	VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE,
+			  validate_increment_semaphore),
+
+	VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE,
+			  validate_indexed_prim_list),
+	VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE,
+			  validate_gl_array_primitive),
+
+	VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, NULL),
+
+	VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, validate_gl_shader_state),
+
+	VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, NULL),
+	VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, NULL),
+	/* Note: The docs say this was also 105, but it was 106 in the
+	 * initial userland code drop.
+	 */
+	VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, NULL),
+
+	VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG,
+			  validate_tile_binning_config),
+
+	VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, validate_gem_handles),
+};
+
+int
+vc4_validate_bin_cl(struct drm_device *dev,
+		    void *validated,
+		    void *unvalidated,
+		    struct vc4_exec_info *exec)
+{
+	uint32_t len = exec->args->bin_cl_size;
+	uint32_t dst_offset = 0;
+	uint32_t src_offset = 0;
+
+	while (src_offset < len) {
+		void *dst_pkt = validated + dst_offset;
+		void *src_pkt = unvalidated + src_offset;
+		u8 cmd = *(uint8_t *)src_pkt;
+		const struct cmd_info *info;
+
+		if (cmd >= ARRAY_SIZE(cmd_info)) {
+			DRM_ERROR("0x%08x: packet %d out of bounds\n",
+				  src_offset, cmd);
+			return -EINVAL;
+		}
+
+		info = &cmd_info[cmd];
+		if (!info->name) {
+			DRM_ERROR("0x%08x: packet %d invalid\n",
+				  src_offset, cmd);
+			return -EINVAL;
+		}
+
+		if (src_offset + info->len > len) {
+			DRM_ERROR("0x%08x: packet %d (%s) length 0x%08x "
+				  "exceeds bounds (0x%08x)\n",
+				  src_offset, cmd, info->name, info->len,
+				  src_offset + len);
+			return -EINVAL;
+		}
+
+		if (cmd != VC4_PACKET_GEM_HANDLES)
+			memcpy(dst_pkt, src_pkt, info->len);
+
+		if (info->func && info->func(exec,
+					     dst_pkt + 1,
+					     src_pkt + 1)) {
+			DRM_ERROR("0x%08x: packet %d (%s) failed to validate\n",
+				  src_offset, cmd, info->name);
+			return -EINVAL;
+		}
+
+		src_offset += info->len;
+		/* GEM handle loading doesn't produce HW packets. */
+		if (cmd != VC4_PACKET_GEM_HANDLES)
+			dst_offset += info->len;
+
+		/* When the CL hits halt, it'll stop reading anything else. */
+		if (cmd == VC4_PACKET_HALT)
+			break;
+	}
+
+	exec->ct0ea = exec->ct0ca + dst_offset;
+
+	if (!exec->found_start_tile_binning_packet) {
+		DRM_ERROR("Bin CL missing VC4_PACKET_START_TILE_BINNING\n");
+		return -EINVAL;
+	}
+
+	/* The bin CL must be ended with INCREMENT_SEMAPHORE and FLUSH.  The
+	 * semaphore is used to trigger the render CL to start up, and the
+	 * FLUSH is what caps the bin lists with
+	 * VC4_PACKET_RETURN_FROM_SUB_LIST (so they jump back to the main
+	 * render CL when they get called to) and actually triggers the queued
+	 * semaphore increment.
+	 */
+	if (!exec->found_increment_semaphore_packet || !exec->found_flush) {
+		DRM_ERROR("Bin CL missing VC4_PACKET_INCREMENT_SEMAPHORE + "
+			  "VC4_PACKET_FLUSH\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static bool
+reloc_tex(struct vc4_exec_info *exec,
+	  void *uniform_data_u,
+	  struct vc4_texture_sample_info *sample,
+	  uint32_t texture_handle_index)
+
+{
+	struct drm_gem_cma_object *tex;
+	uint32_t p0 = *(uint32_t *)(uniform_data_u + sample->p_offset[0]);
+	uint32_t p1 = *(uint32_t *)(uniform_data_u + sample->p_offset[1]);
+	uint32_t p2 = (sample->p_offset[2] != ~0 ?
+		       *(uint32_t *)(uniform_data_u + sample->p_offset[2]) : 0);
+	uint32_t p3 = (sample->p_offset[3] != ~0 ?
+		       *(uint32_t *)(uniform_data_u + sample->p_offset[3]) : 0);
+	uint32_t *validated_p0 = exec->uniforms_v + sample->p_offset[0];
+	uint32_t offset = p0 & VC4_TEX_P0_OFFSET_MASK;
+	uint32_t miplevels = VC4_GET_FIELD(p0, VC4_TEX_P0_MIPLVLS);
+	uint32_t width = VC4_GET_FIELD(p1, VC4_TEX_P1_WIDTH);
+	uint32_t height = VC4_GET_FIELD(p1, VC4_TEX_P1_HEIGHT);
+	uint32_t cpp, tiling_format, utile_w, utile_h;
+	uint32_t i;
+	uint32_t cube_map_stride = 0;
+	enum vc4_texture_data_type type;
+
+	tex = vc4_use_bo(exec, texture_handle_index);
+	if (!tex)
+		return false;
+
+	if (sample->is_direct) {
+		uint32_t remaining_size = tex->base.size - p0;
+
+		if (p0 > tex->base.size - 4) {
+			DRM_ERROR("UBO offset greater than UBO size\n");
+			goto fail;
+		}
+		if (p1 > remaining_size - 4) {
+			DRM_ERROR("UBO clamp would allow reads "
+				  "outside of UBO\n");
+			goto fail;
+		}
+		*validated_p0 = tex->paddr + p0;
+		return true;
+	}
+
+	if (width == 0)
+		width = 2048;
+	if (height == 0)
+		height = 2048;
+
+	if (p0 & VC4_TEX_P0_CMMODE_MASK) {
+		if (VC4_GET_FIELD(p2, VC4_TEX_P2_PTYPE) ==
+		    VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE)
+			cube_map_stride = p2 & VC4_TEX_P2_CMST_MASK;
+		if (VC4_GET_FIELD(p3, VC4_TEX_P2_PTYPE) ==
+		    VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE) {
+			if (cube_map_stride) {
+				DRM_ERROR("Cube map stride set twice\n");
+				goto fail;
+			}
+
+			cube_map_stride = p3 & VC4_TEX_P2_CMST_MASK;
+		}
+		if (!cube_map_stride) {
+			DRM_ERROR("Cube map stride not set\n");
+			goto fail;
+		}
+	}
+
+	type = (VC4_GET_FIELD(p0, VC4_TEX_P0_TYPE) |
+		(VC4_GET_FIELD(p1, VC4_TEX_P1_TYPE4) << 4));
+
+	switch (type) {
+	case VC4_TEXTURE_TYPE_RGBA8888:
+	case VC4_TEXTURE_TYPE_RGBX8888:
+	case VC4_TEXTURE_TYPE_RGBA32R:
+		cpp = 4;
+		break;
+	case VC4_TEXTURE_TYPE_RGBA4444:
+	case VC4_TEXTURE_TYPE_RGBA5551:
+	case VC4_TEXTURE_TYPE_RGB565:
+	case VC4_TEXTURE_TYPE_LUMALPHA:
+	case VC4_TEXTURE_TYPE_S16F:
+	case VC4_TEXTURE_TYPE_S16:
+		cpp = 2;
+		break;
+	case VC4_TEXTURE_TYPE_LUMINANCE:
+	case VC4_TEXTURE_TYPE_ALPHA:
+	case VC4_TEXTURE_TYPE_S8:
+		cpp = 1;
+		break;
+	case VC4_TEXTURE_TYPE_ETC1:
+	case VC4_TEXTURE_TYPE_BW1:
+	case VC4_TEXTURE_TYPE_A4:
+	case VC4_TEXTURE_TYPE_A1:
+	case VC4_TEXTURE_TYPE_RGBA64:
+	case VC4_TEXTURE_TYPE_YUV422R:
+	default:
+		DRM_ERROR("Texture format %d unsupported\n", type);
+		goto fail;
+	}
+	utile_w = utile_width(cpp);
+	utile_h = utile_height(cpp);
+
+	if (type == VC4_TEXTURE_TYPE_RGBA32R) {
+		tiling_format = VC4_TILING_FORMAT_LINEAR;
+	} else {
+		if (size_is_lt(width, height, cpp))
+			tiling_format = VC4_TILING_FORMAT_LT;
+		else
+			tiling_format = VC4_TILING_FORMAT_T;
+	}
+
+	if (!vc4_check_tex_size(exec, tex, offset + cube_map_stride * 5,
+				tiling_format, width, height, cpp)) {
+		goto fail;
+	}
+
+	/* The mipmap levels are stored before the base of the texture.  Make
+	 * sure there is actually space in the BO.
+	 */
+	for (i = 1; i <= miplevels; i++) {
+		uint32_t level_width = max(width >> i, 1u);
+		uint32_t level_height = max(height >> i, 1u);
+		uint32_t aligned_width, aligned_height;
+		uint32_t level_size;
+
+		/* Once the levels get small enough, they drop from T to LT. */
+		if (tiling_format == VC4_TILING_FORMAT_T &&
+		    size_is_lt(level_width, level_height, cpp)) {
+			tiling_format = VC4_TILING_FORMAT_LT;
+		}
+
+		switch (tiling_format) {
+		case VC4_TILING_FORMAT_T:
+			aligned_width = round_up(level_width, utile_w * 8);
+			aligned_height = round_up(level_height, utile_h * 8);
+			break;
+		case VC4_TILING_FORMAT_LT:
+			aligned_width = round_up(level_width, utile_w);
+			aligned_height = round_up(level_height, utile_h);
+			break;
+		default:
+			aligned_width = round_up(level_width, utile_w);
+			aligned_height = level_height;
+			break;
+		}
+
+		level_size = aligned_width * cpp * aligned_height;
+
+		if (offset < level_size) {
+			DRM_ERROR("Level %d (%dx%d -> %dx%d) size %db "
+				  "overflowed buffer bounds (offset %d)\n",
+				  i, level_width, level_height,
+				  aligned_width, aligned_height,
+				  level_size, offset);
+			goto fail;
+		}
+
+		offset -= level_size;
+	}
+
+	*validated_p0 = tex->paddr + p0;
+
+	return true;
+ fail:
+	DRM_INFO("Texture p0 at %d: 0x%08x\n", sample->p_offset[0], p0);
+	DRM_INFO("Texture p1 at %d: 0x%08x\n", sample->p_offset[1], p1);
+	DRM_INFO("Texture p2 at %d: 0x%08x\n", sample->p_offset[2], p2);
+	DRM_INFO("Texture p3 at %d: 0x%08x\n", sample->p_offset[3], p3);
+	return false;
+}
+
+static int
+validate_gl_shader_rec(struct drm_device *dev,
+		       struct vc4_exec_info *exec,
+		       struct vc4_shader_state *state)
+{
+	uint32_t *src_handles;
+	void *pkt_u, *pkt_v;
+	static const uint32_t shader_reloc_offsets[] = {
+		4, /* fs */
+		16, /* vs */
+		28, /* cs */
+	};
+	uint32_t shader_reloc_count = ARRAY_SIZE(shader_reloc_offsets);
+	struct drm_gem_cma_object *bo[shader_reloc_count + 8];
+	uint32_t nr_attributes, nr_relocs, packet_size;
+	int i;
+
+	nr_attributes = state->addr & 0x7;
+	if (nr_attributes == 0)
+		nr_attributes = 8;
+	packet_size = gl_shader_rec_size(state->addr);
+
+	nr_relocs = ARRAY_SIZE(shader_reloc_offsets) + nr_attributes;
+	if (nr_relocs * 4 > exec->shader_rec_size) {
+		DRM_ERROR("overflowed shader recs reading %d handles "
+			  "from %d bytes left\n",
+			  nr_relocs, exec->shader_rec_size);
+		return -EINVAL;
+	}
+	src_handles = exec->shader_rec_u;
+	exec->shader_rec_u += nr_relocs * 4;
+	exec->shader_rec_size -= nr_relocs * 4;
+
+	if (packet_size > exec->shader_rec_size) {
+		DRM_ERROR("overflowed shader recs copying %db packet "
+			  "from %d bytes left\n",
+			  packet_size, exec->shader_rec_size);
+		return -EINVAL;
+	}
+	pkt_u = exec->shader_rec_u;
+	pkt_v = exec->shader_rec_v;
+	memcpy(pkt_v, pkt_u, packet_size);
+	exec->shader_rec_u += packet_size;
+	/* Shader recs have to be aligned to 16 bytes (due to the attribute
+	 * flags being in the low bytes), so round the next validated shader
+	 * rec address up.  This should be safe, since we've got so many
+	 * relocations in a shader rec packet.
+	 */
+	BUG_ON(roundup(packet_size, 16) - packet_size > nr_relocs * 4);
+	exec->shader_rec_v += roundup(packet_size, 16);
+	exec->shader_rec_size -= packet_size;
+
+	if (!(*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD)) {
+		DRM_ERROR("Multi-threaded fragment shaders not supported.\n");
+		return -EINVAL;
+	}
+
+	for (i = 0; i < shader_reloc_count; i++) {
+		if (src_handles[i] > exec->bo_count) {
+			DRM_ERROR("Shader handle %d too big\n", src_handles[i]);
+			return -EINVAL;
+		}
+
+		bo[i] = exec->bo[src_handles[i]];
+		if (!bo[i])
+			return -EINVAL;
+	}
+	for (i = shader_reloc_count; i < nr_relocs; i++) {
+		bo[i] = vc4_use_bo(exec, src_handles[i]);
+		if (!bo[i])
+			return -EINVAL;
+	}
+
+	for (i = 0; i < shader_reloc_count; i++) {
+		struct vc4_validated_shader_info *validated_shader;
+		uint32_t o = shader_reloc_offsets[i];
+		uint32_t src_offset = *(uint32_t *)(pkt_u + o);
+		uint32_t *texture_handles_u;
+		void *uniform_data_u;
+		uint32_t tex;
+
+		*(uint32_t *)(pkt_v + o) = bo[i]->paddr + src_offset;
+
+		if (src_offset != 0) {
+			DRM_ERROR("Shaders must be at offset 0 of "
+				  "the BO.\n");
+			return -EINVAL;
+		}
+
+		validated_shader = to_vc4_bo(&bo[i]->base)->validated_shader;
+		if (!validated_shader)
+			return -EINVAL;
+
+		if (validated_shader->uniforms_src_size >
+		    exec->uniforms_size) {
+			DRM_ERROR("Uniforms src buffer overflow\n");
+			return -EINVAL;
+		}
+
+		texture_handles_u = exec->uniforms_u;
+		uniform_data_u = (texture_handles_u +
+				  validated_shader->num_texture_samples);
+
+		memcpy(exec->uniforms_v, uniform_data_u,
+		       validated_shader->uniforms_size);
+
+		for (tex = 0;
+		     tex < validated_shader->num_texture_samples;
+		     tex++) {
+			if (!reloc_tex(exec,
+				       uniform_data_u,
+				       &validated_shader->texture_samples[tex],
+				       texture_handles_u[tex])) {
+				return -EINVAL;
+			}
+		}
+
+		*(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p;
+
+		exec->uniforms_u += validated_shader->uniforms_src_size;
+		exec->uniforms_v += validated_shader->uniforms_size;
+		exec->uniforms_p += validated_shader->uniforms_size;
+	}
+
+	for (i = 0; i < nr_attributes; i++) {
+		struct drm_gem_cma_object *vbo =
+			bo[ARRAY_SIZE(shader_reloc_offsets) + i];
+		uint32_t o = 36 + i * 8;
+		uint32_t offset = *(uint32_t *)(pkt_u + o + 0);
+		uint32_t attr_size = *(uint8_t *)(pkt_u + o + 4) + 1;
+		uint32_t stride = *(uint8_t *)(pkt_u + o + 5);
+		uint32_t max_index;
+
+		if (state->addr & 0x8)
+			stride |= (*(uint32_t *)(pkt_u + 100 + i * 4)) & ~0xff;
+
+		if (vbo->base.size < offset ||
+		    vbo->base.size - offset < attr_size) {
+			DRM_ERROR("BO offset overflow (%d + %d > %d)\n",
+				  offset, attr_size, vbo->base.size);
+			return -EINVAL;
+		}
+
+		if (stride != 0) {
+			max_index = ((vbo->base.size - offset - attr_size) /
+				     stride);
+			if (state->max_index > max_index) {
+				DRM_ERROR("primitives use index %d out of "
+					  "supplied %d\n",
+					  state->max_index, max_index);
+				return -EINVAL;
+			}
+		}
+
+		*(uint32_t *)(pkt_v + o) = vbo->paddr + offset;
+	}
+
+	return 0;
+}
+
+int
+vc4_validate_shader_recs(struct drm_device *dev,
+			 struct vc4_exec_info *exec)
+{
+	uint32_t i;
+	int ret = 0;
+
+	for (i = 0; i < exec->shader_state_count; i++) {
+		ret = validate_gl_shader_rec(dev, exec, &exec->shader_state[i]);
+		if (ret)
+			return ret;
+	}
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/vc4/vc4_validate_shaders.c b/drivers/gpu/drm/vc4/vc4_validate_shaders.c
new file mode 100644
index 0000000..f67124b
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_validate_shaders.c
@@ -0,0 +1,513 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * DOC: Shader validator for VC4.
+ *
+ * The VC4 has no IOMMU between it and system memory, so a user with
+ * access to execute shaders could escalate privilege by overwriting
+ * system memory (using the VPM write address register in the
+ * general-purpose DMA mode) or reading system memory it shouldn't
+ * (reading it as a texture, or uniform data, or vertex data).
+ *
+ * This walks over a shader BO, ensuring that its accesses are
+ * appropriately bounded, and recording how many texture accesses are
+ * made and where so that we can do relocations for them in the
+ * uniform stream.
+ */
+
+#include "vc4_drv.h"
+#include "vc4_qpu_defines.h"
+
+struct vc4_shader_validation_state {
+	struct vc4_texture_sample_info tmu_setup[2];
+	int tmu_write_count[2];
+
+	/* For registers that were last written to by a MIN instruction with
+	 * one argument being a uniform, the address of the uniform.
+	 * Otherwise, ~0.
+	 *
+	 * This is used for the validation of direct address memory reads.
+	 */
+	uint32_t live_min_clamp_offsets[32 + 32 + 4];
+	bool live_max_clamp_regs[32 + 32 + 4];
+};
+
+static uint32_t
+waddr_to_live_reg_index(uint32_t waddr, bool is_b)
+{
+	if (waddr < 32) {
+		if (is_b)
+			return 32 + waddr;
+		else
+			return waddr;
+	} else if (waddr <= QPU_W_ACC3) {
+		return 64 + waddr - QPU_W_ACC0;
+	} else {
+		return ~0;
+	}
+}
+
+static uint32_t
+raddr_add_a_to_live_reg_index(uint64_t inst)
+{
+	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
+	uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
+	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
+	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
+
+	if (add_a == QPU_MUX_A)
+		return raddr_a;
+	else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM)
+		return 32 + raddr_b;
+	else if (add_a <= QPU_MUX_R3)
+		return 64 + add_a;
+	else
+		return ~0;
+}
+
+static bool
+is_tmu_submit(uint32_t waddr)
+{
+	return (waddr == QPU_W_TMU0_S ||
+		waddr == QPU_W_TMU1_S);
+}
+
+static bool
+is_tmu_write(uint32_t waddr)
+{
+	return (waddr >= QPU_W_TMU0_S &&
+		waddr <= QPU_W_TMU1_B);
+}
+
+static bool
+record_texture_sample(struct vc4_validated_shader_info *validated_shader,
+		      struct vc4_shader_validation_state *validation_state,
+		      int tmu)
+{
+	uint32_t s = validated_shader->num_texture_samples;
+	int i;
+	struct vc4_texture_sample_info *temp_samples;
+
+	temp_samples = krealloc(validated_shader->texture_samples,
+				(s + 1) * sizeof(*temp_samples),
+				GFP_KERNEL);
+	if (!temp_samples)
+		return false;
+
+	memcpy(&temp_samples[s],
+	       &validation_state->tmu_setup[tmu],
+	       sizeof(*temp_samples));
+
+	validated_shader->num_texture_samples = s + 1;
+	validated_shader->texture_samples = temp_samples;
+
+	for (i = 0; i < 4; i++)
+		validation_state->tmu_setup[tmu].p_offset[i] = ~0;
+
+	return true;
+}
+
+static bool
+check_tmu_write(uint64_t inst,
+		struct vc4_validated_shader_info *validated_shader,
+		struct vc4_shader_validation_state *validation_state,
+		bool is_mul)
+{
+	uint32_t waddr = (is_mul ?
+			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
+			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));
+	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
+	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
+	int tmu = waddr > QPU_W_TMU0_B;
+	bool submit = is_tmu_submit(waddr);
+	bool is_direct = submit && validation_state->tmu_write_count[tmu] == 0;
+	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
+
+	if (is_direct) {
+		uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
+		uint32_t clamp_reg, clamp_offset;
+
+		if (sig == QPU_SIG_SMALL_IMM) {
+			DRM_ERROR("direct TMU read used small immediate\n");
+			return false;
+		}
+
+		/* Make sure that this texture load is an add of the base
+		 * address of the UBO to a clamped offset within the UBO.
+		 */
+		if (is_mul ||
+		    QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
+			DRM_ERROR("direct TMU load wasn't an add\n");
+			return false;
+		}
+
+		/* We assert that the the clamped address is the first
+		 * argument, and the UBO base address is the second argument.
+		 * This is arbitrary, but simpler than supporting flipping the
+		 * two either way.
+		 */
+		clamp_reg = raddr_add_a_to_live_reg_index(inst);
+		if (clamp_reg == ~0) {
+			DRM_ERROR("direct TMU load wasn't clamped\n");
+			return false;
+		}
+
+		clamp_offset = validation_state->live_min_clamp_offsets[clamp_reg];
+		if (clamp_offset == ~0) {
+			DRM_ERROR("direct TMU load wasn't clamped\n");
+			return false;
+		}
+
+		/* Store the clamp value's offset in p1 (see reloc_tex() in
+		 * vc4_validate.c).
+		 */
+		validation_state->tmu_setup[tmu].p_offset[1] =
+			clamp_offset;
+
+		if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
+		    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
+			DRM_ERROR("direct TMU load didn't add to a uniform\n");
+			return false;
+		}
+
+		validation_state->tmu_setup[tmu].is_direct = true;
+	} else {
+		if (raddr_a == QPU_R_UNIF || (sig != QPU_SIG_SMALL_IMM &&
+					      raddr_b == QPU_R_UNIF)) {
+			DRM_ERROR("uniform read in the same instruction as "
+				  "texture setup.\n");
+			return false;
+		}
+	}
+
+	if (validation_state->tmu_write_count[tmu] >= 4) {
+		DRM_ERROR("TMU%d got too many parameters before dispatch\n",
+			  tmu);
+		return false;
+	}
+	validation_state->tmu_setup[tmu].p_offset[validation_state->tmu_write_count[tmu]] =
+		validated_shader->uniforms_size;
+	validation_state->tmu_write_count[tmu]++;
+	/* Since direct uses a RADDR uniform reference, it will get counted in
+	 * check_instruction_reads()
+	 */
+	if (!is_direct)
+		validated_shader->uniforms_size += 4;
+
+	if (submit) {
+		if (!record_texture_sample(validated_shader,
+					   validation_state, tmu)) {
+			return false;
+		}
+
+		validation_state->tmu_write_count[tmu] = 0;
+	}
+
+	return true;
+}
+
+static bool
+check_reg_write(uint64_t inst,
+		struct vc4_validated_shader_info *validated_shader,
+		struct vc4_shader_validation_state *validation_state,
+		bool is_mul)
+{
+	uint32_t waddr = (is_mul ?
+			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
+			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));
+
+	switch (waddr) {
+	case QPU_W_UNIFORMS_ADDRESS:
+		/* XXX: We'll probably need to support this for reladdr, but
+		 * it's definitely a security-related one.
+		 */
+		DRM_ERROR("uniforms address load unsupported\n");
+		return false;
+
+	case QPU_W_TLB_COLOR_MS:
+	case QPU_W_TLB_COLOR_ALL:
+	case QPU_W_TLB_Z:
+		/* These only interact with the tile buffer, not main memory,
+		 * so they're safe.
+		 */
+		return true;
+
+	case QPU_W_TMU0_S:
+	case QPU_W_TMU0_T:
+	case QPU_W_TMU0_R:
+	case QPU_W_TMU0_B:
+	case QPU_W_TMU1_S:
+	case QPU_W_TMU1_T:
+	case QPU_W_TMU1_R:
+	case QPU_W_TMU1_B:
+		return check_tmu_write(inst, validated_shader, validation_state,
+				       is_mul);
+
+	case QPU_W_HOST_INT:
+	case QPU_W_TMU_NOSWAP:
+	case QPU_W_TLB_ALPHA_MASK:
+	case QPU_W_MUTEX_RELEASE:
+		/* XXX: I haven't thought about these, so don't support them
+		 * for now.
+		 */
+		DRM_ERROR("Unsupported waddr %d\n", waddr);
+		return false;
+
+	case QPU_W_VPM_ADDR:
+		DRM_ERROR("General VPM DMA unsupported\n");
+		return false;
+
+	case QPU_W_VPM:
+	case QPU_W_VPMVCD_SETUP:
+		/* We allow VPM setup in general, even including VPM DMA
+		 * configuration setup, because the (unsafe) DMA can only be
+		 * triggered by QPU_W_VPM_ADDR writes.
+		 */
+		return true;
+
+	case QPU_W_TLB_STENCIL_SETUP:
+		return true;
+	}
+
+	return true;
+}
+
+static void
+track_live_clamps(uint64_t inst,
+		  struct vc4_validated_shader_info *validated_shader,
+		  struct vc4_shader_validation_state *validation_state)
+{
+	uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
+	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
+	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
+	uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
+	uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
+	uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
+	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
+	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
+	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
+	bool ws = inst & QPU_WS;
+	uint32_t lri_add_a, lri_add, lri_mul;
+	bool add_a_is_min_0;
+
+	/* Check whether OP_ADD's A argumennt comes from a live MAX(x, 0),
+	 * before we clear previous live state.
+	 */
+	lri_add_a = raddr_add_a_to_live_reg_index(inst);
+	add_a_is_min_0 = (lri_add_a != ~0 &&
+			  validation_state->live_max_clamp_regs[lri_add_a]);
+
+	/* Clear live state for registers written by our instruction. */
+	lri_add = waddr_to_live_reg_index(waddr_add, ws);
+	lri_mul = waddr_to_live_reg_index(waddr_mul, !ws);
+	if (lri_mul != ~0) {
+		validation_state->live_max_clamp_regs[lri_mul] = false;
+		validation_state->live_min_clamp_offsets[lri_mul] = ~0;
+	}
+	if (lri_add != ~0) {
+		validation_state->live_max_clamp_regs[lri_add] = false;
+		validation_state->live_min_clamp_offsets[lri_add] = ~0;
+	} else {
+		/* Nothing further to do for live tracking, since only ADDs
+		 * generate new live clamp registers.
+		 */
+		return;
+	}
+
+	/* Now, handle remaining live clamp tracking for the ADD operation. */
+
+	if (cond_add != QPU_COND_ALWAYS)
+		return;
+
+	if (op_add == QPU_A_MAX) {
+		/* Track live clamps of a value to a minimum of 0 (in either
+		 * arg).
+		 */
+		if (sig != QPU_SIG_SMALL_IMM || raddr_b != 0 ||
+		    (add_a != QPU_MUX_B && add_b != QPU_MUX_B)) {
+			return;
+		}
+
+		validation_state->live_max_clamp_regs[lri_add] = true;
+	} else if (op_add == QPU_A_MIN) {
+		/* Track live clamps of a value clamped to a minimum of 0 and
+		 * a maximum of some uniform's offset.
+		 */
+		if (!add_a_is_min_0)
+			return;
+
+		if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
+		    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF &&
+		      sig != QPU_SIG_SMALL_IMM)) {
+			return;
+		}
+
+		validation_state->live_min_clamp_offsets[lri_add] =
+			validated_shader->uniforms_size;
+	}
+}
+
+static bool
+check_instruction_writes(uint64_t inst,
+			 struct vc4_validated_shader_info *validated_shader,
+			 struct vc4_shader_validation_state *validation_state)
+{
+	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
+	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
+	bool ok;
+
+	if (is_tmu_write(waddr_add) && is_tmu_write(waddr_mul)) {
+		DRM_ERROR("ADD and MUL both set up textures\n");
+		return false;
+	}
+
+	ok = (check_reg_write(inst, validated_shader, validation_state,
+			      false) &&
+	      check_reg_write(inst, validated_shader, validation_state,
+			      true));
+
+	track_live_clamps(inst, validated_shader, validation_state);
+
+	return ok;
+}
+
+static bool
+check_instruction_reads(uint64_t inst,
+			struct vc4_validated_shader_info *validated_shader)
+{
+	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
+	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
+	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
+
+	if (raddr_a == QPU_R_UNIF ||
+	    (raddr_b == QPU_R_UNIF && sig != QPU_SIG_SMALL_IMM)) {
+		/* This can't overflow the uint32_t, because we're reading 8
+		 * bytes of instruction to increment by 4 here, so we'd
+		 * already be OOM.
+		 */
+		validated_shader->uniforms_size += 4;
+	}
+
+	return true;
+}
+
+struct vc4_validated_shader_info *
+vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
+{
+	bool found_shader_end = false;
+	int shader_end_ip = 0;
+	uint32_t ip, max_ip;
+	uint64_t *shader;
+	struct vc4_validated_shader_info *validated_shader;
+	struct vc4_shader_validation_state validation_state;
+	int i;
+
+	memset(&validation_state, 0, sizeof(validation_state));
+
+	for (i = 0; i < 8; i++)
+		validation_state.tmu_setup[i / 4].p_offset[i % 4] = ~0;
+	for (i = 0; i < ARRAY_SIZE(validation_state.live_min_clamp_offsets); i++)
+		validation_state.live_min_clamp_offsets[i] = ~0;
+
+	shader = shader_obj->vaddr;
+	max_ip = shader_obj->base.size / sizeof(uint64_t);
+
+	validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL);
+	if (!validated_shader)
+		return NULL;
+
+	for (ip = 0; ip < max_ip; ip++) {
+		uint64_t inst = shader[ip];
+		uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
+
+		switch (sig) {
+		case QPU_SIG_NONE:
+		case QPU_SIG_WAIT_FOR_SCOREBOARD:
+		case QPU_SIG_SCOREBOARD_UNLOCK:
+		case QPU_SIG_COLOR_LOAD:
+		case QPU_SIG_LOAD_TMU0:
+		case QPU_SIG_LOAD_TMU1:
+		case QPU_SIG_PROG_END:
+		case QPU_SIG_SMALL_IMM:
+			if (!check_instruction_writes(inst, validated_shader,
+						      &validation_state)) {
+				DRM_ERROR("Bad write at ip %d\n", ip);
+				goto fail;
+			}
+
+			if (!check_instruction_reads(inst, validated_shader))
+				goto fail;
+
+			if (sig == QPU_SIG_PROG_END) {
+				found_shader_end = true;
+				shader_end_ip = ip;
+			}
+
+			break;
+
+		case QPU_SIG_LOAD_IMM:
+			if (!check_instruction_writes(inst, validated_shader,
+						      &validation_state)) {
+				DRM_ERROR("Bad LOAD_IMM write at ip %d\n", ip);
+				goto fail;
+			}
+			break;
+
+		default:
+			DRM_ERROR("Unsupported QPU signal %d at "
+				  "instruction %d\n", sig, ip);
+			goto fail;
+		}
+
+		/* There are two delay slots after program end is signaled
+		 * that are still executed, then we're finished.
+		 */
+		if (found_shader_end && ip == shader_end_ip + 2)
+			break;
+	}
+
+	if (ip == max_ip) {
+		DRM_ERROR("shader failed to terminate before "
+			  "shader BO end at %zd\n",
+			  shader_obj->base.size);
+		goto fail;
+	}
+
+	/* Again, no chance of integer overflow here because the worst case
+	 * scenario is 8 bytes of uniforms plus handles per 8-byte
+	 * instruction.
+	 */
+	validated_shader->uniforms_src_size =
+		(validated_shader->uniforms_size +
+		 4 * validated_shader->num_texture_samples);
+
+	return validated_shader;
+
+fail:
+	if (validated_shader) {
+		kfree(validated_shader->texture_samples);
+		kfree(validated_shader);
+	}
+	return NULL;
+}
diff --git a/drivers/gpu/drm/virtio/virtgpu_display.c b/drivers/gpu/drm/virtio/virtgpu_display.c
index f545913..a165f03 100644
--- a/drivers/gpu/drm/virtio/virtgpu_display.c
+++ b/drivers/gpu/drm/virtio/virtgpu_display.c
@@ -215,7 +215,7 @@
 int
 virtio_gpu_framebuffer_init(struct drm_device *dev,
 			    struct virtio_gpu_framebuffer *vgfb,
-			    struct drm_mode_fb_cmd2 *mode_cmd,
+			    const struct drm_mode_fb_cmd2 *mode_cmd,
 			    struct drm_gem_object *obj)
 {
 	int ret;
@@ -374,16 +374,6 @@
 	.best_encoder = virtio_gpu_best_encoder,
 };
 
-static void virtio_gpu_conn_save(struct drm_connector *connector)
-{
-	DRM_DEBUG("\n");
-}
-
-static void virtio_gpu_conn_restore(struct drm_connector *connector)
-{
-	DRM_DEBUG("\n");
-}
-
 static enum drm_connector_status virtio_gpu_conn_detect(
 			struct drm_connector *connector,
 			bool force)
@@ -409,8 +399,6 @@
 
 static const struct drm_connector_funcs virtio_gpu_connector_funcs = {
 	.dpms = drm_atomic_helper_connector_dpms,
-	.save = virtio_gpu_conn_save,
-	.restore = virtio_gpu_conn_restore,
 	.detect = virtio_gpu_conn_detect,
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.destroy = virtio_gpu_conn_destroy,
@@ -443,7 +431,7 @@
 	if (IS_ERR(plane))
 		return PTR_ERR(plane);
 	drm_crtc_init_with_planes(dev, crtc, plane, NULL,
-				  &virtio_gpu_crtc_funcs);
+				  &virtio_gpu_crtc_funcs, NULL);
 	drm_mode_crtc_set_gamma_size(crtc, 256);
 	drm_crtc_helper_add(crtc, &virtio_gpu_crtc_helper_funcs);
 	plane->crtc = crtc;
@@ -453,7 +441,7 @@
 	drm_connector_helper_add(connector, &virtio_gpu_conn_helper_funcs);
 
 	drm_encoder_init(dev, encoder, &virtio_gpu_enc_funcs,
-			 DRM_MODE_ENCODER_VIRTUAL);
+			 DRM_MODE_ENCODER_VIRTUAL, NULL);
 	drm_encoder_helper_add(encoder, &virtio_gpu_enc_helper_funcs);
 	encoder->possible_crtcs = 1 << index;
 
@@ -465,7 +453,7 @@
 static struct drm_framebuffer *
 virtio_gpu_user_framebuffer_create(struct drm_device *dev,
 				   struct drm_file *file_priv,
-				   struct drm_mode_fb_cmd2 *mode_cmd)
+				   const struct drm_mode_fb_cmd2 *mode_cmd)
 {
 	struct drm_gem_object *obj = NULL;
 	struct virtio_gpu_framebuffer *virtio_gpu_fb;
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h
index 79f0abe..8f486f4 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.h
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
@@ -328,7 +328,7 @@
 /* virtio_gpu_display.c */
 int virtio_gpu_framebuffer_init(struct drm_device *dev,
 				struct virtio_gpu_framebuffer *vgfb,
-				struct drm_mode_fb_cmd2 *mode_cmd,
+				const struct drm_mode_fb_cmd2 *mode_cmd,
 				struct drm_gem_object *obj);
 int virtio_gpu_modeset_init(struct virtio_gpu_device *vgdev);
 void virtio_gpu_modeset_fini(struct virtio_gpu_device *vgdev);
diff --git a/drivers/gpu/drm/virtio/virtgpu_fb.c b/drivers/gpu/drm/virtio/virtgpu_fb.c
index 6a81e08..2242a80 100644
--- a/drivers/gpu/drm/virtio/virtgpu_fb.c
+++ b/drivers/gpu/drm/virtio/virtgpu_fb.c
@@ -32,7 +32,6 @@
 struct virtio_gpu_fbdev {
 	struct drm_fb_helper           helper;
 	struct virtio_gpu_framebuffer  vgfb;
-	struct list_head	       fbdev_list;
 	struct virtio_gpu_device       *vgdev;
 	struct delayed_work            work;
 };
diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c b/drivers/gpu/drm/virtio/virtgpu_plane.c
index 4a74129..572fb35 100644
--- a/drivers/gpu/drm/virtio/virtgpu_plane.c
+++ b/drivers/gpu/drm/virtio/virtgpu_plane.c
@@ -107,7 +107,7 @@
 				       &virtio_gpu_plane_funcs,
 				       virtio_gpu_formats,
 				       ARRAY_SIZE(virtio_gpu_formats),
-				       DRM_PLANE_TYPE_PRIMARY);
+				       DRM_PLANE_TYPE_PRIMARY, NULL);
 	if (ret)
 		goto err_plane_init;
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index 9fcd7f8..9394c35 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -930,7 +930,7 @@
 
 static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev,
 						 struct drm_file *file_priv,
-						 struct drm_mode_fb_cmd2 *mode_cmd2)
+						 const struct drm_mode_fb_cmd2 *mode_cmd2)
 {
 	struct vmw_private *dev_priv = vmw_priv(dev);
 	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
@@ -1331,14 +1331,6 @@
 	return 0;
 }
 
-void vmw_du_crtc_save(struct drm_crtc *crtc)
-{
-}
-
-void vmw_du_crtc_restore(struct drm_crtc *crtc)
-{
-}
-
 void vmw_du_crtc_gamma_set(struct drm_crtc *crtc,
 			   u16 *r, u16 *g, u16 *b,
 			   uint32_t start, uint32_t size)
@@ -1360,14 +1352,6 @@
 	return 0;
 }
 
-void vmw_du_connector_save(struct drm_connector *connector)
-{
-}
-
-void vmw_du_connector_restore(struct drm_connector *connector)
-{
-}
-
 enum drm_connector_status
 vmw_du_connector_detect(struct drm_connector *connector, bool force)
 {
@@ -1554,7 +1538,7 @@
 		drm_mode_probed_add(connector, mode);
 	}
 
-	drm_mode_connector_list_update(connector, true);
+	drm_mode_connector_list_update(connector);
 	/* Move the prefered mode first, help apps pick the right mode. */
 	drm_mode_sort(&connector->modes);
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
index bb63e4d..2aff5e5 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
@@ -295,8 +295,6 @@
 }
 
 static struct drm_crtc_funcs vmw_legacy_crtc_funcs = {
-	.save = vmw_du_crtc_save,
-	.restore = vmw_du_crtc_restore,
 	.cursor_set = vmw_du_crtc_cursor_set,
 	.cursor_move = vmw_du_crtc_cursor_move,
 	.gamma_set = vmw_du_crtc_gamma_set,
@@ -329,8 +327,6 @@
 
 static struct drm_connector_funcs vmw_legacy_connector_funcs = {
 	.dpms = vmw_du_connector_dpms,
-	.save = vmw_du_connector_save,
-	.restore = vmw_du_connector_restore,
 	.detect = vmw_du_connector_detect,
 	.fill_modes = vmw_du_connector_fill_modes,
 	.set_property = vmw_du_connector_set_property,
@@ -367,7 +363,7 @@
 	connector->status = vmw_du_connector_detect(connector, true);
 
 	drm_encoder_init(dev, encoder, &vmw_legacy_encoder_funcs,
-			 DRM_MODE_ENCODER_VIRTUAL);
+			 DRM_MODE_ENCODER_VIRTUAL, NULL);
 	drm_mode_connector_attach_encoder(connector, encoder);
 	encoder->possible_crtcs = (1 << unit);
 	encoder->possible_clones = 0;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
index b96d1ab..6bb7af3 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
@@ -531,8 +531,6 @@
 }
 
 static struct drm_crtc_funcs vmw_screen_object_crtc_funcs = {
-	.save = vmw_du_crtc_save,
-	.restore = vmw_du_crtc_restore,
 	.cursor_set = vmw_du_crtc_cursor_set,
 	.cursor_move = vmw_du_crtc_cursor_move,
 	.gamma_set = vmw_du_crtc_gamma_set,
@@ -565,10 +563,6 @@
 
 static struct drm_connector_funcs vmw_sou_connector_funcs = {
 	.dpms = vmw_du_connector_dpms,
-	.save = vmw_du_connector_save,
-	.restore = vmw_du_connector_restore,
-	.detect = vmw_du_connector_detect,
-	.fill_modes = vmw_du_connector_fill_modes,
 	.set_property = vmw_du_connector_set_property,
 	.destroy = vmw_sou_connector_destroy,
 };
@@ -603,7 +597,7 @@
 	connector->status = vmw_du_connector_detect(connector, true);
 
 	drm_encoder_init(dev, encoder, &vmw_screen_object_encoder_funcs,
-			 DRM_MODE_ENCODER_VIRTUAL);
+			 DRM_MODE_ENCODER_VIRTUAL, NULL);
 	drm_mode_connector_attach_encoder(connector, encoder);
 	encoder->possible_crtcs = (1 << unit);
 	encoder->possible_clones = 0;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
index b1fc1c0..45e72c2 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
@@ -1041,8 +1041,6 @@
  *  Screen Target CRTC dispatch table
  */
 static struct drm_crtc_funcs vmw_stdu_crtc_funcs = {
-	.save = vmw_du_crtc_save,
-	.restore = vmw_du_crtc_restore,
 	.cursor_set = vmw_du_crtc_cursor_set,
 	.cursor_move = vmw_du_crtc_cursor_move,
 	.gamma_set = vmw_du_crtc_gamma_set,
@@ -1101,8 +1099,6 @@
 
 static struct drm_connector_funcs vmw_stdu_connector_funcs = {
 	.dpms = vmw_du_connector_dpms,
-	.save = vmw_du_connector_save,
-	.restore = vmw_du_connector_restore,
 	.detect = vmw_du_connector_detect,
 	.fill_modes = vmw_du_connector_fill_modes,
 	.set_property = vmw_du_connector_set_property,
@@ -1149,7 +1145,7 @@
 	connector->status = vmw_du_connector_detect(connector, false);
 
 	drm_encoder_init(dev, encoder, &vmw_stdu_encoder_funcs,
-			 DRM_MODE_ENCODER_VIRTUAL);
+			 DRM_MODE_ENCODER_VIRTUAL, NULL);
 	drm_mode_connector_attach_encoder(connector, encoder);
 	encoder->possible_crtcs = (1 << unit);
 	encoder->possible_clones = 0;
diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c
index ba47b30..f2e13eb 100644
--- a/drivers/gpu/ipu-v3/ipu-common.c
+++ b/drivers/gpu/ipu-v3/ipu-common.c
@@ -28,6 +28,7 @@
 #include <linux/irqchip/chained_irq.h>
 #include <linux/irqdomain.h>
 #include <linux/of_device.h>
+#include <linux/of_graph.h>
 
 #include <drm/drm_fourcc.h>
 
@@ -993,12 +994,26 @@
 struct ipu_platform_reg {
 	struct ipu_client_platformdata pdata;
 	const char *name;
-	int reg_offset;
 };
 
+/* These must be in the order of the corresponding device tree port nodes */
 static const struct ipu_platform_reg client_reg[] = {
 	{
 		.pdata = {
+			.csi = 0,
+			.dma[0] = IPUV3_CHANNEL_CSI0,
+			.dma[1] = -EINVAL,
+		},
+		.name = "imx-ipuv3-camera",
+	}, {
+		.pdata = {
+			.csi = 1,
+			.dma[0] = IPUV3_CHANNEL_CSI1,
+			.dma[1] = -EINVAL,
+		},
+		.name = "imx-ipuv3-camera",
+	}, {
+		.pdata = {
 			.di = 0,
 			.dc = 5,
 			.dp = IPU_DP_FLOW_SYNC_BG,
@@ -1015,22 +1030,6 @@
 			.dma[1] = -EINVAL,
 		},
 		.name = "imx-ipuv3-crtc",
-	}, {
-		.pdata = {
-			.csi = 0,
-			.dma[0] = IPUV3_CHANNEL_CSI0,
-			.dma[1] = -EINVAL,
-		},
-		.reg_offset = IPU_CM_CSI0_REG_OFS,
-		.name = "imx-ipuv3-camera",
-	}, {
-		.pdata = {
-			.csi = 1,
-			.dma[0] = IPUV3_CHANNEL_CSI1,
-			.dma[1] = -EINVAL,
-		},
-		.reg_offset = IPU_CM_CSI1_REG_OFS,
-		.name = "imx-ipuv3-camera",
 	},
 };
 
@@ -1051,22 +1050,30 @@
 	for (i = 0; i < ARRAY_SIZE(client_reg); i++) {
 		const struct ipu_platform_reg *reg = &client_reg[i];
 		struct platform_device *pdev;
-		struct resource res;
 
-		if (reg->reg_offset) {
-			memset(&res, 0, sizeof(res));
-			res.flags = IORESOURCE_MEM;
-			res.start = ipu_base + ipu->devtype->cm_ofs + reg->reg_offset;
-			res.end = res.start + PAGE_SIZE - 1;
-			pdev = platform_device_register_resndata(dev, reg->name,
-				id++, &res, 1, &reg->pdata, sizeof(reg->pdata));
-		} else {
-			pdev = platform_device_register_data(dev, reg->name,
-				id++, &reg->pdata, sizeof(reg->pdata));
+		pdev = platform_device_alloc(reg->name, id++);
+		if (!pdev) {
+			ret = -ENOMEM;
+			goto err_register;
 		}
 
-		if (IS_ERR(pdev)) {
-			ret = PTR_ERR(pdev);
+		pdev->dev.parent = dev;
+
+		/* Associate subdevice with the corresponding port node */
+		pdev->dev.of_node = of_graph_get_port_by_id(dev->of_node, i);
+		if (!pdev->dev.of_node) {
+			dev_err(dev, "missing port@%d node in %s\n", i,
+				dev->of_node->full_name);
+			ret = -ENODEV;
+			goto err_register;
+		}
+
+		ret = platform_device_add_data(pdev, &reg->pdata,
+					       sizeof(reg->pdata));
+		if (!ret)
+			ret = platform_device_add(pdev);
+		if (ret) {
+			platform_device_put(pdev);
 			goto err_register;
 		}
 	}
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index ac1feea..9024a3d 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -609,6 +609,7 @@
 #define USB_DEVICE_ID_LOGITECH_HARMONY_FIRST  0xc110
 #define USB_DEVICE_ID_LOGITECH_HARMONY_LAST 0xc14f
 #define USB_DEVICE_ID_LOGITECH_HARMONY_PS3 0x0306
+#define USB_DEVICE_ID_LOGITECH_KEYBOARD_G710_PLUS 0xc24d
 #define USB_DEVICE_ID_LOGITECH_MOUSE_C01A	0xc01a
 #define USB_DEVICE_ID_LOGITECH_MOUSE_C05A	0xc05a
 #define USB_DEVICE_ID_LOGITECH_MOUSE_C06A	0xc06a
diff --git a/drivers/hid/hid-lg.c b/drivers/hid/hid-lg.c
index c20ac76..c690fae 100644
--- a/drivers/hid/hid-lg.c
+++ b/drivers/hid/hid-lg.c
@@ -665,8 +665,9 @@
 	struct lg_drv_data *drv_data;
 	int ret;
 
-	/* Only work with the 1st interface (G29 presents multiple) */
-	if (iface_num != 0) {
+	/* G29 only work with the 1st interface */
+	if ((hdev->product == USB_DEVICE_ID_LOGITECH_G29_WHEEL) &&
+	    (iface_num != 0)) {
 		dbg_hid("%s: ignoring ifnum %d\n", __func__, iface_num);
 		return -ENODEV;
 	}
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c
index 94bb137..2324520 100644
--- a/drivers/hid/usbhid/hid-quirks.c
+++ b/drivers/hid/usbhid/hid-quirks.c
@@ -84,6 +84,7 @@
 	{ USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0B4A, HID_QUIRK_ALWAYS_POLL },
 	{ USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE, HID_QUIRK_ALWAYS_POLL },
 	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_C077, HID_QUIRK_ALWAYS_POLL },
+	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_KEYBOARD_G710_PLUS, HID_QUIRK_NOGET },
 	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C01A, HID_QUIRK_ALWAYS_POLL },
 	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C05A, HID_QUIRK_ALWAYS_POLL },
 	{ USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C06A, HID_QUIRK_ALWAYS_POLL },
diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index 8b29949..01a4f05 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -2481,7 +2481,7 @@
 		if (features->pktlen == WACOM_PKGLEN_BBTOUCH3) {
 			if (features->touch_max)
 				features->device_type |= WACOM_DEVICETYPE_TOUCH;
-			if (features->type >= INTUOSHT || features->type <= BAMBOO_PT)
+			if (features->type >= INTUOSHT && features->type <= BAMBOO_PT)
 				features->device_type |= WACOM_DEVICETYPE_PAD;
 
 			features->x_max = 4096;
@@ -3213,7 +3213,8 @@
 	  WACOM_DTU_OFFSET, WACOM_DTU_OFFSET };
 static const struct wacom_features wacom_features_0x336 =
 	{ "Wacom DTU1141", 23472, 13203, 1023, 0,
-	  DTUS, WACOM_INTUOS_RES, WACOM_INTUOS_RES, 4 };
+	  DTUS, WACOM_INTUOS_RES, WACOM_INTUOS_RES, 4,
+	  WACOM_DTU_OFFSET, WACOM_DTU_OFFSET };
 static const struct wacom_features wacom_features_0x57 =
 	{ "Wacom DTK2241", 95640, 54060, 2047, 63,
 	  DTK, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, 6,
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 842b004..8f59f05 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -324,6 +324,7 @@
 config SENSORS_ARM_SCPI
 	tristate "ARM SCPI Sensors"
 	depends on ARM_SCPI_PROTOCOL
+	depends on THERMAL || !THERMAL_OF
 	help
 	  This driver provides support for temperature, voltage, current
 	  and power sensors available on ARM Ltd's SCP based platforms. The
@@ -1471,6 +1472,7 @@
 config SENSORS_INA2XX
 	tristate "Texas Instruments INA219 and compatibles"
 	depends on I2C
+	select REGMAP_I2C
 	help
 	  If you say yes here you get support for INA219, INA220, INA226,
 	  INA230, and INA231 power monitor chips.
diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
index 1f5e956..0af7fd3 100644
--- a/drivers/hwmon/applesmc.c
+++ b/drivers/hwmon/applesmc.c
@@ -537,7 +537,7 @@
 static int applesmc_init_smcreg_try(void)
 {
 	struct applesmc_registers *s = &smcreg;
-	bool left_light_sensor, right_light_sensor;
+	bool left_light_sensor = 0, right_light_sensor = 0;
 	unsigned int count;
 	u8 tmp[1];
 	int ret;
diff --git a/drivers/hwmon/scpi-hwmon.c b/drivers/hwmon/scpi-hwmon.c
index 2c1241b..7e20567 100644
--- a/drivers/hwmon/scpi-hwmon.c
+++ b/drivers/hwmon/scpi-hwmon.c
@@ -117,7 +117,7 @@
 	struct scpi_ops *scpi_ops;
 	struct device *hwdev, *dev = &pdev->dev;
 	struct scpi_sensors *scpi_sensors;
-	int ret;
+	int ret, idx;
 
 	scpi_ops = get_scpi_ops();
 	if (!scpi_ops)
@@ -146,8 +146,8 @@
 
 	scpi_sensors->scpi_ops = scpi_ops;
 
-	for (i = 0; i < nr_sensors; i++) {
-		struct sensor_data *sensor = &scpi_sensors->data[i];
+	for (i = 0, idx = 0; i < nr_sensors; i++) {
+		struct sensor_data *sensor = &scpi_sensors->data[idx];
 
 		ret = scpi_ops->sensor_get_info(i, &sensor->info);
 		if (ret)
@@ -183,7 +183,7 @@
 			num_power++;
 			break;
 		default:
-			break;
+			continue;
 		}
 
 		sensor->dev_attr_input.attr.mode = S_IRUGO;
@@ -194,11 +194,12 @@
 		sensor->dev_attr_label.show = scpi_show_label;
 		sensor->dev_attr_label.attr.name = sensor->label;
 
-		scpi_sensors->attrs[i << 1] = &sensor->dev_attr_input.attr;
-		scpi_sensors->attrs[(i << 1) + 1] = &sensor->dev_attr_label.attr;
+		scpi_sensors->attrs[idx << 1] = &sensor->dev_attr_input.attr;
+		scpi_sensors->attrs[(idx << 1) + 1] = &sensor->dev_attr_label.attr;
 
-		sysfs_attr_init(scpi_sensors->attrs[i << 1]);
-		sysfs_attr_init(scpi_sensors->attrs[(i << 1) + 1]);
+		sysfs_attr_init(scpi_sensors->attrs[idx << 1]);
+		sysfs_attr_init(scpi_sensors->attrs[(idx << 1) + 1]);
+		idx++;
 	}
 
 	scpi_sensors->group.attrs = scpi_sensors->attrs;
@@ -236,8 +237,8 @@
 
 		zone->sensor_id = i;
 		zone->scpi_sensors = scpi_sensors;
-		zone->tzd = thermal_zone_of_sensor_register(dev, i, zone,
-							    &scpi_sensor_ops);
+		zone->tzd = thermal_zone_of_sensor_register(dev,
+				sensor->info.sensor_id, zone, &scpi_sensor_ops);
 		/*
 		 * The call to thermal_zone_of_sensor_register returns
 		 * an error for sensors that are not associated with
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index e24c2b6..7b0aa82 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -126,6 +126,7 @@
 	    Sunrise Point-LP (PCH)
 	    DNV (SOC)
 	    Broxton (SOC)
+	    Lewisburg (PCH)
 
 	  This driver can also be built as a module.  If so, the module
 	  will be called i2c-i801.
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index c306751..f62d697 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -62,6 +62,8 @@
  * Sunrise Point-LP (PCH)	0x9d23	32	hard	yes	yes	yes
  * DNV (SOC)			0x19df	32	hard	yes	yes	yes
  * Broxton (SOC)		0x5ad4	32	hard	yes	yes	yes
+ * Lewisburg (PCH)		0xa1a3	32	hard	yes	yes	yes
+ * Lewisburg Supersku (PCH)	0xa223	32	hard	yes	yes	yes
  *
  * Features supported by this driver:
  * Software PEC				no
@@ -206,6 +208,8 @@
 #define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_SMBUS	0x9d23
 #define PCI_DEVICE_ID_INTEL_DNV_SMBUS			0x19df
 #define PCI_DEVICE_ID_INTEL_BROXTON_SMBUS		0x5ad4
+#define PCI_DEVICE_ID_INTEL_LEWISBURG_SMBUS		0xa1a3
+#define PCI_DEVICE_ID_INTEL_LEWISBURG_SSKU_SMBUS	0xa223
 
 struct i801_mux_config {
 	char *gpio_chip;
@@ -869,6 +873,8 @@
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_SMBUS) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_DNV_SMBUS) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BROXTON_SMBUS) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LEWISBURG_SMBUS) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LEWISBURG_SSKU_SMBUS) },
 	{ 0, }
 };
 
diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c
index 1e4d99d..9bb0b05 100644
--- a/drivers/i2c/busses/i2c-imx.c
+++ b/drivers/i2c/busses/i2c-imx.c
@@ -50,6 +50,7 @@
 #include <linux/of_device.h>
 #include <linux/of_dma.h>
 #include <linux/of_gpio.h>
+#include <linux/pinctrl/consumer.h>
 #include <linux/platform_data/i2c-imx.h>
 #include <linux/platform_device.h>
 #include <linux/sched.h>
diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c
index e23a7b0..0b20449 100644
--- a/drivers/i2c/busses/i2c-xiic.c
+++ b/drivers/i2c/busses/i2c-xiic.c
@@ -662,8 +662,10 @@
 
 static void xiic_start_xfer(struct xiic_i2c *i2c)
 {
-
+	spin_lock(&i2c->lock);
+	xiic_reinit(i2c);
 	__xiic_start_xfer(i2c);
+	spin_unlock(&i2c->lock);
 }
 
 static int xiic_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index 040af5c..ba8eb08 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -715,7 +715,7 @@
 		if (wakeirq > 0 && wakeirq != client->irq)
 			status = dev_pm_set_dedicated_wake_irq(dev, wakeirq);
 		else if (client->irq > 0)
-			status = dev_pm_set_wake_irq(dev, wakeirq);
+			status = dev_pm_set_wake_irq(dev, client->irq);
 		else
 			status = 0;
 
diff --git a/drivers/iio/adc/ad7793.c b/drivers/iio/adc/ad7793.c
index eea0c79..4d960d3 100644
--- a/drivers/iio/adc/ad7793.c
+++ b/drivers/iio/adc/ad7793.c
@@ -101,7 +101,7 @@
 #define AD7795_CH_AIN1M_AIN1M	8 /* AIN1(-) - AIN1(-) */
 
 /* ID Register Bit Designations (AD7793_REG_ID) */
-#define AD7785_ID		0xB
+#define AD7785_ID		0x3
 #define AD7792_ID		0xA
 #define AD7793_ID		0xB
 #define AD7794_ID		0xF
diff --git a/drivers/iio/adc/vf610_adc.c b/drivers/iio/adc/vf610_adc.c
index 599cde3..b10f629 100644
--- a/drivers/iio/adc/vf610_adc.c
+++ b/drivers/iio/adc/vf610_adc.c
@@ -106,6 +106,13 @@
 
 #define DEFAULT_SAMPLE_TIME		1000
 
+/* V at 25°C of 696 mV */
+#define VF610_VTEMP25_3V0		950
+/* V at 25°C of 699 mV */
+#define VF610_VTEMP25_3V3		867
+/* Typical sensor slope coefficient at all temperatures */
+#define VF610_TEMP_SLOPE_COEFF		1840
+
 enum clk_sel {
 	VF610_ADCIOC_BUSCLK_SET,
 	VF610_ADCIOC_ALTCLK_SET,
@@ -197,6 +204,8 @@
 		adc_feature->clk_div = 8;
 	}
 
+	adck_rate = ipg_rate / adc_feature->clk_div;
+
 	/*
 	 * Determine the long sample time adder value to be used based
 	 * on the default minimum sample time provided.
@@ -221,7 +230,6 @@
 	 * BCT (Base Conversion Time): fixed to 25 ADCK cycles for 12 bit mode
 	 * LSTAdder(Long Sample Time): 3, 5, 7, 9, 13, 17, 21, 25 ADCK cycles
 	 */
-	adck_rate = ipg_rate / info->adc_feature.clk_div;
 	for (i = 0; i < ARRAY_SIZE(vf610_hw_avgs); i++)
 		info->sample_freq_avail[i] =
 			adck_rate / (6 + vf610_hw_avgs[i] *
@@ -663,11 +671,13 @@
 			break;
 		case IIO_TEMP:
 			/*
-			* Calculate in degree Celsius times 1000
-			* Using sensor slope of 1.84 mV/°C and
-			* V at 25°C of 696 mV
-			*/
-			*val = 25000 - ((int)info->value - 864) * 1000000 / 1840;
+			 * Calculate in degree Celsius times 1000
+			 * Using the typical sensor slope of 1.84 mV/°C
+			 * and VREFH_ADC at 3.3V, V at 25°C of 699 mV
+			 */
+			*val = 25000 - ((int)info->value - VF610_VTEMP25_3V3) *
+					1000000 / VF610_TEMP_SLOPE_COEFF;
+
 			break;
 		default:
 			mutex_unlock(&indio_dev->mlock);
diff --git a/drivers/iio/adc/xilinx-xadc-core.c b/drivers/iio/adc/xilinx-xadc-core.c
index 0370624..02e636a 100644
--- a/drivers/iio/adc/xilinx-xadc-core.c
+++ b/drivers/iio/adc/xilinx-xadc-core.c
@@ -841,6 +841,7 @@
 			case XADC_REG_VCCINT:
 			case XADC_REG_VCCAUX:
 			case XADC_REG_VREFP:
+			case XADC_REG_VREFN:
 			case XADC_REG_VCCBRAM:
 			case XADC_REG_VCCPINT:
 			case XADC_REG_VCCPAUX:
diff --git a/drivers/iio/dac/ad5064.c b/drivers/iio/dac/ad5064.c
index 9e4d2c18..81ca008 100644
--- a/drivers/iio/dac/ad5064.c
+++ b/drivers/iio/dac/ad5064.c
@@ -113,12 +113,16 @@
 	ID_AD5065,
 	ID_AD5628_1,
 	ID_AD5628_2,
+	ID_AD5629_1,
+	ID_AD5629_2,
 	ID_AD5648_1,
 	ID_AD5648_2,
 	ID_AD5666_1,
 	ID_AD5666_2,
 	ID_AD5668_1,
 	ID_AD5668_2,
+	ID_AD5669_1,
+	ID_AD5669_2,
 };
 
 static int ad5064_write(struct ad5064_state *st, unsigned int cmd,
@@ -291,7 +295,7 @@
 	{ },
 };
 
-#define AD5064_CHANNEL(chan, addr, bits) {			\
+#define AD5064_CHANNEL(chan, addr, bits, _shift) {		\
 	.type = IIO_VOLTAGE,					\
 	.indexed = 1,						\
 	.output = 1,						\
@@ -303,36 +307,39 @@
 		.sign = 'u',					\
 		.realbits = (bits),				\
 		.storagebits = 16,				\
-		.shift = 20 - bits,				\
+		.shift = (_shift),				\
 	},							\
 	.ext_info = ad5064_ext_info,				\
 }
 
-#define DECLARE_AD5064_CHANNELS(name, bits) \
+#define DECLARE_AD5064_CHANNELS(name, bits, shift) \
 const struct iio_chan_spec name[] = { \
-	AD5064_CHANNEL(0, 0, bits), \
-	AD5064_CHANNEL(1, 1, bits), \
-	AD5064_CHANNEL(2, 2, bits), \
-	AD5064_CHANNEL(3, 3, bits), \
-	AD5064_CHANNEL(4, 4, bits), \
-	AD5064_CHANNEL(5, 5, bits), \
-	AD5064_CHANNEL(6, 6, bits), \
-	AD5064_CHANNEL(7, 7, bits), \
+	AD5064_CHANNEL(0, 0, bits, shift), \
+	AD5064_CHANNEL(1, 1, bits, shift), \
+	AD5064_CHANNEL(2, 2, bits, shift), \
+	AD5064_CHANNEL(3, 3, bits, shift), \
+	AD5064_CHANNEL(4, 4, bits, shift), \
+	AD5064_CHANNEL(5, 5, bits, shift), \
+	AD5064_CHANNEL(6, 6, bits, shift), \
+	AD5064_CHANNEL(7, 7, bits, shift), \
 }
 
-#define DECLARE_AD5065_CHANNELS(name, bits) \
+#define DECLARE_AD5065_CHANNELS(name, bits, shift) \
 const struct iio_chan_spec name[] = { \
-	AD5064_CHANNEL(0, 0, bits), \
-	AD5064_CHANNEL(1, 3, bits), \
+	AD5064_CHANNEL(0, 0, bits, shift), \
+	AD5064_CHANNEL(1, 3, bits, shift), \
 }
 
-static DECLARE_AD5064_CHANNELS(ad5024_channels, 12);
-static DECLARE_AD5064_CHANNELS(ad5044_channels, 14);
-static DECLARE_AD5064_CHANNELS(ad5064_channels, 16);
+static DECLARE_AD5064_CHANNELS(ad5024_channels, 12, 8);
+static DECLARE_AD5064_CHANNELS(ad5044_channels, 14, 6);
+static DECLARE_AD5064_CHANNELS(ad5064_channels, 16, 4);
 
-static DECLARE_AD5065_CHANNELS(ad5025_channels, 12);
-static DECLARE_AD5065_CHANNELS(ad5045_channels, 14);
-static DECLARE_AD5065_CHANNELS(ad5065_channels, 16);
+static DECLARE_AD5065_CHANNELS(ad5025_channels, 12, 8);
+static DECLARE_AD5065_CHANNELS(ad5045_channels, 14, 6);
+static DECLARE_AD5065_CHANNELS(ad5065_channels, 16, 4);
+
+static DECLARE_AD5064_CHANNELS(ad5629_channels, 12, 4);
+static DECLARE_AD5064_CHANNELS(ad5669_channels, 16, 0);
 
 static const struct ad5064_chip_info ad5064_chip_info_tbl[] = {
 	[ID_AD5024] = {
@@ -382,6 +389,18 @@
 		.channels = ad5024_channels,
 		.num_channels = 8,
 	},
+	[ID_AD5629_1] = {
+		.shared_vref = true,
+		.internal_vref = 2500000,
+		.channels = ad5629_channels,
+		.num_channels = 8,
+	},
+	[ID_AD5629_2] = {
+		.shared_vref = true,
+		.internal_vref = 5000000,
+		.channels = ad5629_channels,
+		.num_channels = 8,
+	},
 	[ID_AD5648_1] = {
 		.shared_vref = true,
 		.internal_vref = 2500000,
@@ -418,6 +437,18 @@
 		.channels = ad5064_channels,
 		.num_channels = 8,
 	},
+	[ID_AD5669_1] = {
+		.shared_vref = true,
+		.internal_vref = 2500000,
+		.channels = ad5669_channels,
+		.num_channels = 8,
+	},
+	[ID_AD5669_2] = {
+		.shared_vref = true,
+		.internal_vref = 5000000,
+		.channels = ad5669_channels,
+		.num_channels = 8,
+	},
 };
 
 static inline unsigned int ad5064_num_vref(struct ad5064_state *st)
@@ -597,10 +628,16 @@
 	unsigned int addr, unsigned int val)
 {
 	struct i2c_client *i2c = to_i2c_client(st->dev);
+	int ret;
 
 	st->data.i2c[0] = (cmd << 4) | addr;
 	put_unaligned_be16(val, &st->data.i2c[1]);
-	return i2c_master_send(i2c, st->data.i2c, 3);
+
+	ret = i2c_master_send(i2c, st->data.i2c, 3);
+	if (ret < 0)
+		return ret;
+
+	return 0;
 }
 
 static int ad5064_i2c_probe(struct i2c_client *i2c,
@@ -616,12 +653,12 @@
 }
 
 static const struct i2c_device_id ad5064_i2c_ids[] = {
-	{"ad5629-1", ID_AD5628_1},
-	{"ad5629-2", ID_AD5628_2},
-	{"ad5629-3", ID_AD5628_2}, /* similar enough to ad5629-2 */
-	{"ad5669-1", ID_AD5668_1},
-	{"ad5669-2", ID_AD5668_2},
-	{"ad5669-3", ID_AD5668_2}, /* similar enough to ad5669-2 */
+	{"ad5629-1", ID_AD5629_1},
+	{"ad5629-2", ID_AD5629_2},
+	{"ad5629-3", ID_AD5629_2}, /* similar enough to ad5629-2 */
+	{"ad5669-1", ID_AD5669_1},
+	{"ad5669-2", ID_AD5669_2},
+	{"ad5669-3", ID_AD5669_2}, /* similar enough to ad5669-2 */
 	{}
 };
 MODULE_DEVICE_TABLE(i2c, ad5064_i2c_ids);
diff --git a/drivers/iio/humidity/si7020.c b/drivers/iio/humidity/si7020.c
index 12128d1..71991b5 100644
--- a/drivers/iio/humidity/si7020.c
+++ b/drivers/iio/humidity/si7020.c
@@ -50,10 +50,10 @@
 
 	switch (mask) {
 	case IIO_CHAN_INFO_RAW:
-		ret = i2c_smbus_read_word_data(*client,
-					       chan->type == IIO_TEMP ?
-					       SI7020CMD_TEMP_HOLD :
-					       SI7020CMD_RH_HOLD);
+		ret = i2c_smbus_read_word_swapped(*client,
+						  chan->type == IIO_TEMP ?
+						  SI7020CMD_TEMP_HOLD :
+						  SI7020CMD_RH_HOLD);
 		if (ret < 0)
 			return ret;
 		*val = ret >> 2;
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index cbe198c..471ee36 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -216,6 +216,7 @@
 	u8 *page_addr = (u8 *) (pa & PAGE_MASK);
 	dma_addr_t start_dma_addr = dma_addr;
 	unsigned long irq_flags, nr_pages, i;
+	unsigned long *entry;
 	int rc = 0;
 
 	if (dma_addr < s390_domain->domain.geometry.aperture_start ||
@@ -228,8 +229,12 @@
 
 	spin_lock_irqsave(&s390_domain->dma_table_lock, irq_flags);
 	for (i = 0; i < nr_pages; i++) {
-		dma_update_cpu_trans(s390_domain->dma_table, page_addr,
-				     dma_addr, flags);
+		entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr);
+		if (!entry) {
+			rc = -ENOMEM;
+			goto undo_cpu_trans;
+		}
+		dma_update_cpu_trans(entry, page_addr, flags);
 		page_addr += PAGE_SIZE;
 		dma_addr += PAGE_SIZE;
 	}
@@ -242,6 +247,20 @@
 			break;
 	}
 	spin_unlock(&s390_domain->list_lock);
+
+undo_cpu_trans:
+	if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) {
+		flags = ZPCI_PTE_INVALID;
+		while (i-- > 0) {
+			page_addr -= PAGE_SIZE;
+			dma_addr -= PAGE_SIZE;
+			entry = dma_walk_cpu_trans(s390_domain->dma_table,
+						   dma_addr);
+			if (!entry)
+				break;
+			dma_update_cpu_trans(entry, page_addr, flags);
+		}
+	}
 	spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags);
 
 	return rc;
diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c
index 44a077f..f174ce0 100644
--- a/drivers/irqchip/irq-gic-common.c
+++ b/drivers/irqchip/irq-gic-common.c
@@ -84,12 +84,15 @@
 		writel_relaxed(GICD_INT_DEF_PRI_X4, base + GIC_DIST_PRI + i);
 
 	/*
-	 * Disable all interrupts.  Leave the PPI and SGIs alone
-	 * as they are enabled by redistributor registers.
+	 * Deactivate and disable all SPIs. Leave the PPI and SGIs
+	 * alone as they are in the redistributor registers on GICv3.
 	 */
-	for (i = 32; i < gic_irqs; i += 32)
+	for (i = 32; i < gic_irqs; i += 32) {
 		writel_relaxed(GICD_INT_EN_CLR_X32,
-					base + GIC_DIST_ENABLE_CLEAR + i / 8);
+			       base + GIC_DIST_ACTIVE_CLEAR + i / 8);
+		writel_relaxed(GICD_INT_EN_CLR_X32,
+			       base + GIC_DIST_ENABLE_CLEAR + i / 8);
+	}
 
 	if (sync_access)
 		sync_access();
@@ -102,7 +105,9 @@
 	/*
 	 * Deal with the banked PPI and SGI interrupts - disable all
 	 * PPI interrupts, ensure all SGI interrupts are enabled.
+	 * Make sure everything is deactivated.
 	 */
+	writel_relaxed(GICD_INT_EN_CLR_X32, base + GIC_DIST_ACTIVE_CLEAR);
 	writel_relaxed(GICD_INT_EN_CLR_PPI, base + GIC_DIST_ENABLE_CLEAR);
 	writel_relaxed(GICD_INT_EN_SET_SGI, base + GIC_DIST_ENABLE_SET);
 
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 515c823..abf2ffa 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -73,9 +73,11 @@
 	union gic_base cpu_base;
 #ifdef CONFIG_CPU_PM
 	u32 saved_spi_enable[DIV_ROUND_UP(1020, 32)];
+	u32 saved_spi_active[DIV_ROUND_UP(1020, 32)];
 	u32 saved_spi_conf[DIV_ROUND_UP(1020, 16)];
 	u32 saved_spi_target[DIV_ROUND_UP(1020, 4)];
 	u32 __percpu *saved_ppi_enable;
+	u32 __percpu *saved_ppi_active;
 	u32 __percpu *saved_ppi_conf;
 #endif
 	struct irq_domain *domain;
@@ -566,6 +568,10 @@
 	for (i = 0; i < DIV_ROUND_UP(gic_irqs, 32); i++)
 		gic_data[gic_nr].saved_spi_enable[i] =
 			readl_relaxed(dist_base + GIC_DIST_ENABLE_SET + i * 4);
+
+	for (i = 0; i < DIV_ROUND_UP(gic_irqs, 32); i++)
+		gic_data[gic_nr].saved_spi_active[i] =
+			readl_relaxed(dist_base + GIC_DIST_ACTIVE_SET + i * 4);
 }
 
 /*
@@ -604,9 +610,19 @@
 		writel_relaxed(gic_data[gic_nr].saved_spi_target[i],
 			dist_base + GIC_DIST_TARGET + i * 4);
 
-	for (i = 0; i < DIV_ROUND_UP(gic_irqs, 32); i++)
+	for (i = 0; i < DIV_ROUND_UP(gic_irqs, 32); i++) {
+		writel_relaxed(GICD_INT_EN_CLR_X32,
+			dist_base + GIC_DIST_ENABLE_CLEAR + i * 4);
 		writel_relaxed(gic_data[gic_nr].saved_spi_enable[i],
 			dist_base + GIC_DIST_ENABLE_SET + i * 4);
+	}
+
+	for (i = 0; i < DIV_ROUND_UP(gic_irqs, 32); i++) {
+		writel_relaxed(GICD_INT_EN_CLR_X32,
+			dist_base + GIC_DIST_ACTIVE_CLEAR + i * 4);
+		writel_relaxed(gic_data[gic_nr].saved_spi_active[i],
+			dist_base + GIC_DIST_ACTIVE_SET + i * 4);
+	}
 
 	writel_relaxed(GICD_ENABLE, dist_base + GIC_DIST_CTRL);
 }
@@ -631,6 +647,10 @@
 	for (i = 0; i < DIV_ROUND_UP(32, 32); i++)
 		ptr[i] = readl_relaxed(dist_base + GIC_DIST_ENABLE_SET + i * 4);
 
+	ptr = raw_cpu_ptr(gic_data[gic_nr].saved_ppi_active);
+	for (i = 0; i < DIV_ROUND_UP(32, 32); i++)
+		ptr[i] = readl_relaxed(dist_base + GIC_DIST_ACTIVE_SET + i * 4);
+
 	ptr = raw_cpu_ptr(gic_data[gic_nr].saved_ppi_conf);
 	for (i = 0; i < DIV_ROUND_UP(32, 16); i++)
 		ptr[i] = readl_relaxed(dist_base + GIC_DIST_CONFIG + i * 4);
@@ -654,8 +674,18 @@
 		return;
 
 	ptr = raw_cpu_ptr(gic_data[gic_nr].saved_ppi_enable);
-	for (i = 0; i < DIV_ROUND_UP(32, 32); i++)
+	for (i = 0; i < DIV_ROUND_UP(32, 32); i++) {
+		writel_relaxed(GICD_INT_EN_CLR_X32,
+			       dist_base + GIC_DIST_ENABLE_CLEAR + i * 4);
 		writel_relaxed(ptr[i], dist_base + GIC_DIST_ENABLE_SET + i * 4);
+	}
+
+	ptr = raw_cpu_ptr(gic_data[gic_nr].saved_ppi_active);
+	for (i = 0; i < DIV_ROUND_UP(32, 32); i++) {
+		writel_relaxed(GICD_INT_EN_CLR_X32,
+			       dist_base + GIC_DIST_ACTIVE_CLEAR + i * 4);
+		writel_relaxed(ptr[i], dist_base + GIC_DIST_ACTIVE_SET + i * 4);
+	}
 
 	ptr = raw_cpu_ptr(gic_data[gic_nr].saved_ppi_conf);
 	for (i = 0; i < DIV_ROUND_UP(32, 16); i++)
@@ -710,6 +740,10 @@
 		sizeof(u32));
 	BUG_ON(!gic->saved_ppi_enable);
 
+	gic->saved_ppi_active = __alloc_percpu(DIV_ROUND_UP(32, 32) * 4,
+		sizeof(u32));
+	BUG_ON(!gic->saved_ppi_active);
+
 	gic->saved_ppi_conf = __alloc_percpu(DIV_ROUND_UP(32, 16) * 4,
 		sizeof(u32));
 	BUG_ON(!gic->saved_ppi_conf);
diff --git a/drivers/isdn/hisax/config.c b/drivers/isdn/hisax/config.c
index b33f53b..bf04d2a 100644
--- a/drivers/isdn/hisax/config.c
+++ b/drivers/isdn/hisax/config.c
@@ -1896,7 +1896,7 @@
 				ptr--;
 				*ptr++ = '\n';
 				*ptr = 0;
-				HiSax_putstatus(cs, NULL, "%s", cs->dlog);
+				HiSax_putstatus(cs, NULL, cs->dlog);
 			} else
 				HiSax_putstatus(cs, "LogEcho: ",
 						"warning Frame too big (%d)",
diff --git a/drivers/isdn/hisax/hfc_pci.c b/drivers/isdn/hisax/hfc_pci.c
index 4a48255..90449e1 100644
--- a/drivers/isdn/hisax/hfc_pci.c
+++ b/drivers/isdn/hisax/hfc_pci.c
@@ -901,7 +901,7 @@
 					ptr--;
 					*ptr++ = '\n';
 					*ptr = 0;
-					HiSax_putstatus(cs, NULL, "%s", cs->dlog);
+					HiSax_putstatus(cs, NULL, cs->dlog);
 				} else
 					HiSax_putstatus(cs, "LogEcho: ", "warning Frame too big (%d)", total - 3);
 			}
diff --git a/drivers/isdn/hisax/hfc_sx.c b/drivers/isdn/hisax/hfc_sx.c
index b1fad81..13b2151 100644
--- a/drivers/isdn/hisax/hfc_sx.c
+++ b/drivers/isdn/hisax/hfc_sx.c
@@ -674,7 +674,7 @@
 					ptr--;
 					*ptr++ = '\n';
 					*ptr = 0;
-					HiSax_putstatus(cs, NULL, "%s", cs->dlog);
+					HiSax_putstatus(cs, NULL, cs->dlog);
 				} else
 					HiSax_putstatus(cs, "LogEcho: ", "warning Frame too big (%d)", skb->len);
 			}
diff --git a/drivers/isdn/hisax/q931.c b/drivers/isdn/hisax/q931.c
index b420f8b..ba4beb2 100644
--- a/drivers/isdn/hisax/q931.c
+++ b/drivers/isdn/hisax/q931.c
@@ -1179,7 +1179,7 @@
 		dp--;
 		*dp++ = '\n';
 		*dp = 0;
-		HiSax_putstatus(cs, NULL, "%s", cs->dlog);
+		HiSax_putstatus(cs, NULL, cs->dlog);
 	} else
 		HiSax_putstatus(cs, "LogFrame: ", "warning Frame too big (%d)", size);
 }
@@ -1246,7 +1246,7 @@
 	}
 	if (finish) {
 		*dp = 0;
-		HiSax_putstatus(cs, NULL, "%s", cs->dlog);
+		HiSax_putstatus(cs, NULL, cs->dlog);
 		return;
 	}
 	if ((0xfe & buf[0]) == PROTO_DIS_N0) {	/* 1TR6 */
@@ -1509,5 +1509,5 @@
 		dp += sprintf(dp, "Unknown protocol %x!", buf[0]);
 	}
 	*dp = 0;
-	HiSax_putstatus(cs, NULL, "%s", cs->dlog);
+	HiSax_putstatus(cs, NULL, cs->dlog);
 }
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index f659e60..86ce887 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -123,6 +123,26 @@
 }
 EXPORT_SYMBOL(nvm_unregister_mgr);
 
+/* register with device with a supported manager */
+static int register_mgr(struct nvm_dev *dev)
+{
+	struct nvmm_type *mt;
+	int ret = 0;
+
+	list_for_each_entry(mt, &nvm_mgrs, list) {
+		ret = mt->register_mgr(dev);
+		if (ret > 0) {
+			dev->mt = mt;
+			break; /* successfully initialized */
+		}
+	}
+
+	if (!ret)
+		pr_info("nvm: no compatible nvm manager found.\n");
+
+	return ret;
+}
+
 static struct nvm_dev *nvm_find_nvm_dev(const char *name)
 {
 	struct nvm_dev *dev;
@@ -160,11 +180,6 @@
 }
 EXPORT_SYMBOL(nvm_erase_blk);
 
-static void nvm_core_free(struct nvm_dev *dev)
-{
-	kfree(dev);
-}
-
 static int nvm_core_init(struct nvm_dev *dev)
 {
 	struct nvm_id *id = &dev->identity;
@@ -179,12 +194,21 @@
 	dev->sec_size = grp->csecs;
 	dev->oob_size = grp->sos;
 	dev->sec_per_pg = grp->fpg_sz / grp->csecs;
-	dev->addr_mode = id->ppat;
-	dev->addr_format = id->ppaf;
+	memcpy(&dev->ppaf, &id->ppaf, sizeof(struct nvm_addr_format));
 
 	dev->plane_mode = NVM_PLANE_SINGLE;
 	dev->max_rq_size = dev->ops->max_phys_sect * dev->sec_size;
 
+	if (grp->mtype != 0) {
+		pr_err("nvm: memory type not supported\n");
+		return -EINVAL;
+	}
+
+	if (grp->fmtype != 0 && grp->fmtype != 1) {
+		pr_err("nvm: flash type not supported\n");
+		return -EINVAL;
+	}
+
 	if (grp->mpos & 0x020202)
 		dev->plane_mode = NVM_PLANE_DOUBLE;
 	if (grp->mpos & 0x040404)
@@ -213,21 +237,17 @@
 
 	if (dev->mt)
 		dev->mt->unregister_mgr(dev);
-
-	nvm_core_free(dev);
 }
 
 static int nvm_init(struct nvm_dev *dev)
 {
-	struct nvmm_type *mt;
-	int ret = 0;
+	int ret = -EINVAL;
 
 	if (!dev->q || !dev->ops)
-		return -EINVAL;
+		return ret;
 
 	if (dev->ops->identity(dev->q, &dev->identity)) {
 		pr_err("nvm: device could not be identified\n");
-		ret = -EINVAL;
 		goto err;
 	}
 
@@ -251,21 +271,13 @@
 		goto err;
 	}
 
-	/* register with device with a supported manager */
-	list_for_each_entry(mt, &nvm_mgrs, list) {
-		ret = mt->register_mgr(dev);
-		if (ret < 0)
-			goto err; /* initialization failed */
-		if (ret > 0) {
-			dev->mt = mt;
-			break; /* successfully initialized */
-		}
-	}
-
-	if (!ret) {
-		pr_info("nvm: no compatible manager found.\n");
+	down_write(&nvm_lock);
+	ret = register_mgr(dev);
+	up_write(&nvm_lock);
+	if (ret < 0)
+		goto err;
+	if (!ret)
 		return 0;
-	}
 
 	pr_info("nvm: registered %s [%u/%u/%u/%u/%u/%u]\n",
 			dev->name, dev->sec_per_pg, dev->nr_planes,
@@ -273,7 +285,6 @@
 			dev->nr_chnls);
 	return 0;
 err:
-	nvm_free(dev);
 	pr_err("nvm: failed to initialize nvm\n");
 	return ret;
 }
@@ -308,22 +319,26 @@
 	if (ret)
 		goto err_init;
 
-	down_write(&nvm_lock);
-	list_add(&dev->devices, &nvm_devices);
-	up_write(&nvm_lock);
+	if (dev->ops->max_phys_sect > 256) {
+		pr_info("nvm: max sectors supported is 256.\n");
+		ret = -EINVAL;
+		goto err_init;
+	}
 
 	if (dev->ops->max_phys_sect > 1) {
 		dev->ppalist_pool = dev->ops->create_dma_pool(dev->q,
 								"ppalist");
 		if (!dev->ppalist_pool) {
 			pr_err("nvm: could not create ppa pool\n");
-			return -ENOMEM;
+			ret = -ENOMEM;
+			goto err_init;
 		}
-	} else if (dev->ops->max_phys_sect > 256) {
-		pr_info("nvm: max sectors supported is 256.\n");
-		return -EINVAL;
 	}
 
+	down_write(&nvm_lock);
+	list_add(&dev->devices, &nvm_devices);
+	up_write(&nvm_lock);
+
 	return 0;
 err_init:
 	kfree(dev);
@@ -333,19 +348,22 @@
 
 void nvm_unregister(char *disk_name)
 {
-	struct nvm_dev *dev = nvm_find_nvm_dev(disk_name);
+	struct nvm_dev *dev;
 
+	down_write(&nvm_lock);
+	dev = nvm_find_nvm_dev(disk_name);
 	if (!dev) {
 		pr_err("nvm: could not find device %s to unregister\n",
 								disk_name);
+		up_write(&nvm_lock);
 		return;
 	}
 
-	nvm_exit(dev);
-
-	down_write(&nvm_lock);
 	list_del(&dev->devices);
 	up_write(&nvm_lock);
+
+	nvm_exit(dev);
+	kfree(dev);
 }
 EXPORT_SYMBOL(nvm_unregister);
 
@@ -358,38 +376,30 @@
 {
 	struct nvm_ioctl_create_simple *s = &create->conf.s;
 	struct request_queue *tqueue;
-	struct nvmm_type *mt;
 	struct gendisk *tdisk;
 	struct nvm_tgt_type *tt;
 	struct nvm_target *t;
 	void *targetdata;
 	int ret = 0;
 
+	down_write(&nvm_lock);
 	if (!dev->mt) {
-		/* register with device with a supported NVM manager */
-		list_for_each_entry(mt, &nvm_mgrs, list) {
-			ret = mt->register_mgr(dev);
-			if (ret < 0)
-				return ret; /* initialization failed */
-			if (ret > 0) {
-				dev->mt = mt;
-				break; /* successfully initialized */
-			}
-		}
-
-		if (!ret) {
-			pr_info("nvm: no compatible nvm manager found.\n");
-			return -ENODEV;
+		ret = register_mgr(dev);
+		if (!ret)
+			ret = -ENODEV;
+		if (ret < 0) {
+			up_write(&nvm_lock);
+			return ret;
 		}
 	}
 
 	tt = nvm_find_target_type(create->tgttype);
 	if (!tt) {
 		pr_err("nvm: target type %s not found\n", create->tgttype);
+		up_write(&nvm_lock);
 		return -EINVAL;
 	}
 
-	down_write(&nvm_lock);
 	list_for_each_entry(t, &dev->online_targets, list) {
 		if (!strcmp(create->tgtname, t->disk->disk_name)) {
 			pr_err("nvm: target name already exists.\n");
@@ -457,11 +467,11 @@
 	lockdep_assert_held(&nvm_lock);
 
 	del_gendisk(tdisk);
+	blk_cleanup_queue(q);
+
 	if (tt->exit)
 		tt->exit(tdisk->private_data);
 
-	blk_cleanup_queue(q);
-
 	put_disk(tdisk);
 
 	list_del(&t->list);
@@ -473,7 +483,9 @@
 	struct nvm_dev *dev;
 	struct nvm_ioctl_create_simple *s;
 
+	down_write(&nvm_lock);
 	dev = nvm_find_nvm_dev(create->dev);
+	up_write(&nvm_lock);
 	if (!dev) {
 		pr_err("nvm: device not found\n");
 		return -EINVAL;
@@ -532,7 +544,9 @@
 		return -EINVAL;
 	}
 
+	down_write(&nvm_lock);
 	dev = nvm_find_nvm_dev(devname);
+	up_write(&nvm_lock);
 	if (!dev) {
 		pr_err("nvm: device not found\n");
 		return -EINVAL;
@@ -541,7 +555,7 @@
 	if (!dev->mt)
 		return 0;
 
-	dev->mt->free_blocks_print(dev);
+	dev->mt->lun_info_print(dev);
 
 	return 0;
 }
@@ -677,8 +691,10 @@
 	info->tgtsize = tgt_iter;
 	up_write(&nvm_lock);
 
-	if (copy_to_user(arg, info, sizeof(struct nvm_ioctl_info)))
+	if (copy_to_user(arg, info, sizeof(struct nvm_ioctl_info))) {
+		kfree(info);
 		return -EFAULT;
+	}
 
 	kfree(info);
 	return 0;
@@ -721,8 +737,11 @@
 
 	devices->nr_devices = i;
 
-	if (copy_to_user(arg, devices, sizeof(struct nvm_ioctl_get_devices)))
+	if (copy_to_user(arg, devices,
+			 sizeof(struct nvm_ioctl_get_devices))) {
+		kfree(devices);
 		return -EFAULT;
+	}
 
 	kfree(devices);
 	return 0;
diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c
index ae1fb2b..35dde84 100644
--- a/drivers/lightnvm/gennvm.c
+++ b/drivers/lightnvm/gennvm.c
@@ -60,23 +60,27 @@
 		lun->vlun.lun_id = i % dev->luns_per_chnl;
 		lun->vlun.chnl_id = i / dev->luns_per_chnl;
 		lun->vlun.nr_free_blocks = dev->blks_per_lun;
+		lun->vlun.nr_inuse_blocks = 0;
+		lun->vlun.nr_bad_blocks = 0;
 	}
 	return 0;
 }
 
-static int gennvm_block_bb(u32 lun_id, void *bb_bitmap, unsigned int nr_blocks,
+static int gennvm_block_bb(struct ppa_addr ppa, int nr_blocks, u8 *blks,
 								void *private)
 {
 	struct gen_nvm *gn = private;
-	struct gen_lun *lun = &gn->luns[lun_id];
+	struct nvm_dev *dev = gn->dev;
+	struct gen_lun *lun;
 	struct nvm_block *blk;
 	int i;
 
-	if (unlikely(bitmap_empty(bb_bitmap, nr_blocks)))
-		return 0;
+	lun = &gn->luns[(dev->nr_luns * ppa.g.ch) + ppa.g.lun];
 
-	i = -1;
-	while ((i = find_next_bit(bb_bitmap, nr_blocks, i + 1)) < nr_blocks) {
+	for (i = 0; i < nr_blocks; i++) {
+		if (blks[i] == 0)
+			continue;
+
 		blk = &lun->vlun.blocks[i];
 		if (!blk) {
 			pr_err("gennvm: BB data is out of bounds.\n");
@@ -84,6 +88,7 @@
 		}
 
 		list_move_tail(&blk->list, &lun->bb_list);
+		lun->vlun.nr_bad_blocks++;
 	}
 
 	return 0;
@@ -136,6 +141,7 @@
 			list_move_tail(&blk->list, &lun->used_list);
 			blk->type = 1;
 			lun->vlun.nr_free_blocks--;
+			lun->vlun.nr_inuse_blocks++;
 		}
 	}
 
@@ -164,15 +170,25 @@
 			block->id = cur_block_id++;
 
 			/* First block is reserved for device */
-			if (unlikely(lun_iter == 0 && blk_iter == 0))
+			if (unlikely(lun_iter == 0 && blk_iter == 0)) {
+				lun->vlun.nr_free_blocks--;
 				continue;
+			}
 
 			list_add_tail(&block->list, &lun->free_list);
 		}
 
 		if (dev->ops->get_bb_tbl) {
-			ret = dev->ops->get_bb_tbl(dev->q, lun->vlun.id,
-					dev->blks_per_lun, gennvm_block_bb, gn);
+			struct ppa_addr ppa;
+
+			ppa.ppa = 0;
+			ppa.g.ch = lun->vlun.chnl_id;
+			ppa.g.lun = lun->vlun.id;
+			ppa = generic_to_dev_addr(dev, ppa);
+
+			ret = dev->ops->get_bb_tbl(dev, ppa,
+						dev->blks_per_lun,
+						gennvm_block_bb, gn);
 			if (ret)
 				pr_err("gennvm: could not read BB table\n");
 		}
@@ -190,6 +206,14 @@
 	return 0;
 }
 
+static void gennvm_free(struct nvm_dev *dev)
+{
+	gennvm_blocks_free(dev);
+	gennvm_luns_free(dev);
+	kfree(dev->mp);
+	dev->mp = NULL;
+}
+
 static int gennvm_register(struct nvm_dev *dev)
 {
 	struct gen_nvm *gn;
@@ -199,6 +223,7 @@
 	if (!gn)
 		return -ENOMEM;
 
+	gn->dev = dev;
 	gn->nr_luns = dev->nr_luns;
 	dev->mp = gn;
 
@@ -216,16 +241,13 @@
 
 	return 1;
 err:
-	kfree(gn);
+	gennvm_free(dev);
 	return ret;
 }
 
 static void gennvm_unregister(struct nvm_dev *dev)
 {
-	gennvm_blocks_free(dev);
-	gennvm_luns_free(dev);
-	kfree(dev->mp);
-	dev->mp = NULL;
+	gennvm_free(dev);
 }
 
 static struct nvm_block *gennvm_get_blk(struct nvm_dev *dev,
@@ -254,6 +276,7 @@
 	blk->type = 1;
 
 	lun->vlun.nr_free_blocks--;
+	lun->vlun.nr_inuse_blocks++;
 
 	spin_unlock(&vlun->lock);
 out:
@@ -271,16 +294,21 @@
 	case 1:
 		list_move_tail(&blk->list, &lun->free_list);
 		lun->vlun.nr_free_blocks++;
+		lun->vlun.nr_inuse_blocks--;
 		blk->type = 0;
 		break;
 	case 2:
 		list_move_tail(&blk->list, &lun->bb_list);
+		lun->vlun.nr_bad_blocks++;
+		lun->vlun.nr_inuse_blocks--;
 		break;
 	default:
 		WARN_ON_ONCE(1);
 		pr_err("gennvm: erroneous block type (%lu -> %u)\n",
 							blk->id, blk->type);
 		list_move_tail(&blk->list, &lun->bb_list);
+		lun->vlun.nr_bad_blocks++;
+		lun->vlun.nr_inuse_blocks--;
 	}
 
 	spin_unlock(&vlun->lock);
@@ -292,10 +320,10 @@
 
 	if (rqd->nr_pages > 1) {
 		for (i = 0; i < rqd->nr_pages; i++)
-			rqd->ppa_list[i] = addr_to_generic_mode(dev,
+			rqd->ppa_list[i] = dev_to_generic_addr(dev,
 							rqd->ppa_list[i]);
 	} else {
-		rqd->ppa_addr = addr_to_generic_mode(dev, rqd->ppa_addr);
+		rqd->ppa_addr = dev_to_generic_addr(dev, rqd->ppa_addr);
 	}
 }
 
@@ -305,10 +333,10 @@
 
 	if (rqd->nr_pages > 1) {
 		for (i = 0; i < rqd->nr_pages; i++)
-			rqd->ppa_list[i] = generic_to_addr_mode(dev,
+			rqd->ppa_list[i] = generic_to_dev_addr(dev,
 							rqd->ppa_list[i]);
 	} else {
-		rqd->ppa_addr = generic_to_addr_mode(dev, rqd->ppa_addr);
+		rqd->ppa_addr = generic_to_dev_addr(dev, rqd->ppa_addr);
 	}
 }
 
@@ -354,10 +382,10 @@
 {
 	int i;
 
-	if (!dev->ops->set_bb)
+	if (!dev->ops->set_bb_tbl)
 		return;
 
-	if (dev->ops->set_bb(dev->q, rqd, 1))
+	if (dev->ops->set_bb_tbl(dev->q, rqd, 1))
 		return;
 
 	gennvm_addr_to_generic_mode(dev, rqd);
@@ -440,15 +468,24 @@
 	return &gn->luns[lunid].vlun;
 }
 
-static void gennvm_free_blocks_print(struct nvm_dev *dev)
+static void gennvm_lun_info_print(struct nvm_dev *dev)
 {
 	struct gen_nvm *gn = dev->mp;
 	struct gen_lun *lun;
 	unsigned int i;
 
-	gennvm_for_each_lun(gn, lun, i)
-		pr_info("%s: lun%8u\t%u\n",
-					dev->name, i, lun->vlun.nr_free_blocks);
+
+	gennvm_for_each_lun(gn, lun, i) {
+		spin_lock(&lun->vlun.lock);
+
+		pr_info("%s: lun%8u\t%u\t%u\t%u\n",
+				dev->name, i,
+				lun->vlun.nr_free_blocks,
+				lun->vlun.nr_inuse_blocks,
+				lun->vlun.nr_bad_blocks);
+
+		spin_unlock(&lun->vlun.lock);
+	}
 }
 
 static struct nvmm_type gennvm = {
@@ -466,7 +503,7 @@
 	.erase_blk	= gennvm_erase_blk,
 
 	.get_lun	= gennvm_get_lun,
-	.free_blocks_print = gennvm_free_blocks_print,
+	.lun_info_print = gennvm_lun_info_print,
 };
 
 static int __init gennvm_module_init(void)
diff --git a/drivers/lightnvm/gennvm.h b/drivers/lightnvm/gennvm.h
index d23bd35..9c24b5b 100644
--- a/drivers/lightnvm/gennvm.h
+++ b/drivers/lightnvm/gennvm.h
@@ -35,6 +35,8 @@
 };
 
 struct gen_nvm {
+	struct nvm_dev *dev;
+
 	int nr_luns;
 	struct gen_lun *luns;
 };
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index 7ba64c8..75e59c3 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -123,12 +123,42 @@
 	return blk->id * rrpc->dev->pgs_per_blk;
 }
 
+static struct ppa_addr linear_to_generic_addr(struct nvm_dev *dev,
+							struct ppa_addr r)
+{
+	struct ppa_addr l;
+	int secs, pgs, blks, luns;
+	sector_t ppa = r.ppa;
+
+	l.ppa = 0;
+
+	div_u64_rem(ppa, dev->sec_per_pg, &secs);
+	l.g.sec = secs;
+
+	sector_div(ppa, dev->sec_per_pg);
+	div_u64_rem(ppa, dev->sec_per_blk, &pgs);
+	l.g.pg = pgs;
+
+	sector_div(ppa, dev->pgs_per_blk);
+	div_u64_rem(ppa, dev->blks_per_lun, &blks);
+	l.g.blk = blks;
+
+	sector_div(ppa, dev->blks_per_lun);
+	div_u64_rem(ppa, dev->luns_per_chnl, &luns);
+	l.g.lun = luns;
+
+	sector_div(ppa, dev->luns_per_chnl);
+	l.g.ch = ppa;
+
+	return l;
+}
+
 static struct ppa_addr rrpc_ppa_to_gaddr(struct nvm_dev *dev, u64 addr)
 {
 	struct ppa_addr paddr;
 
 	paddr.ppa = addr;
-	return __linear_to_generic_addr(dev, paddr);
+	return linear_to_generic_addr(dev, paddr);
 }
 
 /* requires lun->lock taken */
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 917d47e..3147c8d 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -112,7 +112,8 @@
  * and encrypts / decrypts at the same time.
  */
 enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID,
-	     DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD };
+	     DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD,
+	     DM_CRYPT_EXIT_THREAD};
 
 /*
  * The fields in here must be read only after initialization.
@@ -1203,20 +1204,18 @@
 		if (!RB_EMPTY_ROOT(&cc->write_tree))
 			goto pop_from_list;
 
+		if (unlikely(test_bit(DM_CRYPT_EXIT_THREAD, &cc->flags))) {
+			spin_unlock_irq(&cc->write_thread_wait.lock);
+			break;
+		}
+
 		__set_current_state(TASK_INTERRUPTIBLE);
 		__add_wait_queue(&cc->write_thread_wait, &wait);
 
 		spin_unlock_irq(&cc->write_thread_wait.lock);
 
-		if (unlikely(kthread_should_stop())) {
-			set_task_state(current, TASK_RUNNING);
-			remove_wait_queue(&cc->write_thread_wait, &wait);
-			break;
-		}
-
 		schedule();
 
-		set_task_state(current, TASK_RUNNING);
 		spin_lock_irq(&cc->write_thread_wait.lock);
 		__remove_wait_queue(&cc->write_thread_wait, &wait);
 		goto continue_locked;
@@ -1531,8 +1530,13 @@
 	if (!cc)
 		return;
 
-	if (cc->write_thread)
+	if (cc->write_thread) {
+		spin_lock_irq(&cc->write_thread_wait.lock);
+		set_bit(DM_CRYPT_EXIT_THREAD, &cc->flags);
+		wake_up_locked(&cc->write_thread_wait);
+		spin_unlock_irq(&cc->write_thread_wait.lock);
 		kthread_stop(cc->write_thread);
+	}
 
 	if (cc->io_queue)
 		destroy_workqueue(cc->io_queue);
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index aaa6caa..cfa29f5 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -1537,32 +1537,34 @@
 		struct block_device **bdev, fmode_t *mode)
 {
 	struct multipath *m = ti->private;
-	struct pgpath *pgpath;
 	unsigned long flags;
 	int r;
 
-	r = 0;
-
 	spin_lock_irqsave(&m->lock, flags);
 
 	if (!m->current_pgpath)
 		__choose_pgpath(m, 0);
 
-	pgpath = m->current_pgpath;
-
-	if (pgpath) {
-		*bdev = pgpath->path.dev->bdev;
-		*mode = pgpath->path.dev->mode;
+	if (m->current_pgpath) {
+		if (!m->queue_io) {
+			*bdev = m->current_pgpath->path.dev->bdev;
+			*mode = m->current_pgpath->path.dev->mode;
+			r = 0;
+		} else {
+			/* pg_init has not started or completed */
+			r = -ENOTCONN;
+		}
+	} else {
+		/* No path is available */
+		if (m->queue_if_no_path)
+			r = -ENOTCONN;
+		else
+			r = -EIO;
 	}
 
-	if ((pgpath && m->queue_io) || (!pgpath && m->queue_if_no_path))
-		r = -ENOTCONN;
-	else if (!*bdev)
-		r = -EIO;
-
 	spin_unlock_irqrestore(&m->lock, flags);
 
-	if (r == -ENOTCONN && !fatal_signal_pending(current)) {
+	if (r == -ENOTCONN) {
 		spin_lock_irqsave(&m->lock, flags);
 		if (!m->current_pg) {
 			/* Path status changed, redo selection */
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 3897b90..63903a5 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -2432,6 +2432,7 @@
 	case PM_WRITE:
 		if (old_mode != new_mode)
 			notify_of_pool_mode_change(pool, "write");
+		pool->pf.error_if_no_space = pt->requested_pf.error_if_no_space;
 		dm_pool_metadata_read_write(pool->pmd);
 		pool->process_bio = process_bio;
 		pool->process_discard = process_discard_bio;
@@ -4249,10 +4250,9 @@
 {
 	struct thin_c *tc = ti->private;
 	struct pool *pool = tc->pool;
-	struct queue_limits *pool_limits = dm_get_queue_limits(pool->pool_md);
 
-	if (!pool_limits->discard_granularity)
-		return; /* pool's discard support is disabled */
+	if (!pool->pf.discard_enabled)
+		return;
 
 	limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT;
 	limits->max_discard_sectors = 2048 * 1024 * 16; /* 16G */
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 6e15f35..5df4048 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -591,7 +591,7 @@
 
 out:
 	dm_put_live_table(md, *srcu_idx);
-	if (r == -ENOTCONN) {
+	if (r == -ENOTCONN && !fatal_signal_pending(current)) {
 		msleep(10);
 		goto retry;
 	}
@@ -603,9 +603,10 @@
 {
 	struct mapped_device *md = bdev->bd_disk->private_data;
 	struct dm_target *tgt;
+	struct block_device *tgt_bdev = NULL;
 	int srcu_idx, r;
 
-	r = dm_get_live_table_for_ioctl(md, &tgt, &bdev, &mode, &srcu_idx);
+	r = dm_get_live_table_for_ioctl(md, &tgt, &tgt_bdev, &mode, &srcu_idx);
 	if (r < 0)
 		return r;
 
@@ -620,7 +621,7 @@
 			goto out;
 	}
 
-	r =  __blkdev_driver_ioctl(bdev, mode, cmd, arg);
+	r =  __blkdev_driver_ioctl(tgt_bdev, mode, cmd, arg);
 out:
 	dm_put_live_table(md, srcu_idx);
 	return r;
diff --git a/drivers/media/pci/cx23885/cx23885-core.c b/drivers/media/pci/cx23885/cx23885-core.c
index 35759a9..e8f8472 100644
--- a/drivers/media/pci/cx23885/cx23885-core.c
+++ b/drivers/media/pci/cx23885/cx23885-core.c
@@ -1992,9 +1992,9 @@
 		(unsigned long long)pci_resource_start(pci_dev, 0));
 
 	pci_set_master(pci_dev);
-	if (!pci_set_dma_mask(pci_dev, 0xffffffff)) {
+	err = pci_set_dma_mask(pci_dev, 0xffffffff);
+	if (err) {
 		printk("%s/0: Oops: no 32bit PCI DMA ???\n", dev->name);
-		err = -EIO;
 		goto fail_context;
 	}
 
diff --git a/drivers/media/pci/cx25821/cx25821-core.c b/drivers/media/pci/cx25821/cx25821-core.c
index dbc695f..0042803 100644
--- a/drivers/media/pci/cx25821/cx25821-core.c
+++ b/drivers/media/pci/cx25821/cx25821-core.c
@@ -1319,7 +1319,8 @@
 		dev->pci_lat, (unsigned long long)dev->base_io_addr);
 
 	pci_set_master(pci_dev);
-	if (!pci_set_dma_mask(pci_dev, 0xffffffff)) {
+	err = pci_set_dma_mask(pci_dev, 0xffffffff);
+	if (err) {
 		pr_err("%s/0: Oops: no 32bit PCI DMA ???\n", dev->name);
 		err = -EIO;
 		goto fail_irq;
diff --git a/drivers/media/pci/cx88/cx88-alsa.c b/drivers/media/pci/cx88/cx88-alsa.c
index 0ed1b65..1b5268f 100644
--- a/drivers/media/pci/cx88/cx88-alsa.c
+++ b/drivers/media/pci/cx88/cx88-alsa.c
@@ -890,9 +890,9 @@
 		return err;
 	}
 
-	if (!pci_set_dma_mask(pci,DMA_BIT_MASK(32))) {
+	err = pci_set_dma_mask(pci,DMA_BIT_MASK(32));
+	if (err) {
 		dprintk(0, "%s/1: Oops: no 32bit PCI DMA ???\n",core->name);
-		err = -EIO;
 		cx88_core_put(core, pci);
 		return err;
 	}
diff --git a/drivers/media/pci/cx88/cx88-mpeg.c b/drivers/media/pci/cx88/cx88-mpeg.c
index 9db7767..f34c229 100644
--- a/drivers/media/pci/cx88/cx88-mpeg.c
+++ b/drivers/media/pci/cx88/cx88-mpeg.c
@@ -393,7 +393,8 @@
 	if (pci_enable_device(dev->pci))
 		return -EIO;
 	pci_set_master(dev->pci);
-	if (!pci_set_dma_mask(dev->pci,DMA_BIT_MASK(32))) {
+	err = pci_set_dma_mask(dev->pci,DMA_BIT_MASK(32));
+	if (err) {
 		printk("%s/2: Oops: no 32bit PCI DMA ???\n",dev->core->name);
 		return -EIO;
 	}
diff --git a/drivers/media/pci/cx88/cx88-video.c b/drivers/media/pci/cx88/cx88-video.c
index 0de1ad5..aef9acf 100644
--- a/drivers/media/pci/cx88/cx88-video.c
+++ b/drivers/media/pci/cx88/cx88-video.c
@@ -1314,9 +1314,9 @@
 	       dev->pci_lat,(unsigned long long)pci_resource_start(pci_dev,0));
 
 	pci_set_master(pci_dev);
-	if (!pci_set_dma_mask(pci_dev,DMA_BIT_MASK(32))) {
+	err = pci_set_dma_mask(pci_dev,DMA_BIT_MASK(32));
+	if (err) {
 		printk("%s/0: Oops: no 32bit PCI DMA ???\n",core->name);
-		err = -EIO;
 		goto fail_core;
 	}
 	dev->alloc_ctx = vb2_dma_sg_init_ctx(&pci_dev->dev);
diff --git a/drivers/media/pci/netup_unidvb/netup_unidvb_core.c b/drivers/media/pci/netup_unidvb/netup_unidvb_core.c
index 60b2d46..3fdbd81 100644
--- a/drivers/media/pci/netup_unidvb/netup_unidvb_core.c
+++ b/drivers/media/pci/netup_unidvb/netup_unidvb_core.c
@@ -810,7 +810,7 @@
 		"%s(): board vendor 0x%x, revision 0x%x\n",
 		__func__, board_vendor, board_revision);
 	pci_set_master(pci_dev);
-	if (!pci_set_dma_mask(pci_dev, 0xffffffff)) {
+	if (pci_set_dma_mask(pci_dev, 0xffffffff) < 0) {
 		dev_err(&pci_dev->dev,
 			"%s(): 32bit PCI DMA is not supported\n", __func__);
 		goto pci_detect_err;
diff --git a/drivers/media/pci/saa7134/saa7134-core.c b/drivers/media/pci/saa7134/saa7134-core.c
index e79d63e..f720cea 100644
--- a/drivers/media/pci/saa7134/saa7134-core.c
+++ b/drivers/media/pci/saa7134/saa7134-core.c
@@ -951,9 +951,9 @@
 	       pci_name(pci_dev), dev->pci_rev, pci_dev->irq,
 	       dev->pci_lat,(unsigned long long)pci_resource_start(pci_dev,0));
 	pci_set_master(pci_dev);
-	if (!pci_set_dma_mask(pci_dev, DMA_BIT_MASK(32))) {
+	err = pci_set_dma_mask(pci_dev, DMA_BIT_MASK(32));
+	if (err) {
 		pr_warn("%s: Oops: no 32bit PCI DMA ???\n", dev->name);
-		err = -EIO;
 		goto fail1;
 	}
 
diff --git a/drivers/media/pci/saa7164/saa7164-core.c b/drivers/media/pci/saa7164/saa7164-core.c
index 8f36b48..8bbd092 100644
--- a/drivers/media/pci/saa7164/saa7164-core.c
+++ b/drivers/media/pci/saa7164/saa7164-core.c
@@ -1264,9 +1264,9 @@
 
 	pci_set_master(pci_dev);
 	/* TODO */
-	if (!pci_set_dma_mask(pci_dev, 0xffffffff)) {
+	err = pci_set_dma_mask(pci_dev, 0xffffffff);
+	if (err) {
 		printk("%s/0: Oops: no 32bit PCI DMA ???\n", dev->name);
-		err = -EIO;
 		goto fail_irq;
 	}
 
diff --git a/drivers/media/pci/tw68/tw68-core.c b/drivers/media/pci/tw68/tw68-core.c
index 8c5655d..4e77618 100644
--- a/drivers/media/pci/tw68/tw68-core.c
+++ b/drivers/media/pci/tw68/tw68-core.c
@@ -257,9 +257,9 @@
 		dev->name, pci_name(pci_dev), dev->pci_rev, pci_dev->irq,
 		dev->pci_lat, (u64)pci_resource_start(pci_dev, 0));
 	pci_set_master(pci_dev);
-	if (!pci_set_dma_mask(pci_dev, DMA_BIT_MASK(32))) {
+	err = pci_set_dma_mask(pci_dev, DMA_BIT_MASK(32));
+	if (err) {
 		pr_info("%s: Oops: no 32bit PCI DMA ???\n", dev->name);
-		err = -EIO;
 		goto fail1;
 	}
 
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 23b6c8e..d848616 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -65,8 +65,7 @@
 #define MMC_SANITIZE_REQ_TIMEOUT 240000
 #define MMC_EXTRACT_INDEX_FROM_ARG(x) ((x & 0x00FF0000) >> 16)
 
-#define mmc_req_rel_wr(req)	(((req->cmd_flags & REQ_FUA) || \
-				  (req->cmd_flags & REQ_META)) && \
+#define mmc_req_rel_wr(req)	((req->cmd_flags & REQ_FUA) && \
 				  (rq_data_dir(req) == WRITE))
 #define PACKED_CMD_VER	0x01
 #define PACKED_CMD_WR	0x02
@@ -1467,13 +1466,9 @@
 
 	/*
 	 * Reliable writes are used to implement Forced Unit Access and
-	 * REQ_META accesses, and are supported only on MMCs.
-	 *
-	 * XXX: this really needs a good explanation of why REQ_META
-	 * is treated special.
+	 * are supported only on MMCs.
 	 */
-	bool do_rel_wr = ((req->cmd_flags & REQ_FUA) ||
-			  (req->cmd_flags & REQ_META)) &&
+	bool do_rel_wr = (req->cmd_flags & REQ_FUA) &&
 		(rq_data_dir(req) == WRITE) &&
 		(md->flags & MMC_BLK_REL_WR);
 
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index c793fda..3a9a79e 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1040,71 +1040,6 @@
 	return err;
 }
 
-static int mmc_select_hs400(struct mmc_card *card)
-{
-	struct mmc_host *host = card->host;
-	int err = 0;
-	u8 val;
-
-	/*
-	 * HS400 mode requires 8-bit bus width
-	 */
-	if (!(card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS400 &&
-	      host->ios.bus_width == MMC_BUS_WIDTH_8))
-		return 0;
-
-	/*
-	 * Before switching to dual data rate operation for HS400,
-	 * it is required to convert from HS200 mode to HS mode.
-	 */
-	mmc_set_timing(card->host, MMC_TIMING_MMC_HS);
-	mmc_set_bus_speed(card);
-
-	val = EXT_CSD_TIMING_HS |
-	      card->drive_strength << EXT_CSD_DRV_STR_SHIFT;
-	err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
-			   EXT_CSD_HS_TIMING, val,
-			   card->ext_csd.generic_cmd6_time,
-			   true, true, true);
-	if (err) {
-		pr_err("%s: switch to high-speed from hs200 failed, err:%d\n",
-			mmc_hostname(host), err);
-		return err;
-	}
-
-	err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
-			 EXT_CSD_BUS_WIDTH,
-			 EXT_CSD_DDR_BUS_WIDTH_8,
-			 card->ext_csd.generic_cmd6_time);
-	if (err) {
-		pr_err("%s: switch to bus width for hs400 failed, err:%d\n",
-			mmc_hostname(host), err);
-		return err;
-	}
-
-	val = EXT_CSD_TIMING_HS400 |
-	      card->drive_strength << EXT_CSD_DRV_STR_SHIFT;
-	err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
-			   EXT_CSD_HS_TIMING, val,
-			   card->ext_csd.generic_cmd6_time,
-			   true, true, true);
-	if (err) {
-		pr_err("%s: switch to hs400 failed, err:%d\n",
-			 mmc_hostname(host), err);
-		return err;
-	}
-
-	mmc_set_timing(host, MMC_TIMING_MMC_HS400);
-	mmc_set_bus_speed(card);
-
-	return 0;
-}
-
-int mmc_hs200_to_hs400(struct mmc_card *card)
-{
-	return mmc_select_hs400(card);
-}
-
 /* Caller must hold re-tuning */
 static int mmc_switch_status(struct mmc_card *card)
 {
@@ -1118,6 +1053,97 @@
 	return mmc_switch_status_error(card->host, status);
 }
 
+static int mmc_select_hs400(struct mmc_card *card)
+{
+	struct mmc_host *host = card->host;
+	bool send_status = true;
+	unsigned int max_dtr;
+	int err = 0;
+	u8 val;
+
+	/*
+	 * HS400 mode requires 8-bit bus width
+	 */
+	if (!(card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS400 &&
+	      host->ios.bus_width == MMC_BUS_WIDTH_8))
+		return 0;
+
+	if (host->caps & MMC_CAP_WAIT_WHILE_BUSY)
+		send_status = false;
+
+	/* Reduce frequency to HS frequency */
+	max_dtr = card->ext_csd.hs_max_dtr;
+	mmc_set_clock(host, max_dtr);
+
+	/* Switch card to HS mode */
+	val = EXT_CSD_TIMING_HS |
+	      card->drive_strength << EXT_CSD_DRV_STR_SHIFT;
+	err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+			   EXT_CSD_HS_TIMING, val,
+			   card->ext_csd.generic_cmd6_time,
+			   true, send_status, true);
+	if (err) {
+		pr_err("%s: switch to high-speed from hs200 failed, err:%d\n",
+			mmc_hostname(host), err);
+		return err;
+	}
+
+	/* Set host controller to HS timing */
+	mmc_set_timing(card->host, MMC_TIMING_MMC_HS);
+
+	if (!send_status) {
+		err = mmc_switch_status(card);
+		if (err)
+			goto out_err;
+	}
+
+	/* Switch card to DDR */
+	err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+			 EXT_CSD_BUS_WIDTH,
+			 EXT_CSD_DDR_BUS_WIDTH_8,
+			 card->ext_csd.generic_cmd6_time);
+	if (err) {
+		pr_err("%s: switch to bus width for hs400 failed, err:%d\n",
+			mmc_hostname(host), err);
+		return err;
+	}
+
+	/* Switch card to HS400 */
+	val = EXT_CSD_TIMING_HS400 |
+	      card->drive_strength << EXT_CSD_DRV_STR_SHIFT;
+	err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+			   EXT_CSD_HS_TIMING, val,
+			   card->ext_csd.generic_cmd6_time,
+			   true, send_status, true);
+	if (err) {
+		pr_err("%s: switch to hs400 failed, err:%d\n",
+			 mmc_hostname(host), err);
+		return err;
+	}
+
+	/* Set host controller to HS400 timing and frequency */
+	mmc_set_timing(host, MMC_TIMING_MMC_HS400);
+	mmc_set_bus_speed(card);
+
+	if (!send_status) {
+		err = mmc_switch_status(card);
+		if (err)
+			goto out_err;
+	}
+
+	return 0;
+
+out_err:
+	pr_err("%s: %s failed, error %d\n", mmc_hostname(card->host),
+	       __func__, err);
+	return err;
+}
+
+int mmc_hs200_to_hs400(struct mmc_card *card)
+{
+	return mmc_select_hs400(card);
+}
+
 int mmc_hs400_to_hs200(struct mmc_card *card)
 {
 	struct mmc_host *host = card->host;
@@ -1219,6 +1245,8 @@
 static int mmc_select_hs200(struct mmc_card *card)
 {
 	struct mmc_host *host = card->host;
+	bool send_status = true;
+	unsigned int old_timing;
 	int err = -EINVAL;
 	u8 val;
 
@@ -1234,6 +1262,9 @@
 
 	mmc_select_driver_type(card);
 
+	if (host->caps & MMC_CAP_WAIT_WHILE_BUSY)
+		send_status = false;
+
 	/*
 	 * Set the bus width(4 or 8) with host's support and
 	 * switch to HS200 mode if bus width is set successfully.
@@ -1245,11 +1276,25 @@
 		err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
 				   EXT_CSD_HS_TIMING, val,
 				   card->ext_csd.generic_cmd6_time,
-				   true, true, true);
-		if (!err)
-			mmc_set_timing(host, MMC_TIMING_MMC_HS200);
+				   true, send_status, true);
+		if (err)
+			goto err;
+		old_timing = host->ios.timing;
+		mmc_set_timing(host, MMC_TIMING_MMC_HS200);
+		if (!send_status) {
+			err = mmc_switch_status(card);
+			/*
+			 * mmc_select_timing() assumes timing has not changed if
+			 * it is a switch error.
+			 */
+			if (err == -EBADMSG)
+				mmc_set_timing(host, old_timing);
+		}
 	}
 err:
+	if (err)
+		pr_err("%s: %s failed, error %d\n", mmc_hostname(card->host),
+		       __func__, err);
 	return err;
 }
 
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index af71de5..1dee533 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -473,6 +473,7 @@
 
 config MMC_GOLDFISH
 	tristate "goldfish qemu Multimedia Card Interface support"
+	depends on HAS_DMA
 	depends on GOLDFISH || COMPILE_TEST
 	help
 	  This selects the Goldfish Multimedia card Interface emulation
diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c
index 39568cc..33dfd7e 100644
--- a/drivers/mmc/host/mtk-sd.c
+++ b/drivers/mmc/host/mtk-sd.c
@@ -1276,7 +1276,7 @@
 	int start = 0, len = 0;
 	int start_final = 0, len_final = 0;
 	u8 final_phase = 0xff;
-	struct msdc_delay_phase delay_phase;
+	struct msdc_delay_phase delay_phase = { 0, };
 
 	if (delay == 0) {
 		dev_err(host->dev, "phase error: [map:%x]\n", delay);
diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c
index 8cadd74..ce08896 100644
--- a/drivers/mmc/host/pxamci.c
+++ b/drivers/mmc/host/pxamci.c
@@ -805,7 +805,7 @@
 		goto out;
 	} else {
 		mmc->caps |= host->pdata->gpio_card_ro_invert ?
-			MMC_CAP2_RO_ACTIVE_HIGH : 0;
+			0 : MMC_CAP2_RO_ACTIVE_HIGH;
 	}
 
 	if (gpio_is_valid(gpio_cd))
diff --git a/drivers/mtd/nand/jz4740_nand.c b/drivers/mtd/nand/jz4740_nand.c
index dc4e844..5a99a93 100644
--- a/drivers/mtd/nand/jz4740_nand.c
+++ b/drivers/mtd/nand/jz4740_nand.c
@@ -25,6 +25,7 @@
 
 #include <linux/gpio.h>
 
+#include <asm/mach-jz4740/gpio.h>
 #include <asm/mach-jz4740/jz4740_nand.h>
 
 #define JZ_REG_NAND_CTRL	0x50
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index cc74142..ece544e 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -3110,7 +3110,7 @@
  */
 static void nand_shutdown(struct mtd_info *mtd)
 {
-	nand_get_device(mtd, FL_SHUTDOWN);
+	nand_get_device(mtd, FL_PM_SUSPENDED);
 }
 
 /* Set default functions */
diff --git a/drivers/net/can/bfin_can.c b/drivers/net/can/bfin_can.c
index 57dadd5..1deb8ff 100644
--- a/drivers/net/can/bfin_can.c
+++ b/drivers/net/can/bfin_can.c
@@ -501,8 +501,6 @@
 			cf->data[2] |= CAN_ERR_PROT_FORM;
 		else if (status & SER)
 			cf->data[2] |= CAN_ERR_PROT_STUFF;
-		else
-			cf->data[2] |= CAN_ERR_PROT_UNSPEC;
 	}
 
 	priv->can.state = state;
diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c
index 5d214d1..f91b094 100644
--- a/drivers/net/can/c_can/c_can.c
+++ b/drivers/net/can/c_can/c_can.c
@@ -962,7 +962,6 @@
 	 * type of the last error to occur on the CAN bus
 	 */
 	cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR;
-	cf->data[2] |= CAN_ERR_PROT_UNSPEC;
 
 	switch (lec_type) {
 	case LEC_STUFF_ERROR:
@@ -975,8 +974,7 @@
 		break;
 	case LEC_ACK_ERROR:
 		netdev_dbg(dev, "ack error\n");
-		cf->data[3] |= (CAN_ERR_PROT_LOC_ACK |
-				CAN_ERR_PROT_LOC_ACK_DEL);
+		cf->data[3] = CAN_ERR_PROT_LOC_ACK;
 		break;
 	case LEC_BIT1_ERROR:
 		netdev_dbg(dev, "bit1 error\n");
@@ -988,8 +986,7 @@
 		break;
 	case LEC_CRC_ERROR:
 		netdev_dbg(dev, "CRC error\n");
-		cf->data[3] |= (CAN_ERR_PROT_LOC_CRC_SEQ |
-				CAN_ERR_PROT_LOC_CRC_DEL);
+		cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ;
 		break;
 	default:
 		break;
diff --git a/drivers/net/can/cc770/cc770.c b/drivers/net/can/cc770/cc770.c
index 70a8cbb..1e37313 100644
--- a/drivers/net/can/cc770/cc770.c
+++ b/drivers/net/can/cc770/cc770.c
@@ -578,7 +578,7 @@
 				cf->data[2] |= CAN_ERR_PROT_BIT0;
 				break;
 			case STAT_LEC_CRC:
-				cf->data[3] |= CAN_ERR_PROT_LOC_CRC_SEQ;
+				cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ;
 				break;
 			}
 		}
diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index 868fe94..41c0fc9 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -535,13 +535,13 @@
 	if (reg_esr & FLEXCAN_ESR_ACK_ERR) {
 		netdev_dbg(dev, "ACK_ERR irq\n");
 		cf->can_id |= CAN_ERR_ACK;
-		cf->data[3] |= CAN_ERR_PROT_LOC_ACK;
+		cf->data[3] = CAN_ERR_PROT_LOC_ACK;
 		tx_errors = 1;
 	}
 	if (reg_esr & FLEXCAN_ESR_CRC_ERR) {
 		netdev_dbg(dev, "CRC_ERR irq\n");
 		cf->data[2] |= CAN_ERR_PROT_BIT;
-		cf->data[3] |= CAN_ERR_PROT_LOC_CRC_SEQ;
+		cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ;
 		rx_errors = 1;
 	}
 	if (reg_esr & FLEXCAN_ESR_FRM_ERR) {
diff --git a/drivers/net/can/janz-ican3.c b/drivers/net/can/janz-ican3.c
index c1e8536..5d04f54 100644
--- a/drivers/net/can/janz-ican3.c
+++ b/drivers/net/can/janz-ican3.c
@@ -1096,7 +1096,6 @@
 			cf->data[2] |= CAN_ERR_PROT_STUFF;
 			break;
 		default:
-			cf->data[2] |= CAN_ERR_PROT_UNSPEC;
 			cf->data[3] = ecc & ECC_SEG;
 			break;
 		}
diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index ef65517..39cf911 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -487,7 +487,6 @@
 	 * type of the last error to occur on the CAN bus
 	 */
 	cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR;
-	cf->data[2] |= CAN_ERR_PROT_UNSPEC;
 
 	switch (lec_type) {
 	case LEC_STUFF_ERROR:
@@ -500,8 +499,7 @@
 		break;
 	case LEC_ACK_ERROR:
 		netdev_dbg(dev, "ack error\n");
-		cf->data[3] |= (CAN_ERR_PROT_LOC_ACK |
-				CAN_ERR_PROT_LOC_ACK_DEL);
+		cf->data[3] = CAN_ERR_PROT_LOC_ACK;
 		break;
 	case LEC_BIT1_ERROR:
 		netdev_dbg(dev, "bit1 error\n");
@@ -513,8 +511,7 @@
 		break;
 	case LEC_CRC_ERROR:
 		netdev_dbg(dev, "CRC error\n");
-		cf->data[3] |= (CAN_ERR_PROT_LOC_CRC_SEQ |
-				CAN_ERR_PROT_LOC_CRC_DEL);
+		cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ;
 		break;
 	default:
 		break;
diff --git a/drivers/net/can/pch_can.c b/drivers/net/can/pch_can.c
index e187ca7..c131788 100644
--- a/drivers/net/can/pch_can.c
+++ b/drivers/net/can/pch_can.c
@@ -559,8 +559,7 @@
 		stats->rx_errors++;
 		break;
 	case PCH_CRC_ERR:
-		cf->data[3] |= CAN_ERR_PROT_LOC_CRC_SEQ |
-			       CAN_ERR_PROT_LOC_CRC_DEL;
+		cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ;
 		priv->can.can_stats.bus_error++;
 		stats->rx_errors++;
 		break;
diff --git a/drivers/net/can/rcar_can.c b/drivers/net/can/rcar_can.c
index 7bd5419..bc46be3 100644
--- a/drivers/net/can/rcar_can.c
+++ b/drivers/net/can/rcar_can.c
@@ -241,17 +241,16 @@
 		u8 ecsr;
 
 		netdev_dbg(priv->ndev, "Bus error interrupt:\n");
-		if (skb) {
+		if (skb)
 			cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_PROT;
-			cf->data[2] = CAN_ERR_PROT_UNSPEC;
-		}
+
 		ecsr = readb(&priv->regs->ecsr);
 		if (ecsr & RCAR_CAN_ECSR_ADEF) {
 			netdev_dbg(priv->ndev, "ACK Delimiter Error\n");
 			tx_errors++;
 			writeb(~RCAR_CAN_ECSR_ADEF, &priv->regs->ecsr);
 			if (skb)
-				cf->data[3] |= CAN_ERR_PROT_LOC_ACK_DEL;
+				cf->data[3] = CAN_ERR_PROT_LOC_ACK_DEL;
 		}
 		if (ecsr & RCAR_CAN_ECSR_BE0F) {
 			netdev_dbg(priv->ndev, "Bit Error (dominant)\n");
@@ -272,7 +271,7 @@
 			rx_errors++;
 			writeb(~RCAR_CAN_ECSR_CEF, &priv->regs->ecsr);
 			if (skb)
-				cf->data[3] |= CAN_ERR_PROT_LOC_CRC_SEQ;
+				cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ;
 		}
 		if (ecsr & RCAR_CAN_ECSR_AEF) {
 			netdev_dbg(priv->ndev, "ACK Error\n");
@@ -280,7 +279,7 @@
 			writeb(~RCAR_CAN_ECSR_AEF, &priv->regs->ecsr);
 			if (skb) {
 				cf->can_id |= CAN_ERR_ACK;
-				cf->data[3] |= CAN_ERR_PROT_LOC_ACK;
+				cf->data[3] = CAN_ERR_PROT_LOC_ACK;
 			}
 		}
 		if (ecsr & RCAR_CAN_ECSR_FEF) {
diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c
index 7b92e91..8dda3b7 100644
--- a/drivers/net/can/sja1000/sja1000.c
+++ b/drivers/net/can/sja1000/sja1000.c
@@ -218,6 +218,9 @@
 	priv->write_reg(priv, SJA1000_RXERR, 0x0);
 	priv->read_reg(priv, SJA1000_ECC);
 
+	/* clear interrupt flags */
+	priv->read_reg(priv, SJA1000_IR);
+
 	/* leave reset mode */
 	set_normal_mode(dev);
 }
@@ -446,7 +449,6 @@
 			cf->data[2] |= CAN_ERR_PROT_STUFF;
 			break;
 		default:
-			cf->data[2] |= CAN_ERR_PROT_UNSPEC;
 			cf->data[3] = ecc & ECC_SEG;
 			break;
 		}
diff --git a/drivers/net/can/sun4i_can.c b/drivers/net/can/sun4i_can.c
index d9a42c6..68ef0a4 100644
--- a/drivers/net/can/sun4i_can.c
+++ b/drivers/net/can/sun4i_can.c
@@ -575,7 +575,6 @@
 				cf->data[2] |= CAN_ERR_PROT_STUFF;
 				break;
 			default:
-				cf->data[2] |= CAN_ERR_PROT_UNSPEC;
 				cf->data[3] = (ecc & SUN4I_STA_ERR_SEG_CODE)
 					       >> 16;
 				break;
diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c
index cf345cb..680d1ff 100644
--- a/drivers/net/can/ti_hecc.c
+++ b/drivers/net/can/ti_hecc.c
@@ -722,7 +722,6 @@
 	if (err_status & HECC_BUS_ERROR) {
 		++priv->can.can_stats.bus_error;
 		cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_PROT;
-		cf->data[2] |= CAN_ERR_PROT_UNSPEC;
 		if (err_status & HECC_CANES_FE) {
 			hecc_set_bit(priv, HECC_CANES, HECC_CANES_FE);
 			cf->data[2] |= CAN_ERR_PROT_FORM;
@@ -737,13 +736,11 @@
 		}
 		if (err_status & HECC_CANES_CRCE) {
 			hecc_set_bit(priv, HECC_CANES, HECC_CANES_CRCE);
-			cf->data[3] |= CAN_ERR_PROT_LOC_CRC_SEQ |
-					CAN_ERR_PROT_LOC_CRC_DEL;
+			cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ;
 		}
 		if (err_status & HECC_CANES_ACKE) {
 			hecc_set_bit(priv, HECC_CANES, HECC_CANES_ACKE);
-			cf->data[3] |= CAN_ERR_PROT_LOC_ACK |
-					CAN_ERR_PROT_LOC_ACK_DEL;
+			cf->data[3] = CAN_ERR_PROT_LOC_ACK;
 		}
 	}
 
diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c
index 2d39038..fc5b756 100644
--- a/drivers/net/can/usb/ems_usb.c
+++ b/drivers/net/can/usb/ems_usb.c
@@ -377,7 +377,6 @@
 			cf->data[2] |= CAN_ERR_PROT_STUFF;
 			break;
 		default:
-			cf->data[2] |= CAN_ERR_PROT_UNSPEC;
 			cf->data[3] = ecc & SJA1000_ECC_SEG;
 			break;
 		}
diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c
index 0e5a449..113e64f 100644
--- a/drivers/net/can/usb/esd_usb2.c
+++ b/drivers/net/can/usb/esd_usb2.c
@@ -282,7 +282,6 @@
 				cf->data[2] |= CAN_ERR_PROT_STUFF;
 				break;
 			default:
-				cf->data[2] |= CAN_ERR_PROT_UNSPEC;
 				cf->data[3] = ecc & SJA1000_ECC_SEG;
 				break;
 			}
diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c
index 8b17a90..022bfa1 100644
--- a/drivers/net/can/usb/kvaser_usb.c
+++ b/drivers/net/can/usb/kvaser_usb.c
@@ -944,10 +944,9 @@
 			cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_PROT;
 
 			if (es->leaf.error_factor & M16C_EF_ACKE)
-				cf->data[3] |= (CAN_ERR_PROT_LOC_ACK);
+				cf->data[3] = CAN_ERR_PROT_LOC_ACK;
 			if (es->leaf.error_factor & M16C_EF_CRCE)
-				cf->data[3] |= (CAN_ERR_PROT_LOC_CRC_SEQ |
-						CAN_ERR_PROT_LOC_CRC_DEL);
+				cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ;
 			if (es->leaf.error_factor & M16C_EF_FORME)
 				cf->data[2] |= CAN_ERR_PROT_FORM;
 			if (es->leaf.error_factor & M16C_EF_STFE)
diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c
index de95b1c..a731720 100644
--- a/drivers/net/can/usb/usb_8dev.c
+++ b/drivers/net/can/usb/usb_8dev.c
@@ -401,9 +401,7 @@
 		tx_errors = 1;
 		break;
 	case USB_8DEV_STATUSMSG_CRC:
-		cf->data[2] |= CAN_ERR_PROT_UNSPEC;
-		cf->data[3] |= CAN_ERR_PROT_LOC_CRC_SEQ |
-			       CAN_ERR_PROT_LOC_CRC_DEL;
+		cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ;
 		rx_errors = 1;
 		break;
 	case USB_8DEV_STATUSMSG_BIT0:
diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c
index fc55e8e..51670b3 100644
--- a/drivers/net/can/xilinx_can.c
+++ b/drivers/net/can/xilinx_can.c
@@ -608,17 +608,15 @@
 
 	/* Check for error interrupt */
 	if (isr & XCAN_IXR_ERROR_MASK) {
-		if (skb) {
+		if (skb)
 			cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR;
-			cf->data[2] |= CAN_ERR_PROT_UNSPEC;
-		}
 
 		/* Check for Ack error interrupt */
 		if (err_status & XCAN_ESR_ACKER_MASK) {
 			stats->tx_errors++;
 			if (skb) {
 				cf->can_id |= CAN_ERR_ACK;
-				cf->data[3] |= CAN_ERR_PROT_LOC_ACK;
+				cf->data[3] = CAN_ERR_PROT_LOC_ACK;
 			}
 		}
 
@@ -654,8 +652,7 @@
 			stats->rx_errors++;
 			if (skb) {
 				cf->can_id |= CAN_ERR_PROT;
-				cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ |
-						CAN_ERR_PROT_LOC_CRC_DEL;
+				cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ;
 			}
 		}
 			priv->can.can_stats.bus_error++;
diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c
index 9093577..0527f48 100644
--- a/drivers/net/dsa/mv88e6060.c
+++ b/drivers/net/dsa/mv88e6060.c
@@ -15,9 +15,7 @@
 #include <linux/netdevice.h>
 #include <linux/phy.h>
 #include <net/dsa.h>
-
-#define REG_PORT(p)		(8 + (p))
-#define REG_GLOBAL		0x0f
+#include "mv88e6060.h"
 
 static int reg_read(struct dsa_switch *ds, int addr, int reg)
 {
@@ -67,13 +65,14 @@
 	if (bus == NULL)
 		return NULL;
 
-	ret = mdiobus_read(bus, sw_addr + REG_PORT(0), 0x03);
+	ret = mdiobus_read(bus, sw_addr + REG_PORT(0), PORT_SWITCH_ID);
 	if (ret >= 0) {
-		if (ret == 0x0600)
+		if (ret == PORT_SWITCH_ID_6060)
 			return "Marvell 88E6060 (A0)";
-		if (ret == 0x0601 || ret == 0x0602)
+		if (ret == PORT_SWITCH_ID_6060_R1 ||
+		    ret == PORT_SWITCH_ID_6060_R2)
 			return "Marvell 88E6060 (B0)";
-		if ((ret & 0xfff0) == 0x0600)
+		if ((ret & PORT_SWITCH_ID_6060_MASK) == PORT_SWITCH_ID_6060)
 			return "Marvell 88E6060";
 	}
 
@@ -87,22 +86,26 @@
 	unsigned long timeout;
 
 	/* Set all ports to the disabled state. */
-	for (i = 0; i < 6; i++) {
-		ret = REG_READ(REG_PORT(i), 0x04);
-		REG_WRITE(REG_PORT(i), 0x04, ret & 0xfffc);
+	for (i = 0; i < MV88E6060_PORTS; i++) {
+		ret = REG_READ(REG_PORT(i), PORT_CONTROL);
+		REG_WRITE(REG_PORT(i), PORT_CONTROL,
+			  ret & ~PORT_CONTROL_STATE_MASK);
 	}
 
 	/* Wait for transmit queues to drain. */
 	usleep_range(2000, 4000);
 
 	/* Reset the switch. */
-	REG_WRITE(REG_GLOBAL, 0x0a, 0xa130);
+	REG_WRITE(REG_GLOBAL, GLOBAL_ATU_CONTROL,
+		  GLOBAL_ATU_CONTROL_SWRESET |
+		  GLOBAL_ATU_CONTROL_ATUSIZE_1024 |
+		  GLOBAL_ATU_CONTROL_ATE_AGE_5MIN);
 
 	/* Wait up to one second for reset to complete. */
 	timeout = jiffies + 1 * HZ;
 	while (time_before(jiffies, timeout)) {
-		ret = REG_READ(REG_GLOBAL, 0x00);
-		if ((ret & 0x8000) == 0x0000)
+		ret = REG_READ(REG_GLOBAL, GLOBAL_STATUS);
+		if (ret & GLOBAL_STATUS_INIT_READY)
 			break;
 
 		usleep_range(1000, 2000);
@@ -119,13 +122,15 @@
 	 * set the maximum frame size to 1536 bytes, and mask all
 	 * interrupt sources.
 	 */
-	REG_WRITE(REG_GLOBAL, 0x04, 0x0800);
+	REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL, GLOBAL_CONTROL_MAX_FRAME_1536);
 
 	/* Enable automatic address learning, set the address
 	 * database size to 1024 entries, and set the default aging
 	 * time to 5 minutes.
 	 */
-	REG_WRITE(REG_GLOBAL, 0x0a, 0x2130);
+	REG_WRITE(REG_GLOBAL, GLOBAL_ATU_CONTROL,
+		  GLOBAL_ATU_CONTROL_ATUSIZE_1024 |
+		  GLOBAL_ATU_CONTROL_ATE_AGE_5MIN);
 
 	return 0;
 }
@@ -139,25 +144,30 @@
 	 * state to Forwarding.  Additionally, if this is the CPU
 	 * port, enable Ingress and Egress Trailer tagging mode.
 	 */
-	REG_WRITE(addr, 0x04, dsa_is_cpu_port(ds, p) ?  0x4103 : 0x0003);
+	REG_WRITE(addr, PORT_CONTROL,
+		  dsa_is_cpu_port(ds, p) ?
+			PORT_CONTROL_TRAILER |
+			PORT_CONTROL_INGRESS_MODE |
+			PORT_CONTROL_STATE_FORWARDING :
+			PORT_CONTROL_STATE_FORWARDING);
 
 	/* Port based VLAN map: give each port its own address
 	 * database, allow the CPU port to talk to each of the 'real'
 	 * ports, and allow each of the 'real' ports to only talk to
 	 * the CPU port.
 	 */
-	REG_WRITE(addr, 0x06,
-			((p & 0xf) << 12) |
-			 (dsa_is_cpu_port(ds, p) ?
-				ds->phys_port_mask :
-				(1 << ds->dst->cpu_port)));
+	REG_WRITE(addr, PORT_VLAN_MAP,
+		  ((p & 0xf) << PORT_VLAN_MAP_DBNUM_SHIFT) |
+		   (dsa_is_cpu_port(ds, p) ?
+			ds->phys_port_mask :
+			BIT(ds->dst->cpu_port)));
 
 	/* Port Association Vector: when learning source addresses
 	 * of packets, add the address to the address database using
 	 * a port bitmap that has only the bit for this port set and
 	 * the other bits clear.
 	 */
-	REG_WRITE(addr, 0x0b, 1 << p);
+	REG_WRITE(addr, PORT_ASSOC_VECTOR, BIT(p));
 
 	return 0;
 }
@@ -177,7 +187,7 @@
 	if (ret < 0)
 		return ret;
 
-	for (i = 0; i < 6; i++) {
+	for (i = 0; i < MV88E6060_PORTS; i++) {
 		ret = mv88e6060_setup_port(ds, i);
 		if (ret < 0)
 			return ret;
@@ -188,16 +198,17 @@
 
 static int mv88e6060_set_addr(struct dsa_switch *ds, u8 *addr)
 {
-	REG_WRITE(REG_GLOBAL, 0x01, (addr[0] << 8) | addr[1]);
-	REG_WRITE(REG_GLOBAL, 0x02, (addr[2] << 8) | addr[3]);
-	REG_WRITE(REG_GLOBAL, 0x03, (addr[4] << 8) | addr[5]);
+	/* Use the same MAC Address as FD Pause frames for all ports */
+	REG_WRITE(REG_GLOBAL, GLOBAL_MAC_01, (addr[0] << 9) | addr[1]);
+	REG_WRITE(REG_GLOBAL, GLOBAL_MAC_23, (addr[2] << 8) | addr[3]);
+	REG_WRITE(REG_GLOBAL, GLOBAL_MAC_45, (addr[4] << 8) | addr[5]);
 
 	return 0;
 }
 
 static int mv88e6060_port_to_phy_addr(int port)
 {
-	if (port >= 0 && port <= 5)
+	if (port >= 0 && port < MV88E6060_PORTS)
 		return port;
 	return -1;
 }
@@ -225,54 +236,6 @@
 	return reg_write(ds, addr, regnum, val);
 }
 
-static void mv88e6060_poll_link(struct dsa_switch *ds)
-{
-	int i;
-
-	for (i = 0; i < DSA_MAX_PORTS; i++) {
-		struct net_device *dev;
-		int uninitialized_var(port_status);
-		int link;
-		int speed;
-		int duplex;
-		int fc;
-
-		dev = ds->ports[i];
-		if (dev == NULL)
-			continue;
-
-		link = 0;
-		if (dev->flags & IFF_UP) {
-			port_status = reg_read(ds, REG_PORT(i), 0x00);
-			if (port_status < 0)
-				continue;
-
-			link = !!(port_status & 0x1000);
-		}
-
-		if (!link) {
-			if (netif_carrier_ok(dev)) {
-				netdev_info(dev, "link down\n");
-				netif_carrier_off(dev);
-			}
-			continue;
-		}
-
-		speed = (port_status & 0x0100) ? 100 : 10;
-		duplex = (port_status & 0x0200) ? 1 : 0;
-		fc = ((port_status & 0xc000) == 0xc000) ? 1 : 0;
-
-		if (!netif_carrier_ok(dev)) {
-			netdev_info(dev,
-				    "link up, %d Mb/s, %s duplex, flow control %sabled\n",
-				    speed,
-				    duplex ? "full" : "half",
-				    fc ? "en" : "dis");
-			netif_carrier_on(dev);
-		}
-	}
-}
-
 static struct dsa_switch_driver mv88e6060_switch_driver = {
 	.tag_protocol	= DSA_TAG_PROTO_TRAILER,
 	.probe		= mv88e6060_probe,
@@ -280,7 +243,6 @@
 	.set_addr	= mv88e6060_set_addr,
 	.phy_read	= mv88e6060_phy_read,
 	.phy_write	= mv88e6060_phy_write,
-	.poll_link	= mv88e6060_poll_link,
 };
 
 static int __init mv88e6060_init(void)
diff --git a/drivers/net/dsa/mv88e6060.h b/drivers/net/dsa/mv88e6060.h
new file mode 100644
index 0000000..cc9b2ed
--- /dev/null
+++ b/drivers/net/dsa/mv88e6060.h
@@ -0,0 +1,111 @@
+/*
+ * drivers/net/dsa/mv88e6060.h - Marvell 88e6060 switch chip support
+ * Copyright (c) 2015 Neil Armstrong
+ *
+ * Based on mv88e6xxx.h
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __MV88E6060_H
+#define __MV88E6060_H
+
+#define MV88E6060_PORTS	6
+
+#define REG_PORT(p)		(0x8 + (p))
+#define PORT_STATUS		0x00
+#define PORT_STATUS_PAUSE_EN	BIT(15)
+#define PORT_STATUS_MY_PAUSE	BIT(14)
+#define PORT_STATUS_FC		(PORT_STATUS_MY_PAUSE | PORT_STATUS_PAUSE_EN)
+#define PORT_STATUS_RESOLVED	BIT(13)
+#define PORT_STATUS_LINK	BIT(12)
+#define PORT_STATUS_PORTMODE	BIT(11)
+#define PORT_STATUS_PHYMODE	BIT(10)
+#define PORT_STATUS_DUPLEX	BIT(9)
+#define PORT_STATUS_SPEED	BIT(8)
+#define PORT_SWITCH_ID		0x03
+#define PORT_SWITCH_ID_6060	0x0600
+#define PORT_SWITCH_ID_6060_MASK	0xfff0
+#define PORT_SWITCH_ID_6060_R1	0x0601
+#define PORT_SWITCH_ID_6060_R2	0x0602
+#define PORT_CONTROL		0x04
+#define PORT_CONTROL_FORCE_FLOW_CTRL	BIT(15)
+#define PORT_CONTROL_TRAILER	BIT(14)
+#define PORT_CONTROL_HEADER	BIT(11)
+#define PORT_CONTROL_INGRESS_MODE	BIT(8)
+#define PORT_CONTROL_VLAN_TUNNEL	BIT(7)
+#define PORT_CONTROL_STATE_MASK	0x03
+#define PORT_CONTROL_STATE_DISABLED	0x00
+#define PORT_CONTROL_STATE_BLOCKING	0x01
+#define PORT_CONTROL_STATE_LEARNING	0x02
+#define PORT_CONTROL_STATE_FORWARDING	0x03
+#define PORT_VLAN_MAP		0x06
+#define PORT_VLAN_MAP_DBNUM_SHIFT	12
+#define PORT_VLAN_MAP_TABLE_MASK	0x1f
+#define PORT_ASSOC_VECTOR	0x0b
+#define PORT_ASSOC_VECTOR_MONITOR	BIT(15)
+#define PORT_ASSOC_VECTOR_PAV_MASK	0x1f
+#define PORT_RX_CNTR		0x10
+#define PORT_TX_CNTR		0x11
+
+#define REG_GLOBAL		0x0f
+#define GLOBAL_STATUS		0x00
+#define GLOBAL_STATUS_SW_MODE_MASK	(0x3 << 12)
+#define GLOBAL_STATUS_SW_MODE_0	(0x0 << 12)
+#define GLOBAL_STATUS_SW_MODE_1	(0x1 << 12)
+#define GLOBAL_STATUS_SW_MODE_2	(0x2 << 12)
+#define GLOBAL_STATUS_SW_MODE_3	(0x3 << 12)
+#define GLOBAL_STATUS_INIT_READY	BIT(11)
+#define GLOBAL_STATUS_ATU_FULL		BIT(3)
+#define GLOBAL_STATUS_ATU_DONE		BIT(2)
+#define GLOBAL_STATUS_PHY_INT	BIT(1)
+#define GLOBAL_STATUS_EEINT	BIT(0)
+#define GLOBAL_MAC_01		0x01
+#define GLOBAL_MAC_01_DIFF_ADDR	BIT(8)
+#define GLOBAL_MAC_23		0x02
+#define GLOBAL_MAC_45		0x03
+#define GLOBAL_CONTROL		0x04
+#define GLOBAL_CONTROL_DISCARD_EXCESS	BIT(13)
+#define GLOBAL_CONTROL_MAX_FRAME_1536	BIT(10)
+#define GLOBAL_CONTROL_RELOAD_EEPROM	BIT(9)
+#define GLOBAL_CONTROL_CTRMODE		BIT(8)
+#define GLOBAL_CONTROL_ATU_FULL_EN	BIT(3)
+#define GLOBAL_CONTROL_ATU_DONE_EN	BIT(2)
+#define GLOBAL_CONTROL_PHYINT_EN	BIT(1)
+#define GLOBAL_CONTROL_EEPROM_DONE_EN	BIT(0)
+#define GLOBAL_ATU_CONTROL	0x0a
+#define GLOBAL_ATU_CONTROL_SWRESET	BIT(15)
+#define GLOBAL_ATU_CONTROL_LEARNDIS	BIT(14)
+#define GLOBAL_ATU_CONTROL_ATUSIZE_256	(0x0 << 12)
+#define GLOBAL_ATU_CONTROL_ATUSIZE_512	(0x1 << 12)
+#define GLOBAL_ATU_CONTROL_ATUSIZE_1024	(0x2 << 12)
+#define GLOBAL_ATU_CONTROL_ATE_AGE_SHIFT	4
+#define GLOBAL_ATU_CONTROL_ATE_AGE_MASK	(0xff << 4)
+#define GLOBAL_ATU_CONTROL_ATE_AGE_5MIN	(0x13 << 4)
+#define GLOBAL_ATU_OP		0x0b
+#define GLOBAL_ATU_OP_BUSY	BIT(15)
+#define GLOBAL_ATU_OP_NOP		(0 << 12)
+#define GLOBAL_ATU_OP_FLUSH_ALL	((1 << 12) | GLOBAL_ATU_OP_BUSY)
+#define GLOBAL_ATU_OP_FLUSH_UNLOCKED	((2 << 12) | GLOBAL_ATU_OP_BUSY)
+#define GLOBAL_ATU_OP_LOAD_DB		((3 << 12) | GLOBAL_ATU_OP_BUSY)
+#define GLOBAL_ATU_OP_GET_NEXT_DB	((4 << 12) | GLOBAL_ATU_OP_BUSY)
+#define GLOBAL_ATU_OP_FLUSH_DB		((5 << 12) | GLOBAL_ATU_OP_BUSY)
+#define GLOBAL_ATU_OP_FLUSH_UNLOCKED_DB ((6 << 12) | GLOBAL_ATU_OP_BUSY)
+#define GLOBAL_ATU_DATA		0x0c
+#define GLOBAL_ATU_DATA_PORT_VECTOR_MASK	0x3f0
+#define GLOBAL_ATU_DATA_PORT_VECTOR_SHIFT	4
+#define GLOBAL_ATU_DATA_STATE_MASK		0x0f
+#define GLOBAL_ATU_DATA_STATE_UNUSED		0x00
+#define GLOBAL_ATU_DATA_STATE_UC_STATIC		0x0e
+#define GLOBAL_ATU_DATA_STATE_UC_LOCKED		0x0f
+#define GLOBAL_ATU_DATA_STATE_MC_STATIC		0x07
+#define GLOBAL_ATU_DATA_STATE_MC_LOCKED		0x0e
+#define GLOBAL_ATU_MAC_01	0x0d
+#define GLOBAL_ATU_MAC_23	0x0e
+#define GLOBAL_ATU_MAC_45	0x0f
+
+#endif
diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
index 05aa759..31c5e47 100644
--- a/drivers/net/ethernet/Kconfig
+++ b/drivers/net/ethernet/Kconfig
@@ -29,6 +29,7 @@
 source "drivers/net/ethernet/apple/Kconfig"
 source "drivers/net/ethernet/arc/Kconfig"
 source "drivers/net/ethernet/atheros/Kconfig"
+source "drivers/net/ethernet/aurora/Kconfig"
 source "drivers/net/ethernet/cadence/Kconfig"
 source "drivers/net/ethernet/adi/Kconfig"
 source "drivers/net/ethernet/broadcom/Kconfig"
@@ -78,7 +79,6 @@
 source "drivers/net/ethernet/intel/Kconfig"
 source "drivers/net/ethernet/i825xx/Kconfig"
 source "drivers/net/ethernet/xscale/Kconfig"
-source "drivers/net/ethernet/icplus/Kconfig"
 
 config JME
 	tristate "JMicron(R) PCI-Express Gigabit Ethernet support"
diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile
index ddfc808..071f84e 100644
--- a/drivers/net/ethernet/Makefile
+++ b/drivers/net/ethernet/Makefile
@@ -15,6 +15,7 @@
 obj-$(CONFIG_NET_VENDOR_APPLE) += apple/
 obj-$(CONFIG_NET_VENDOR_ARC) += arc/
 obj-$(CONFIG_NET_VENDOR_ATHEROS) += atheros/
+obj-$(CONFIG_NET_VENDOR_AURORA) += aurora/
 obj-$(CONFIG_NET_CADENCE) += cadence/
 obj-$(CONFIG_NET_BFIN) += adi/
 obj-$(CONFIG_NET_VENDOR_BROADCOM) += broadcom/
@@ -41,7 +42,6 @@
 obj-$(CONFIG_NET_VENDOR_INTEL) += intel/
 obj-$(CONFIG_NET_VENDOR_I825XX) += i825xx/
 obj-$(CONFIG_NET_VENDOR_XSCALE) += xscale/
-obj-$(CONFIG_IP1000) += icplus/
 obj-$(CONFIG_JME) += jme.o
 obj-$(CONFIG_KORINA) += korina.o
 obj-$(CONFIG_LANTIQ_ETOP) += lantiq_etop.o
diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c
index e2afabf..7ccebae 100644
--- a/drivers/net/ethernet/amd/pcnet32.c
+++ b/drivers/net/ethernet/amd/pcnet32.c
@@ -1500,10 +1500,11 @@
 		return -ENODEV;
 	}
 
-	if (!pci_set_dma_mask(pdev, PCNET32_DMA_MASK)) {
+	err = pci_set_dma_mask(pdev, PCNET32_DMA_MASK);
+	if (err) {
 		if (pcnet32_debug & NETIF_MSG_PROBE)
 			pr_err("architecture does not support 32bit PCI busmaster DMA\n");
-		return -ENODEV;
+		return err;
 	}
 	if (!request_region(ioaddr, PCNET32_TOTAL_SIZE, "pcnet32_probe_pci")) {
 		if (pcnet32_debug & NETIF_MSG_PROBE)
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
index 991412c..9147a01 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
@@ -450,12 +450,12 @@
 		return NETDEV_TX_OK;
 	}
 
-	pdata->ring_ops->wr_cmd(tx_ring, count);
 	skb_tx_timestamp(skb);
 
 	pdata->stats.tx_packets++;
 	pdata->stats.tx_bytes += skb->len;
 
+	pdata->ring_ops->wr_cmd(tx_ring, count);
 	return NETDEV_TX_OK;
 }
 
@@ -688,10 +688,10 @@
 	mac_ops->tx_enable(pdata);
 	mac_ops->rx_enable(pdata);
 
+	xgene_enet_napi_enable(pdata);
 	ret = xgene_enet_register_irq(ndev);
 	if (ret)
 		return ret;
-	xgene_enet_napi_enable(pdata);
 
 	if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII)
 		phy_start(pdata->phy_dev);
@@ -715,13 +715,13 @@
 	else
 		cancel_delayed_work_sync(&pdata->link_work);
 
-	xgene_enet_napi_disable(pdata);
-	xgene_enet_free_irq(ndev);
-	xgene_enet_process_ring(pdata->rx_ring, -1);
-
 	mac_ops->tx_disable(pdata);
 	mac_ops->rx_disable(pdata);
 
+	xgene_enet_free_irq(ndev);
+	xgene_enet_napi_disable(pdata);
+	xgene_enet_process_ring(pdata->rx_ring, -1);
+
 	return 0;
 }
 
@@ -1474,30 +1474,33 @@
 	}
 	ndev->hw_features = ndev->features;
 
-	ret = register_netdev(ndev);
-	if (ret) {
-		netdev_err(ndev, "Failed to register netdev\n");
-		goto err;
-	}
-
 	ret = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(64));
 	if (ret) {
 		netdev_err(ndev, "No usable DMA configuration\n");
 		goto err;
 	}
 
+	ret = register_netdev(ndev);
+	if (ret) {
+		netdev_err(ndev, "Failed to register netdev\n");
+		goto err;
+	}
+
 	ret = xgene_enet_init_hw(pdata);
 	if (ret)
 		goto err;
 
-	xgene_enet_napi_add(pdata);
 	mac_ops = pdata->mac_ops;
-	if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII)
+	if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII) {
 		ret = xgene_enet_mdio_config(pdata);
-	else
+		if (ret)
+			goto err;
+	} else {
 		INIT_DELAYED_WORK(&pdata->link_work, mac_ops->link_state);
+	}
 
-	return ret;
+	xgene_enet_napi_add(pdata);
+	return 0;
 err:
 	unregister_netdev(ndev);
 	free_netdev(ndev);
diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index c8af3ce..bd377a6 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -1534,6 +1534,8 @@
 	  .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG },
 	{ PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_E2200),
 	  .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG },
+	{ PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_E2400),
+	  .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG },
 	{ PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_AR8162),
 	  .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG },
 	{ PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_AR8171) },
diff --git a/drivers/net/ethernet/atheros/alx/reg.h b/drivers/net/ethernet/atheros/alx/reg.h
index af006b4..0959e68 100644
--- a/drivers/net/ethernet/atheros/alx/reg.h
+++ b/drivers/net/ethernet/atheros/alx/reg.h
@@ -37,6 +37,7 @@
 
 #define ALX_DEV_ID_AR8161				0x1091
 #define ALX_DEV_ID_E2200				0xe091
+#define ALX_DEV_ID_E2400				0xe0a1
 #define ALX_DEV_ID_AR8162				0x1090
 #define ALX_DEV_ID_AR8171				0x10A1
 #define ALX_DEV_ID_AR8172				0x10A0
diff --git a/drivers/net/ethernet/aurora/Kconfig b/drivers/net/ethernet/aurora/Kconfig
new file mode 100644
index 0000000..a3c7106
--- /dev/null
+++ b/drivers/net/ethernet/aurora/Kconfig
@@ -0,0 +1,20 @@
+config NET_VENDOR_AURORA
+	bool "Aurora VLSI devices"
+	help
+	  If you have a network (Ethernet) device belonging to this class,
+	  say Y.
+
+	  Note that the answer to this question doesn't directly affect the
+	  kernel: saying N will just cause the configurator to skip all
+	  questions about Aurora devices. If you say Y, you will be asked
+	  for your specific device in the following questions.
+
+if NET_VENDOR_AURORA
+
+config AURORA_NB8800
+	tristate "Aurora AU-NB8800 support"
+	select PHYLIB
+	help
+	 Support for the AU-NB8800 gigabit Ethernet controller.
+
+endif
diff --git a/drivers/net/ethernet/aurora/Makefile b/drivers/net/ethernet/aurora/Makefile
new file mode 100644
index 0000000..6cb528a
--- /dev/null
+++ b/drivers/net/ethernet/aurora/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_AURORA_NB8800) += nb8800.o
diff --git a/drivers/net/ethernet/aurora/nb8800.c b/drivers/net/ethernet/aurora/nb8800.c
new file mode 100644
index 0000000..ecc4a33
--- /dev/null
+++ b/drivers/net/ethernet/aurora/nb8800.c
@@ -0,0 +1,1552 @@
+/*
+ * Copyright (C) 2015 Mans Rullgard <mans@mansr.com>
+ *
+ * Mostly rewritten, based on driver from Sigma Designs.  Original
+ * copyright notice below.
+ *
+ *
+ * Driver for tangox SMP864x/SMP865x/SMP867x/SMP868x builtin Ethernet Mac.
+ *
+ * Copyright (C) 2005 Maxime Bizon <mbizon@freebox.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/etherdevice.h>
+#include <linux/delay.h>
+#include <linux/ethtool.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/of_device.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
+#include <linux/dma-mapping.h>
+#include <linux/phy.h>
+#include <linux/cache.h>
+#include <linux/jiffies.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <asm/barrier.h>
+
+#include "nb8800.h"
+
+static void nb8800_tx_done(struct net_device *dev);
+static int nb8800_dma_stop(struct net_device *dev);
+
+static inline u8 nb8800_readb(struct nb8800_priv *priv, int reg)
+{
+	return readb_relaxed(priv->base + reg);
+}
+
+static inline u32 nb8800_readl(struct nb8800_priv *priv, int reg)
+{
+	return readl_relaxed(priv->base + reg);
+}
+
+static inline void nb8800_writeb(struct nb8800_priv *priv, int reg, u8 val)
+{
+	writeb_relaxed(val, priv->base + reg);
+}
+
+static inline void nb8800_writew(struct nb8800_priv *priv, int reg, u16 val)
+{
+	writew_relaxed(val, priv->base + reg);
+}
+
+static inline void nb8800_writel(struct nb8800_priv *priv, int reg, u32 val)
+{
+	writel_relaxed(val, priv->base + reg);
+}
+
+static inline void nb8800_maskb(struct nb8800_priv *priv, int reg,
+				u32 mask, u32 val)
+{
+	u32 old = nb8800_readb(priv, reg);
+	u32 new = (old & ~mask) | (val & mask);
+
+	if (new != old)
+		nb8800_writeb(priv, reg, new);
+}
+
+static inline void nb8800_maskl(struct nb8800_priv *priv, int reg,
+				u32 mask, u32 val)
+{
+	u32 old = nb8800_readl(priv, reg);
+	u32 new = (old & ~mask) | (val & mask);
+
+	if (new != old)
+		nb8800_writel(priv, reg, new);
+}
+
+static inline void nb8800_modb(struct nb8800_priv *priv, int reg, u8 bits,
+			       bool set)
+{
+	nb8800_maskb(priv, reg, bits, set ? bits : 0);
+}
+
+static inline void nb8800_setb(struct nb8800_priv *priv, int reg, u8 bits)
+{
+	nb8800_maskb(priv, reg, bits, bits);
+}
+
+static inline void nb8800_clearb(struct nb8800_priv *priv, int reg, u8 bits)
+{
+	nb8800_maskb(priv, reg, bits, 0);
+}
+
+static inline void nb8800_modl(struct nb8800_priv *priv, int reg, u32 bits,
+			       bool set)
+{
+	nb8800_maskl(priv, reg, bits, set ? bits : 0);
+}
+
+static inline void nb8800_setl(struct nb8800_priv *priv, int reg, u32 bits)
+{
+	nb8800_maskl(priv, reg, bits, bits);
+}
+
+static inline void nb8800_clearl(struct nb8800_priv *priv, int reg, u32 bits)
+{
+	nb8800_maskl(priv, reg, bits, 0);
+}
+
+static int nb8800_mdio_wait(struct mii_bus *bus)
+{
+	struct nb8800_priv *priv = bus->priv;
+	u32 val;
+
+	return readl_poll_timeout_atomic(priv->base + NB8800_MDIO_CMD,
+					 val, !(val & MDIO_CMD_GO), 1, 1000);
+}
+
+static int nb8800_mdio_cmd(struct mii_bus *bus, u32 cmd)
+{
+	struct nb8800_priv *priv = bus->priv;
+	int err;
+
+	err = nb8800_mdio_wait(bus);
+	if (err)
+		return err;
+
+	nb8800_writel(priv, NB8800_MDIO_CMD, cmd);
+	udelay(10);
+	nb8800_writel(priv, NB8800_MDIO_CMD, cmd | MDIO_CMD_GO);
+
+	return nb8800_mdio_wait(bus);
+}
+
+static int nb8800_mdio_read(struct mii_bus *bus, int phy_id, int reg)
+{
+	struct nb8800_priv *priv = bus->priv;
+	u32 val;
+	int err;
+
+	err = nb8800_mdio_cmd(bus, MDIO_CMD_ADDR(phy_id) | MDIO_CMD_REG(reg));
+	if (err)
+		return err;
+
+	val = nb8800_readl(priv, NB8800_MDIO_STS);
+	if (val & MDIO_STS_ERR)
+		return 0xffff;
+
+	return val & 0xffff;
+}
+
+static int nb8800_mdio_write(struct mii_bus *bus, int phy_id, int reg, u16 val)
+{
+	u32 cmd = MDIO_CMD_ADDR(phy_id) | MDIO_CMD_REG(reg) |
+		MDIO_CMD_DATA(val) | MDIO_CMD_WR;
+
+	return nb8800_mdio_cmd(bus, cmd);
+}
+
+static void nb8800_mac_tx(struct net_device *dev, bool enable)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+
+	while (nb8800_readl(priv, NB8800_TXC_CR) & TCR_EN)
+		cpu_relax();
+
+	nb8800_modb(priv, NB8800_TX_CTL1, TX_EN, enable);
+}
+
+static void nb8800_mac_rx(struct net_device *dev, bool enable)
+{
+	nb8800_modb(netdev_priv(dev), NB8800_RX_CTL, RX_EN, enable);
+}
+
+static void nb8800_mac_af(struct net_device *dev, bool enable)
+{
+	nb8800_modb(netdev_priv(dev), NB8800_RX_CTL, RX_AF_EN, enable);
+}
+
+static void nb8800_start_rx(struct net_device *dev)
+{
+	nb8800_setl(netdev_priv(dev), NB8800_RXC_CR, RCR_EN);
+}
+
+static int nb8800_alloc_rx(struct net_device *dev, unsigned int i, bool napi)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	struct nb8800_rx_desc *rxd = &priv->rx_descs[i];
+	struct nb8800_rx_buf *rxb = &priv->rx_bufs[i];
+	int size = L1_CACHE_ALIGN(RX_BUF_SIZE);
+	dma_addr_t dma_addr;
+	struct page *page;
+	unsigned long offset;
+	void *data;
+
+	data = napi ? napi_alloc_frag(size) : netdev_alloc_frag(size);
+	if (!data)
+		return -ENOMEM;
+
+	page = virt_to_head_page(data);
+	offset = data - page_address(page);
+
+	dma_addr = dma_map_page(&dev->dev, page, offset, RX_BUF_SIZE,
+				DMA_FROM_DEVICE);
+
+	if (dma_mapping_error(&dev->dev, dma_addr)) {
+		skb_free_frag(data);
+		return -ENOMEM;
+	}
+
+	rxb->page = page;
+	rxb->offset = offset;
+	rxd->desc.s_addr = dma_addr;
+
+	return 0;
+}
+
+static void nb8800_receive(struct net_device *dev, unsigned int i,
+			   unsigned int len)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	struct nb8800_rx_desc *rxd = &priv->rx_descs[i];
+	struct page *page = priv->rx_bufs[i].page;
+	int offset = priv->rx_bufs[i].offset;
+	void *data = page_address(page) + offset;
+	dma_addr_t dma = rxd->desc.s_addr;
+	struct sk_buff *skb;
+	unsigned int size;
+	int err;
+
+	size = len <= RX_COPYBREAK ? len : RX_COPYHDR;
+
+	skb = napi_alloc_skb(&priv->napi, size);
+	if (!skb) {
+		netdev_err(dev, "rx skb allocation failed\n");
+		dev->stats.rx_dropped++;
+		return;
+	}
+
+	if (len <= RX_COPYBREAK) {
+		dma_sync_single_for_cpu(&dev->dev, dma, len, DMA_FROM_DEVICE);
+		memcpy(skb_put(skb, len), data, len);
+		dma_sync_single_for_device(&dev->dev, dma, len,
+					   DMA_FROM_DEVICE);
+	} else {
+		err = nb8800_alloc_rx(dev, i, true);
+		if (err) {
+			netdev_err(dev, "rx buffer allocation failed\n");
+			dev->stats.rx_dropped++;
+			return;
+		}
+
+		dma_unmap_page(&dev->dev, dma, RX_BUF_SIZE, DMA_FROM_DEVICE);
+		memcpy(skb_put(skb, RX_COPYHDR), data, RX_COPYHDR);
+		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
+				offset + RX_COPYHDR, len - RX_COPYHDR,
+				RX_BUF_SIZE);
+	}
+
+	skb->protocol = eth_type_trans(skb, dev);
+	napi_gro_receive(&priv->napi, skb);
+}
+
+static void nb8800_rx_error(struct net_device *dev, u32 report)
+{
+	if (report & RX_LENGTH_ERR)
+		dev->stats.rx_length_errors++;
+
+	if (report & RX_FCS_ERR)
+		dev->stats.rx_crc_errors++;
+
+	if (report & RX_FIFO_OVERRUN)
+		dev->stats.rx_fifo_errors++;
+
+	if (report & RX_ALIGNMENT_ERROR)
+		dev->stats.rx_frame_errors++;
+
+	dev->stats.rx_errors++;
+}
+
+static int nb8800_poll(struct napi_struct *napi, int budget)
+{
+	struct net_device *dev = napi->dev;
+	struct nb8800_priv *priv = netdev_priv(dev);
+	struct nb8800_rx_desc *rxd;
+	unsigned int last = priv->rx_eoc;
+	unsigned int next;
+	int work = 0;
+
+	nb8800_tx_done(dev);
+
+again:
+	while (work < budget) {
+		struct nb8800_rx_buf *rxb;
+		unsigned int len;
+
+		next = (last + 1) % RX_DESC_COUNT;
+
+		rxb = &priv->rx_bufs[next];
+		rxd = &priv->rx_descs[next];
+
+		if (!rxd->report)
+			break;
+
+		len = RX_BYTES_TRANSFERRED(rxd->report);
+
+		if (IS_RX_ERROR(rxd->report))
+			nb8800_rx_error(dev, rxd->report);
+		else
+			nb8800_receive(dev, next, len);
+
+		dev->stats.rx_packets++;
+		dev->stats.rx_bytes += len;
+
+		if (rxd->report & RX_MULTICAST_PKT)
+			dev->stats.multicast++;
+
+		rxd->report = 0;
+		last = next;
+		work++;
+	}
+
+	if (work) {
+		priv->rx_descs[last].desc.config |= DESC_EOC;
+		wmb();	/* ensure new EOC is written before clearing old */
+		priv->rx_descs[priv->rx_eoc].desc.config &= ~DESC_EOC;
+		priv->rx_eoc = last;
+		nb8800_start_rx(dev);
+	}
+
+	if (work < budget) {
+		nb8800_writel(priv, NB8800_RX_ITR, priv->rx_itr_irq);
+
+		/* If a packet arrived after we last checked but
+		 * before writing RX_ITR, the interrupt will be
+		 * delayed, so we retrieve it now.
+		 */
+		if (priv->rx_descs[next].report)
+			goto again;
+
+		napi_complete_done(napi, work);
+	}
+
+	return work;
+}
+
+static void __nb8800_tx_dma_start(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	struct nb8800_tx_buf *txb;
+	u32 txc_cr;
+
+	txb = &priv->tx_bufs[priv->tx_queue];
+	if (!txb->ready)
+		return;
+
+	txc_cr = nb8800_readl(priv, NB8800_TXC_CR);
+	if (txc_cr & TCR_EN)
+		return;
+
+	nb8800_writel(priv, NB8800_TX_DESC_ADDR, txb->dma_desc);
+	wmb();		/* ensure desc addr is written before starting DMA */
+	nb8800_writel(priv, NB8800_TXC_CR, txc_cr | TCR_EN);
+
+	priv->tx_queue = (priv->tx_queue + txb->chain_len) % TX_DESC_COUNT;
+}
+
+static void nb8800_tx_dma_start(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+
+	spin_lock_irq(&priv->tx_lock);
+	__nb8800_tx_dma_start(dev);
+	spin_unlock_irq(&priv->tx_lock);
+}
+
+static void nb8800_tx_dma_start_irq(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+
+	spin_lock(&priv->tx_lock);
+	__nb8800_tx_dma_start(dev);
+	spin_unlock(&priv->tx_lock);
+}
+
+static int nb8800_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	struct nb8800_tx_desc *txd;
+	struct nb8800_tx_buf *txb;
+	struct nb8800_dma_desc *desc;
+	dma_addr_t dma_addr;
+	unsigned int dma_len;
+	unsigned int align;
+	unsigned int next;
+
+	if (atomic_read(&priv->tx_free) <= NB8800_DESC_LOW) {
+		netif_stop_queue(dev);
+		return NETDEV_TX_BUSY;
+	}
+
+	align = (8 - (uintptr_t)skb->data) & 7;
+
+	dma_len = skb->len - align;
+	dma_addr = dma_map_single(&dev->dev, skb->data + align,
+				  dma_len, DMA_TO_DEVICE);
+
+	if (dma_mapping_error(&dev->dev, dma_addr)) {
+		netdev_err(dev, "tx dma mapping error\n");
+		kfree_skb(skb);
+		dev->stats.tx_dropped++;
+		return NETDEV_TX_OK;
+	}
+
+	if (atomic_dec_return(&priv->tx_free) <= NB8800_DESC_LOW) {
+		netif_stop_queue(dev);
+		skb->xmit_more = 0;
+	}
+
+	next = priv->tx_next;
+	txb = &priv->tx_bufs[next];
+	txd = &priv->tx_descs[next];
+	desc = &txd->desc[0];
+
+	next = (next + 1) % TX_DESC_COUNT;
+
+	if (align) {
+		memcpy(txd->buf, skb->data, align);
+
+		desc->s_addr =
+			txb->dma_desc + offsetof(struct nb8800_tx_desc, buf);
+		desc->n_addr = txb->dma_desc + sizeof(txd->desc[0]);
+		desc->config = DESC_BTS(2) | DESC_DS | align;
+
+		desc++;
+	}
+
+	desc->s_addr = dma_addr;
+	desc->n_addr = priv->tx_bufs[next].dma_desc;
+	desc->config = DESC_BTS(2) | DESC_DS | DESC_EOF | dma_len;
+
+	if (!skb->xmit_more)
+		desc->config |= DESC_EOC;
+
+	txb->skb = skb;
+	txb->dma_addr = dma_addr;
+	txb->dma_len = dma_len;
+
+	if (!priv->tx_chain) {
+		txb->chain_len = 1;
+		priv->tx_chain = txb;
+	} else {
+		priv->tx_chain->chain_len++;
+	}
+
+	netdev_sent_queue(dev, skb->len);
+
+	priv->tx_next = next;
+
+	if (!skb->xmit_more) {
+		smp_wmb();
+		priv->tx_chain->ready = true;
+		priv->tx_chain = NULL;
+		nb8800_tx_dma_start(dev);
+	}
+
+	return NETDEV_TX_OK;
+}
+
+static void nb8800_tx_error(struct net_device *dev, u32 report)
+{
+	if (report & TX_LATE_COLLISION)
+		dev->stats.collisions++;
+
+	if (report & TX_PACKET_DROPPED)
+		dev->stats.tx_dropped++;
+
+	if (report & TX_FIFO_UNDERRUN)
+		dev->stats.tx_fifo_errors++;
+
+	dev->stats.tx_errors++;
+}
+
+static void nb8800_tx_done(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	unsigned int limit = priv->tx_next;
+	unsigned int done = priv->tx_done;
+	unsigned int packets = 0;
+	unsigned int len = 0;
+
+	while (done != limit) {
+		struct nb8800_tx_desc *txd = &priv->tx_descs[done];
+		struct nb8800_tx_buf *txb = &priv->tx_bufs[done];
+		struct sk_buff *skb;
+
+		if (!txd->report)
+			break;
+
+		skb = txb->skb;
+		len += skb->len;
+
+		dma_unmap_single(&dev->dev, txb->dma_addr, txb->dma_len,
+				 DMA_TO_DEVICE);
+
+		if (IS_TX_ERROR(txd->report)) {
+			nb8800_tx_error(dev, txd->report);
+			kfree_skb(skb);
+		} else {
+			consume_skb(skb);
+		}
+
+		dev->stats.tx_packets++;
+		dev->stats.tx_bytes += TX_BYTES_TRANSFERRED(txd->report);
+		dev->stats.collisions += TX_EARLY_COLLISIONS(txd->report);
+
+		txb->skb = NULL;
+		txb->ready = false;
+		txd->report = 0;
+
+		done = (done + 1) % TX_DESC_COUNT;
+		packets++;
+	}
+
+	if (packets) {
+		smp_mb__before_atomic();
+		atomic_add(packets, &priv->tx_free);
+		netdev_completed_queue(dev, packets, len);
+		netif_wake_queue(dev);
+		priv->tx_done = done;
+	}
+}
+
+static irqreturn_t nb8800_irq(int irq, void *dev_id)
+{
+	struct net_device *dev = dev_id;
+	struct nb8800_priv *priv = netdev_priv(dev);
+	irqreturn_t ret = IRQ_NONE;
+	u32 val;
+
+	/* tx interrupt */
+	val = nb8800_readl(priv, NB8800_TXC_SR);
+	if (val) {
+		nb8800_writel(priv, NB8800_TXC_SR, val);
+
+		if (val & TSR_DI)
+			nb8800_tx_dma_start_irq(dev);
+
+		if (val & TSR_TI)
+			napi_schedule_irqoff(&priv->napi);
+
+		if (unlikely(val & TSR_DE))
+			netdev_err(dev, "TX DMA error\n");
+
+		/* should never happen with automatic status retrieval */
+		if (unlikely(val & TSR_TO))
+			netdev_err(dev, "TX Status FIFO overflow\n");
+
+		ret = IRQ_HANDLED;
+	}
+
+	/* rx interrupt */
+	val = nb8800_readl(priv, NB8800_RXC_SR);
+	if (val) {
+		nb8800_writel(priv, NB8800_RXC_SR, val);
+
+		if (likely(val & (RSR_RI | RSR_DI))) {
+			nb8800_writel(priv, NB8800_RX_ITR, priv->rx_itr_poll);
+			napi_schedule_irqoff(&priv->napi);
+		}
+
+		if (unlikely(val & RSR_DE))
+			netdev_err(dev, "RX DMA error\n");
+
+		/* should never happen with automatic status retrieval */
+		if (unlikely(val & RSR_RO))
+			netdev_err(dev, "RX Status FIFO overflow\n");
+
+		ret = IRQ_HANDLED;
+	}
+
+	return ret;
+}
+
+static void nb8800_mac_config(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	bool gigabit = priv->speed == SPEED_1000;
+	u32 mac_mode_mask = RGMII_MODE | HALF_DUPLEX | GMAC_MODE;
+	u32 mac_mode = 0;
+	u32 slot_time;
+	u32 phy_clk;
+	u32 ict;
+
+	if (!priv->duplex)
+		mac_mode |= HALF_DUPLEX;
+
+	if (gigabit) {
+		if (priv->phy_mode == PHY_INTERFACE_MODE_RGMII)
+			mac_mode |= RGMII_MODE;
+
+		mac_mode |= GMAC_MODE;
+		phy_clk = 125000000;
+
+		/* Should be 512 but register is only 8 bits */
+		slot_time = 255;
+	} else {
+		phy_clk = 25000000;
+		slot_time = 128;
+	}
+
+	ict = DIV_ROUND_UP(phy_clk, clk_get_rate(priv->clk));
+
+	nb8800_writeb(priv, NB8800_IC_THRESHOLD, ict);
+	nb8800_writeb(priv, NB8800_SLOT_TIME, slot_time);
+	nb8800_maskb(priv, NB8800_MAC_MODE, mac_mode_mask, mac_mode);
+}
+
+static void nb8800_pause_config(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	struct phy_device *phydev = priv->phydev;
+	u32 rxcr;
+
+	if (priv->pause_aneg) {
+		if (!phydev || !phydev->link)
+			return;
+
+		priv->pause_rx = phydev->pause;
+		priv->pause_tx = phydev->pause ^ phydev->asym_pause;
+	}
+
+	nb8800_modb(priv, NB8800_RX_CTL, RX_PAUSE_EN, priv->pause_rx);
+
+	rxcr = nb8800_readl(priv, NB8800_RXC_CR);
+	if (!!(rxcr & RCR_FL) == priv->pause_tx)
+		return;
+
+	if (netif_running(dev)) {
+		napi_disable(&priv->napi);
+		netif_tx_lock_bh(dev);
+		nb8800_dma_stop(dev);
+		nb8800_modl(priv, NB8800_RXC_CR, RCR_FL, priv->pause_tx);
+		nb8800_start_rx(dev);
+		netif_tx_unlock_bh(dev);
+		napi_enable(&priv->napi);
+	} else {
+		nb8800_modl(priv, NB8800_RXC_CR, RCR_FL, priv->pause_tx);
+	}
+}
+
+static void nb8800_link_reconfigure(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	struct phy_device *phydev = priv->phydev;
+	int change = 0;
+
+	if (phydev->link) {
+		if (phydev->speed != priv->speed) {
+			priv->speed = phydev->speed;
+			change = 1;
+		}
+
+		if (phydev->duplex != priv->duplex) {
+			priv->duplex = phydev->duplex;
+			change = 1;
+		}
+
+		if (change)
+			nb8800_mac_config(dev);
+
+		nb8800_pause_config(dev);
+	}
+
+	if (phydev->link != priv->link) {
+		priv->link = phydev->link;
+		change = 1;
+	}
+
+	if (change)
+		phy_print_status(priv->phydev);
+}
+
+static void nb8800_update_mac_addr(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	int i;
+
+	for (i = 0; i < ETH_ALEN; i++)
+		nb8800_writeb(priv, NB8800_SRC_ADDR(i), dev->dev_addr[i]);
+
+	for (i = 0; i < ETH_ALEN; i++)
+		nb8800_writeb(priv, NB8800_UC_ADDR(i), dev->dev_addr[i]);
+}
+
+static int nb8800_set_mac_address(struct net_device *dev, void *addr)
+{
+	struct sockaddr *sock = addr;
+
+	if (netif_running(dev))
+		return -EBUSY;
+
+	ether_addr_copy(dev->dev_addr, sock->sa_data);
+	nb8800_update_mac_addr(dev);
+
+	return 0;
+}
+
+static void nb8800_mc_init(struct net_device *dev, int val)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+
+	nb8800_writeb(priv, NB8800_MC_INIT, val);
+	readb_poll_timeout_atomic(priv->base + NB8800_MC_INIT, val, !val,
+				  1, 1000);
+}
+
+static void nb8800_set_rx_mode(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	struct netdev_hw_addr *ha;
+	int i;
+
+	if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) {
+		nb8800_mac_af(dev, false);
+		return;
+	}
+
+	nb8800_mac_af(dev, true);
+	nb8800_mc_init(dev, 0);
+
+	netdev_for_each_mc_addr(ha, dev) {
+		for (i = 0; i < ETH_ALEN; i++)
+			nb8800_writeb(priv, NB8800_MC_ADDR(i), ha->addr[i]);
+
+		nb8800_mc_init(dev, 0xff);
+	}
+}
+
+#define RX_DESC_SIZE (RX_DESC_COUNT * sizeof(struct nb8800_rx_desc))
+#define TX_DESC_SIZE (TX_DESC_COUNT * sizeof(struct nb8800_tx_desc))
+
+static void nb8800_dma_free(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	unsigned int i;
+
+	if (priv->rx_bufs) {
+		for (i = 0; i < RX_DESC_COUNT; i++)
+			if (priv->rx_bufs[i].page)
+				put_page(priv->rx_bufs[i].page);
+
+		kfree(priv->rx_bufs);
+		priv->rx_bufs = NULL;
+	}
+
+	if (priv->tx_bufs) {
+		for (i = 0; i < TX_DESC_COUNT; i++)
+			kfree_skb(priv->tx_bufs[i].skb);
+
+		kfree(priv->tx_bufs);
+		priv->tx_bufs = NULL;
+	}
+
+	if (priv->rx_descs) {
+		dma_free_coherent(dev->dev.parent, RX_DESC_SIZE, priv->rx_descs,
+				  priv->rx_desc_dma);
+		priv->rx_descs = NULL;
+	}
+
+	if (priv->tx_descs) {
+		dma_free_coherent(dev->dev.parent, TX_DESC_SIZE, priv->tx_descs,
+				  priv->tx_desc_dma);
+		priv->tx_descs = NULL;
+	}
+}
+
+static void nb8800_dma_reset(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	struct nb8800_rx_desc *rxd;
+	struct nb8800_tx_desc *txd;
+	unsigned int i;
+
+	for (i = 0; i < RX_DESC_COUNT; i++) {
+		dma_addr_t rx_dma = priv->rx_desc_dma + i * sizeof(*rxd);
+
+		rxd = &priv->rx_descs[i];
+		rxd->desc.n_addr = rx_dma + sizeof(*rxd);
+		rxd->desc.r_addr =
+			rx_dma + offsetof(struct nb8800_rx_desc, report);
+		rxd->desc.config = priv->rx_dma_config;
+		rxd->report = 0;
+	}
+
+	rxd->desc.n_addr = priv->rx_desc_dma;
+	rxd->desc.config |= DESC_EOC;
+
+	priv->rx_eoc = RX_DESC_COUNT - 1;
+
+	for (i = 0; i < TX_DESC_COUNT; i++) {
+		struct nb8800_tx_buf *txb = &priv->tx_bufs[i];
+		dma_addr_t r_dma = txb->dma_desc +
+			offsetof(struct nb8800_tx_desc, report);
+
+		txd = &priv->tx_descs[i];
+		txd->desc[0].r_addr = r_dma;
+		txd->desc[1].r_addr = r_dma;
+		txd->report = 0;
+	}
+
+	priv->tx_next = 0;
+	priv->tx_queue = 0;
+	priv->tx_done = 0;
+	atomic_set(&priv->tx_free, TX_DESC_COUNT);
+
+	nb8800_writel(priv, NB8800_RX_DESC_ADDR, priv->rx_desc_dma);
+
+	wmb();		/* ensure all setup is written before starting */
+}
+
+static int nb8800_dma_init(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	unsigned int n_rx = RX_DESC_COUNT;
+	unsigned int n_tx = TX_DESC_COUNT;
+	unsigned int i;
+	int err;
+
+	priv->rx_descs = dma_alloc_coherent(dev->dev.parent, RX_DESC_SIZE,
+					    &priv->rx_desc_dma, GFP_KERNEL);
+	if (!priv->rx_descs)
+		goto err_out;
+
+	priv->rx_bufs = kcalloc(n_rx, sizeof(*priv->rx_bufs), GFP_KERNEL);
+	if (!priv->rx_bufs)
+		goto err_out;
+
+	for (i = 0; i < n_rx; i++) {
+		err = nb8800_alloc_rx(dev, i, false);
+		if (err)
+			goto err_out;
+	}
+
+	priv->tx_descs = dma_alloc_coherent(dev->dev.parent, TX_DESC_SIZE,
+					    &priv->tx_desc_dma, GFP_KERNEL);
+	if (!priv->tx_descs)
+		goto err_out;
+
+	priv->tx_bufs = kcalloc(n_tx, sizeof(*priv->tx_bufs), GFP_KERNEL);
+	if (!priv->tx_bufs)
+		goto err_out;
+
+	for (i = 0; i < n_tx; i++)
+		priv->tx_bufs[i].dma_desc =
+			priv->tx_desc_dma + i * sizeof(struct nb8800_tx_desc);
+
+	nb8800_dma_reset(dev);
+
+	return 0;
+
+err_out:
+	nb8800_dma_free(dev);
+
+	return -ENOMEM;
+}
+
+static int nb8800_dma_stop(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	struct nb8800_tx_buf *txb = &priv->tx_bufs[0];
+	struct nb8800_tx_desc *txd = &priv->tx_descs[0];
+	int retry = 5;
+	u32 txcr;
+	u32 rxcr;
+	int err;
+	unsigned int i;
+
+	/* wait for tx to finish */
+	err = readl_poll_timeout_atomic(priv->base + NB8800_TXC_CR, txcr,
+					!(txcr & TCR_EN) &&
+					priv->tx_done == priv->tx_next,
+					1000, 1000000);
+	if (err)
+		return err;
+
+	/* The rx DMA only stops if it reaches the end of chain.
+	 * To make this happen, we set the EOC flag on all rx
+	 * descriptors, put the device in loopback mode, and send
+	 * a few dummy frames.  The interrupt handler will ignore
+	 * these since NAPI is disabled and no real frames are in
+	 * the tx queue.
+	 */
+
+	for (i = 0; i < RX_DESC_COUNT; i++)
+		priv->rx_descs[i].desc.config |= DESC_EOC;
+
+	txd->desc[0].s_addr =
+		txb->dma_desc + offsetof(struct nb8800_tx_desc, buf);
+	txd->desc[0].config = DESC_BTS(2) | DESC_DS | DESC_EOF | DESC_EOC | 8;
+	memset(txd->buf, 0, sizeof(txd->buf));
+
+	nb8800_mac_af(dev, false);
+	nb8800_setb(priv, NB8800_MAC_MODE, LOOPBACK_EN);
+
+	do {
+		nb8800_writel(priv, NB8800_TX_DESC_ADDR, txb->dma_desc);
+		wmb();
+		nb8800_writel(priv, NB8800_TXC_CR, txcr | TCR_EN);
+
+		err = readl_poll_timeout_atomic(priv->base + NB8800_RXC_CR,
+						rxcr, !(rxcr & RCR_EN),
+						1000, 100000);
+	} while (err && --retry);
+
+	nb8800_mac_af(dev, true);
+	nb8800_clearb(priv, NB8800_MAC_MODE, LOOPBACK_EN);
+	nb8800_dma_reset(dev);
+
+	return retry ? 0 : -ETIMEDOUT;
+}
+
+static void nb8800_pause_adv(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	u32 adv = 0;
+
+	if (!priv->phydev)
+		return;
+
+	if (priv->pause_rx)
+		adv |= ADVERTISED_Pause | ADVERTISED_Asym_Pause;
+	if (priv->pause_tx)
+		adv ^= ADVERTISED_Asym_Pause;
+
+	priv->phydev->supported |= adv;
+	priv->phydev->advertising |= adv;
+}
+
+static int nb8800_open(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	int err;
+
+	/* clear any pending interrupts */
+	nb8800_writel(priv, NB8800_RXC_SR, 0xf);
+	nb8800_writel(priv, NB8800_TXC_SR, 0xf);
+
+	err = nb8800_dma_init(dev);
+	if (err)
+		return err;
+
+	err = request_irq(dev->irq, nb8800_irq, 0, dev_name(&dev->dev), dev);
+	if (err)
+		goto err_free_dma;
+
+	nb8800_mac_rx(dev, true);
+	nb8800_mac_tx(dev, true);
+
+	priv->phydev = of_phy_connect(dev, priv->phy_node,
+				      nb8800_link_reconfigure, 0,
+				      priv->phy_mode);
+	if (!priv->phydev)
+		goto err_free_irq;
+
+	nb8800_pause_adv(dev);
+
+	netdev_reset_queue(dev);
+	napi_enable(&priv->napi);
+	netif_start_queue(dev);
+
+	nb8800_start_rx(dev);
+	phy_start(priv->phydev);
+
+	return 0;
+
+err_free_irq:
+	free_irq(dev->irq, dev);
+err_free_dma:
+	nb8800_dma_free(dev);
+
+	return err;
+}
+
+static int nb8800_stop(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+
+	phy_stop(priv->phydev);
+
+	netif_stop_queue(dev);
+	napi_disable(&priv->napi);
+
+	nb8800_dma_stop(dev);
+	nb8800_mac_rx(dev, false);
+	nb8800_mac_tx(dev, false);
+
+	phy_disconnect(priv->phydev);
+	priv->phydev = NULL;
+
+	free_irq(dev->irq, dev);
+
+	nb8800_dma_free(dev);
+
+	return 0;
+}
+
+static int nb8800_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+
+	return phy_mii_ioctl(priv->phydev, rq, cmd);
+}
+
+static const struct net_device_ops nb8800_netdev_ops = {
+	.ndo_open		= nb8800_open,
+	.ndo_stop		= nb8800_stop,
+	.ndo_start_xmit		= nb8800_xmit,
+	.ndo_set_mac_address	= nb8800_set_mac_address,
+	.ndo_set_rx_mode	= nb8800_set_rx_mode,
+	.ndo_do_ioctl		= nb8800_ioctl,
+	.ndo_change_mtu		= eth_change_mtu,
+	.ndo_validate_addr	= eth_validate_addr,
+};
+
+static int nb8800_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+
+	if (!priv->phydev)
+		return -ENODEV;
+
+	return phy_ethtool_gset(priv->phydev, cmd);
+}
+
+static int nb8800_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+
+	if (!priv->phydev)
+		return -ENODEV;
+
+	return phy_ethtool_sset(priv->phydev, cmd);
+}
+
+static int nb8800_nway_reset(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+
+	if (!priv->phydev)
+		return -ENODEV;
+
+	return genphy_restart_aneg(priv->phydev);
+}
+
+static void nb8800_get_pauseparam(struct net_device *dev,
+				  struct ethtool_pauseparam *pp)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+
+	pp->autoneg = priv->pause_aneg;
+	pp->rx_pause = priv->pause_rx;
+	pp->tx_pause = priv->pause_tx;
+}
+
+static int nb8800_set_pauseparam(struct net_device *dev,
+				 struct ethtool_pauseparam *pp)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+
+	priv->pause_aneg = pp->autoneg;
+	priv->pause_rx = pp->rx_pause;
+	priv->pause_tx = pp->tx_pause;
+
+	nb8800_pause_adv(dev);
+
+	if (!priv->pause_aneg)
+		nb8800_pause_config(dev);
+	else if (priv->phydev)
+		phy_start_aneg(priv->phydev);
+
+	return 0;
+}
+
+static const char nb8800_stats_names[][ETH_GSTRING_LEN] = {
+	"rx_bytes_ok",
+	"rx_frames_ok",
+	"rx_undersize_frames",
+	"rx_fragment_frames",
+	"rx_64_byte_frames",
+	"rx_127_byte_frames",
+	"rx_255_byte_frames",
+	"rx_511_byte_frames",
+	"rx_1023_byte_frames",
+	"rx_max_size_frames",
+	"rx_oversize_frames",
+	"rx_bad_fcs_frames",
+	"rx_broadcast_frames",
+	"rx_multicast_frames",
+	"rx_control_frames",
+	"rx_pause_frames",
+	"rx_unsup_control_frames",
+	"rx_align_error_frames",
+	"rx_overrun_frames",
+	"rx_jabber_frames",
+	"rx_bytes",
+	"rx_frames",
+
+	"tx_bytes_ok",
+	"tx_frames_ok",
+	"tx_64_byte_frames",
+	"tx_127_byte_frames",
+	"tx_255_byte_frames",
+	"tx_511_byte_frames",
+	"tx_1023_byte_frames",
+	"tx_max_size_frames",
+	"tx_oversize_frames",
+	"tx_broadcast_frames",
+	"tx_multicast_frames",
+	"tx_control_frames",
+	"tx_pause_frames",
+	"tx_underrun_frames",
+	"tx_single_collision_frames",
+	"tx_multi_collision_frames",
+	"tx_deferred_collision_frames",
+	"tx_late_collision_frames",
+	"tx_excessive_collision_frames",
+	"tx_bytes",
+	"tx_frames",
+	"tx_collisions",
+};
+
+#define NB8800_NUM_STATS ARRAY_SIZE(nb8800_stats_names)
+
+static int nb8800_get_sset_count(struct net_device *dev, int sset)
+{
+	if (sset == ETH_SS_STATS)
+		return NB8800_NUM_STATS;
+
+	return -EOPNOTSUPP;
+}
+
+static void nb8800_get_strings(struct net_device *dev, u32 sset, u8 *buf)
+{
+	if (sset == ETH_SS_STATS)
+		memcpy(buf, &nb8800_stats_names, sizeof(nb8800_stats_names));
+}
+
+static u32 nb8800_read_stat(struct net_device *dev, int index)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+
+	nb8800_writeb(priv, NB8800_STAT_INDEX, index);
+
+	return nb8800_readl(priv, NB8800_STAT_DATA);
+}
+
+static void nb8800_get_ethtool_stats(struct net_device *dev,
+				     struct ethtool_stats *estats, u64 *st)
+{
+	unsigned int i;
+	u32 rx, tx;
+
+	for (i = 0; i < NB8800_NUM_STATS / 2; i++) {
+		rx = nb8800_read_stat(dev, i);
+		tx = nb8800_read_stat(dev, i | 0x80);
+		st[i] = rx;
+		st[i + NB8800_NUM_STATS / 2] = tx;
+	}
+}
+
+static const struct ethtool_ops nb8800_ethtool_ops = {
+	.get_settings		= nb8800_get_settings,
+	.set_settings		= nb8800_set_settings,
+	.nway_reset		= nb8800_nway_reset,
+	.get_link		= ethtool_op_get_link,
+	.get_pauseparam		= nb8800_get_pauseparam,
+	.set_pauseparam		= nb8800_set_pauseparam,
+	.get_sset_count		= nb8800_get_sset_count,
+	.get_strings		= nb8800_get_strings,
+	.get_ethtool_stats	= nb8800_get_ethtool_stats,
+};
+
+static int nb8800_hw_init(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	u32 val;
+
+	val = TX_RETRY_EN | TX_PAD_EN | TX_APPEND_FCS;
+	nb8800_writeb(priv, NB8800_TX_CTL1, val);
+
+	/* Collision retry count */
+	nb8800_writeb(priv, NB8800_TX_CTL2, 5);
+
+	val = RX_PAD_STRIP | RX_AF_EN;
+	nb8800_writeb(priv, NB8800_RX_CTL, val);
+
+	/* Chosen by fair dice roll */
+	nb8800_writeb(priv, NB8800_RANDOM_SEED, 4);
+
+	/* TX cycles per deferral period */
+	nb8800_writeb(priv, NB8800_TX_SDP, 12);
+
+	/* The following three threshold values have been
+	 * experimentally determined for good results.
+	 */
+
+	/* RX/TX FIFO threshold for partial empty (64-bit entries) */
+	nb8800_writeb(priv, NB8800_PE_THRESHOLD, 0);
+
+	/* RX/TX FIFO threshold for partial full (64-bit entries) */
+	nb8800_writeb(priv, NB8800_PF_THRESHOLD, 255);
+
+	/* Buffer size for transmit (64-bit entries) */
+	nb8800_writeb(priv, NB8800_TX_BUFSIZE, 64);
+
+	/* Configure tx DMA */
+
+	val = nb8800_readl(priv, NB8800_TXC_CR);
+	val &= TCR_LE;		/* keep endian setting */
+	val |= TCR_DM;		/* DMA descriptor mode */
+	val |= TCR_RS;		/* automatically store tx status  */
+	val |= TCR_DIE;		/* interrupt on DMA chain completion */
+	val |= TCR_TFI(7);	/* interrupt after 7 frames transmitted */
+	val |= TCR_BTS(2);	/* 32-byte bus transaction size */
+	nb8800_writel(priv, NB8800_TXC_CR, val);
+
+	/* TX complete interrupt after 10 ms or 7 frames (see above) */
+	val = clk_get_rate(priv->clk) / 100;
+	nb8800_writel(priv, NB8800_TX_ITR, val);
+
+	/* Configure rx DMA */
+
+	val = nb8800_readl(priv, NB8800_RXC_CR);
+	val &= RCR_LE;		/* keep endian setting */
+	val |= RCR_DM;		/* DMA descriptor mode */
+	val |= RCR_RS;		/* automatically store rx status */
+	val |= RCR_DIE;		/* interrupt at end of DMA chain */
+	val |= RCR_RFI(7);	/* interrupt after 7 frames received */
+	val |= RCR_BTS(2);	/* 32-byte bus transaction size */
+	nb8800_writel(priv, NB8800_RXC_CR, val);
+
+	/* The rx interrupt can fire before the DMA has completed
+	 * unless a small delay is added.  50 us is hopefully enough.
+	 */
+	priv->rx_itr_irq = clk_get_rate(priv->clk) / 20000;
+
+	/* In NAPI poll mode we want to disable interrupts, but the
+	 * hardware does not permit this.  Delay 10 ms instead.
+	 */
+	priv->rx_itr_poll = clk_get_rate(priv->clk) / 100;
+
+	nb8800_writel(priv, NB8800_RX_ITR, priv->rx_itr_irq);
+
+	priv->rx_dma_config = RX_BUF_SIZE | DESC_BTS(2) | DESC_DS | DESC_EOF;
+
+	/* Flow control settings */
+
+	/* Pause time of 0.1 ms */
+	val = 100000 / 512;
+	nb8800_writeb(priv, NB8800_PQ1, val >> 8);
+	nb8800_writeb(priv, NB8800_PQ2, val & 0xff);
+
+	/* Auto-negotiate by default */
+	priv->pause_aneg = true;
+	priv->pause_rx = true;
+	priv->pause_tx = true;
+
+	nb8800_mc_init(dev, 0);
+
+	return 0;
+}
+
+static int nb8800_tangox_init(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	u32 pad_mode = PAD_MODE_MII;
+
+	switch (priv->phy_mode) {
+	case PHY_INTERFACE_MODE_MII:
+	case PHY_INTERFACE_MODE_GMII:
+		pad_mode = PAD_MODE_MII;
+		break;
+
+	case PHY_INTERFACE_MODE_RGMII:
+		pad_mode = PAD_MODE_RGMII;
+		break;
+
+	case PHY_INTERFACE_MODE_RGMII_TXID:
+		pad_mode = PAD_MODE_RGMII | PAD_MODE_GTX_CLK_DELAY;
+		break;
+
+	default:
+		dev_err(dev->dev.parent, "unsupported phy mode %s\n",
+			phy_modes(priv->phy_mode));
+		return -EINVAL;
+	}
+
+	nb8800_writeb(priv, NB8800_TANGOX_PAD_MODE, pad_mode);
+
+	return 0;
+}
+
+static int nb8800_tangox_reset(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	int clk_div;
+
+	nb8800_writeb(priv, NB8800_TANGOX_RESET, 0);
+	usleep_range(1000, 10000);
+	nb8800_writeb(priv, NB8800_TANGOX_RESET, 1);
+
+	wmb();		/* ensure reset is cleared before proceeding */
+
+	clk_div = DIV_ROUND_UP(clk_get_rate(priv->clk), 2 * MAX_MDC_CLOCK);
+	nb8800_writew(priv, NB8800_TANGOX_MDIO_CLKDIV, clk_div);
+
+	return 0;
+}
+
+static const struct nb8800_ops nb8800_tangox_ops = {
+	.init	= nb8800_tangox_init,
+	.reset	= nb8800_tangox_reset,
+};
+
+static int nb8800_tango4_init(struct net_device *dev)
+{
+	struct nb8800_priv *priv = netdev_priv(dev);
+	int err;
+
+	err = nb8800_tangox_init(dev);
+	if (err)
+		return err;
+
+	/* On tango4 interrupt on DMA completion per frame works and gives
+	 * better performance despite generating more rx interrupts.
+	 */
+
+	/* Disable unnecessary interrupt on rx completion */
+	nb8800_clearl(priv, NB8800_RXC_CR, RCR_RFI(7));
+
+	/* Request interrupt on descriptor DMA completion */
+	priv->rx_dma_config |= DESC_ID;
+
+	return 0;
+}
+
+static const struct nb8800_ops nb8800_tango4_ops = {
+	.init	= nb8800_tango4_init,
+	.reset	= nb8800_tangox_reset,
+};
+
+static const struct of_device_id nb8800_dt_ids[] = {
+	{
+		.compatible = "aurora,nb8800",
+	},
+	{
+		.compatible = "sigma,smp8642-ethernet",
+		.data = &nb8800_tangox_ops,
+	},
+	{
+		.compatible = "sigma,smp8734-ethernet",
+		.data = &nb8800_tango4_ops,
+	},
+	{ }
+};
+
+static int nb8800_probe(struct platform_device *pdev)
+{
+	const struct of_device_id *match;
+	const struct nb8800_ops *ops = NULL;
+	struct nb8800_priv *priv;
+	struct resource *res;
+	struct net_device *dev;
+	struct mii_bus *bus;
+	const unsigned char *mac;
+	void __iomem *base;
+	int irq;
+	int ret;
+
+	match = of_match_device(nb8800_dt_ids, &pdev->dev);
+	if (match)
+		ops = match->data;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq <= 0) {
+		dev_err(&pdev->dev, "No IRQ\n");
+		return -EINVAL;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	dev_dbg(&pdev->dev, "AU-NB8800 Ethernet at %pa\n", &res->start);
+
+	dev = alloc_etherdev(sizeof(*priv));
+	if (!dev)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, dev);
+	SET_NETDEV_DEV(dev, &pdev->dev);
+
+	priv = netdev_priv(dev);
+	priv->base = base;
+
+	priv->phy_mode = of_get_phy_mode(pdev->dev.of_node);
+	if (priv->phy_mode < 0)
+		priv->phy_mode = PHY_INTERFACE_MODE_RGMII;
+
+	priv->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(priv->clk)) {
+		dev_err(&pdev->dev, "failed to get clock\n");
+		ret = PTR_ERR(priv->clk);
+		goto err_free_dev;
+	}
+
+	ret = clk_prepare_enable(priv->clk);
+	if (ret)
+		goto err_free_dev;
+
+	spin_lock_init(&priv->tx_lock);
+
+	if (ops && ops->reset) {
+		ret = ops->reset(dev);
+		if (ret)
+			goto err_free_dev;
+	}
+
+	bus = devm_mdiobus_alloc(&pdev->dev);
+	if (!bus) {
+		ret = -ENOMEM;
+		goto err_disable_clk;
+	}
+
+	bus->name = "nb8800-mii";
+	bus->read = nb8800_mdio_read;
+	bus->write = nb8800_mdio_write;
+	bus->parent = &pdev->dev;
+	snprintf(bus->id, MII_BUS_ID_SIZE, "%lx.nb8800-mii",
+		 (unsigned long)res->start);
+	bus->priv = priv;
+
+	ret = of_mdiobus_register(bus, pdev->dev.of_node);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to register MII bus\n");
+		goto err_disable_clk;
+	}
+
+	priv->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0);
+	if (!priv->phy_node) {
+		dev_err(&pdev->dev, "no PHY specified\n");
+		ret = -ENODEV;
+		goto err_free_bus;
+	}
+
+	priv->mii_bus = bus;
+
+	ret = nb8800_hw_init(dev);
+	if (ret)
+		goto err_free_bus;
+
+	if (ops && ops->init) {
+		ret = ops->init(dev);
+		if (ret)
+			goto err_free_bus;
+	}
+
+	dev->netdev_ops = &nb8800_netdev_ops;
+	dev->ethtool_ops = &nb8800_ethtool_ops;
+	dev->flags |= IFF_MULTICAST;
+	dev->irq = irq;
+
+	mac = of_get_mac_address(pdev->dev.of_node);
+	if (mac)
+		ether_addr_copy(dev->dev_addr, mac);
+
+	if (!is_valid_ether_addr(dev->dev_addr))
+		eth_hw_addr_random(dev);
+
+	nb8800_update_mac_addr(dev);
+
+	netif_carrier_off(dev);
+
+	ret = register_netdev(dev);
+	if (ret) {
+		netdev_err(dev, "failed to register netdev\n");
+		goto err_free_dma;
+	}
+
+	netif_napi_add(dev, &priv->napi, nb8800_poll, NAPI_POLL_WEIGHT);
+
+	netdev_info(dev, "MAC address %pM\n", dev->dev_addr);
+
+	return 0;
+
+err_free_dma:
+	nb8800_dma_free(dev);
+err_free_bus:
+	mdiobus_unregister(bus);
+err_disable_clk:
+	clk_disable_unprepare(priv->clk);
+err_free_dev:
+	free_netdev(dev);
+
+	return ret;
+}
+
+static int nb8800_remove(struct platform_device *pdev)
+{
+	struct net_device *ndev = platform_get_drvdata(pdev);
+	struct nb8800_priv *priv = netdev_priv(ndev);
+
+	unregister_netdev(ndev);
+
+	mdiobus_unregister(priv->mii_bus);
+
+	clk_disable_unprepare(priv->clk);
+
+	nb8800_dma_free(ndev);
+	free_netdev(ndev);
+
+	return 0;
+}
+
+static struct platform_driver nb8800_driver = {
+	.driver = {
+		.name		= "nb8800",
+		.of_match_table	= nb8800_dt_ids,
+	},
+	.probe	= nb8800_probe,
+	.remove	= nb8800_remove,
+};
+
+module_platform_driver(nb8800_driver);
+
+MODULE_DESCRIPTION("Aurora AU-NB8800 Ethernet driver");
+MODULE_AUTHOR("Mans Rullgard <mans@mansr.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/aurora/nb8800.h b/drivers/net/ethernet/aurora/nb8800.h
new file mode 100644
index 0000000..e5adbc2
--- /dev/null
+++ b/drivers/net/ethernet/aurora/nb8800.h
@@ -0,0 +1,316 @@
+#ifndef _NB8800_H_
+#define _NB8800_H_
+
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/phy.h>
+#include <linux/clk.h>
+#include <linux/bitops.h>
+
+#define RX_DESC_COUNT			256
+#define TX_DESC_COUNT			256
+
+#define NB8800_DESC_LOW			4
+
+#define RX_BUF_SIZE			1552
+
+#define RX_COPYBREAK			256
+#define RX_COPYHDR			128
+
+#define MAX_MDC_CLOCK			2500000
+
+/* Stargate Solutions SSN8800 core registers */
+#define NB8800_TX_CTL1			0x000
+#define TX_TPD				BIT(5)
+#define TX_APPEND_FCS			BIT(4)
+#define TX_PAD_EN			BIT(3)
+#define TX_RETRY_EN			BIT(2)
+#define TX_EN				BIT(0)
+
+#define NB8800_TX_CTL2			0x001
+
+#define NB8800_RX_CTL			0x004
+#define RX_BC_DISABLE			BIT(7)
+#define RX_RUNT				BIT(6)
+#define RX_AF_EN			BIT(5)
+#define RX_PAUSE_EN			BIT(3)
+#define RX_SEND_CRC			BIT(2)
+#define RX_PAD_STRIP			BIT(1)
+#define RX_EN				BIT(0)
+
+#define NB8800_RANDOM_SEED		0x008
+#define NB8800_TX_SDP			0x14
+#define NB8800_TX_TPDP1			0x18
+#define NB8800_TX_TPDP2			0x19
+#define NB8800_SLOT_TIME		0x1c
+
+#define NB8800_MDIO_CMD			0x020
+#define MDIO_CMD_GO			BIT(31)
+#define MDIO_CMD_WR			BIT(26)
+#define MDIO_CMD_ADDR(x)		((x) << 21)
+#define MDIO_CMD_REG(x)			((x) << 16)
+#define MDIO_CMD_DATA(x)		((x) <<	 0)
+
+#define NB8800_MDIO_STS			0x024
+#define MDIO_STS_ERR			BIT(31)
+
+#define NB8800_MC_ADDR(i)		(0x028 + (i))
+#define NB8800_MC_INIT			0x02e
+#define NB8800_UC_ADDR(i)		(0x03c + (i))
+
+#define NB8800_MAC_MODE			0x044
+#define RGMII_MODE			BIT(7)
+#define HALF_DUPLEX			BIT(4)
+#define BURST_EN			BIT(3)
+#define LOOPBACK_EN			BIT(2)
+#define GMAC_MODE			BIT(0)
+
+#define NB8800_IC_THRESHOLD		0x050
+#define NB8800_PE_THRESHOLD		0x051
+#define NB8800_PF_THRESHOLD		0x052
+#define NB8800_TX_BUFSIZE		0x054
+#define NB8800_FIFO_CTL			0x056
+#define NB8800_PQ1			0x060
+#define NB8800_PQ2			0x061
+#define NB8800_SRC_ADDR(i)		(0x06a + (i))
+#define NB8800_STAT_DATA		0x078
+#define NB8800_STAT_INDEX		0x07c
+#define NB8800_STAT_CLEAR		0x07d
+
+#define NB8800_SLEEP_MODE		0x07e
+#define SLEEP_MODE			BIT(0)
+
+#define NB8800_WAKEUP			0x07f
+#define WAKEUP				BIT(0)
+
+/* Aurora NB8800 host interface registers */
+#define NB8800_TXC_CR			0x100
+#define TCR_LK				BIT(12)
+#define TCR_DS				BIT(11)
+#define TCR_BTS(x)			(((x) & 0x7) << 8)
+#define TCR_DIE				BIT(7)
+#define TCR_TFI(x)			(((x) & 0x7) << 4)
+#define TCR_LE				BIT(3)
+#define TCR_RS				BIT(2)
+#define TCR_DM				BIT(1)
+#define TCR_EN				BIT(0)
+
+#define NB8800_TXC_SR			0x104
+#define TSR_DE				BIT(3)
+#define TSR_DI				BIT(2)
+#define TSR_TO				BIT(1)
+#define TSR_TI				BIT(0)
+
+#define NB8800_TX_SAR			0x108
+#define NB8800_TX_DESC_ADDR		0x10c
+
+#define NB8800_TX_REPORT_ADDR		0x110
+#define TX_BYTES_TRANSFERRED(x)		(((x) >> 16) & 0xffff)
+#define TX_FIRST_DEFERRAL		BIT(7)
+#define TX_EARLY_COLLISIONS(x)		(((x) >> 3) & 0xf)
+#define TX_LATE_COLLISION		BIT(2)
+#define TX_PACKET_DROPPED		BIT(1)
+#define TX_FIFO_UNDERRUN		BIT(0)
+#define IS_TX_ERROR(r)			((r) & 0x07)
+
+#define NB8800_TX_FIFO_SR		0x114
+#define NB8800_TX_ITR			0x118
+
+#define NB8800_RXC_CR			0x200
+#define RCR_FL				BIT(13)
+#define RCR_LK				BIT(12)
+#define RCR_DS				BIT(11)
+#define RCR_BTS(x)			(((x) & 7) << 8)
+#define RCR_DIE				BIT(7)
+#define RCR_RFI(x)			(((x) & 7) << 4)
+#define RCR_LE				BIT(3)
+#define RCR_RS				BIT(2)
+#define RCR_DM				BIT(1)
+#define RCR_EN				BIT(0)
+
+#define NB8800_RXC_SR			0x204
+#define RSR_DE				BIT(3)
+#define RSR_DI				BIT(2)
+#define RSR_RO				BIT(1)
+#define RSR_RI				BIT(0)
+
+#define NB8800_RX_SAR			0x208
+#define NB8800_RX_DESC_ADDR		0x20c
+
+#define NB8800_RX_REPORT_ADDR		0x210
+#define RX_BYTES_TRANSFERRED(x)		(((x) >> 16) & 0xFFFF)
+#define RX_MULTICAST_PKT		BIT(9)
+#define RX_BROADCAST_PKT		BIT(8)
+#define RX_LENGTH_ERR			BIT(7)
+#define RX_FCS_ERR			BIT(6)
+#define RX_RUNT_PKT			BIT(5)
+#define RX_FIFO_OVERRUN			BIT(4)
+#define RX_LATE_COLLISION		BIT(3)
+#define RX_ALIGNMENT_ERROR		BIT(2)
+#define RX_ERROR_MASK			0xfc
+#define IS_RX_ERROR(r)			((r) & RX_ERROR_MASK)
+
+#define NB8800_RX_FIFO_SR		0x214
+#define NB8800_RX_ITR			0x218
+
+/* Sigma Designs SMP86xx additional registers */
+#define NB8800_TANGOX_PAD_MODE		0x400
+#define PAD_MODE_MASK			0x7
+#define PAD_MODE_MII			0x0
+#define PAD_MODE_RGMII			0x1
+#define PAD_MODE_GTX_CLK_INV		BIT(3)
+#define PAD_MODE_GTX_CLK_DELAY		BIT(4)
+
+#define NB8800_TANGOX_MDIO_CLKDIV	0x420
+#define NB8800_TANGOX_RESET		0x424
+
+/* Hardware DMA descriptor */
+struct nb8800_dma_desc {
+	u32				s_addr;	/* start address */
+	u32				n_addr;	/* next descriptor address */
+	u32				r_addr;	/* report address */
+	u32				config;
+} __aligned(8);
+
+#define DESC_ID				BIT(23)
+#define DESC_EOC			BIT(22)
+#define DESC_EOF			BIT(21)
+#define DESC_LK				BIT(20)
+#define DESC_DS				BIT(19)
+#define DESC_BTS(x)			(((x) & 0x7) << 16)
+
+/* DMA descriptor and associated data for rx.
+ * Allocated from coherent memory.
+ */
+struct nb8800_rx_desc {
+	/* DMA descriptor */
+	struct nb8800_dma_desc		desc;
+
+	/* Status report filled in by hardware */
+	u32				report;
+};
+
+/* Address of buffer on rx ring */
+struct nb8800_rx_buf {
+	struct page			*page;
+	unsigned long			offset;
+};
+
+/* DMA descriptors and associated data for tx.
+ * Allocated from coherent memory.
+ */
+struct nb8800_tx_desc {
+	/* DMA descriptor.  The second descriptor is used if packet
+	 * data is unaligned.
+	 */
+	struct nb8800_dma_desc		desc[2];
+
+	/* Status report filled in by hardware */
+	u32				report;
+
+	/* Bounce buffer for initial unaligned part of packet */
+	u8				buf[8] __aligned(8);
+};
+
+/* Packet in tx queue */
+struct nb8800_tx_buf {
+	/* Currently queued skb */
+	struct sk_buff			*skb;
+
+	/* DMA address of the first descriptor */
+	dma_addr_t			dma_desc;
+
+	/* DMA address of packet data */
+	dma_addr_t			dma_addr;
+
+	/* Length of DMA mapping, less than skb->len if alignment
+	 * buffer is used.
+	 */
+	unsigned int			dma_len;
+
+	/* Number of packets in chain starting here */
+	unsigned int			chain_len;
+
+	/* Packet chain ready to be submitted to hardware */
+	bool				ready;
+};
+
+struct nb8800_priv {
+	struct napi_struct		napi;
+
+	void __iomem			*base;
+
+	/* RX DMA descriptors */
+	struct nb8800_rx_desc		*rx_descs;
+
+	/* RX buffers referenced by DMA descriptors */
+	struct nb8800_rx_buf		*rx_bufs;
+
+	/* Current end of chain */
+	u32				rx_eoc;
+
+	/* Value for rx interrupt time register in NAPI interrupt mode */
+	u32				rx_itr_irq;
+
+	/* Value for rx interrupt time register in NAPI poll mode */
+	u32				rx_itr_poll;
+
+	/* Value for config field of rx DMA descriptors */
+	u32				rx_dma_config;
+
+	/* TX DMA descriptors */
+	struct nb8800_tx_desc		*tx_descs;
+
+	/* TX packet queue */
+	struct nb8800_tx_buf		*tx_bufs;
+
+	/* Number of free tx queue entries */
+	atomic_t			tx_free;
+
+	/* First free tx queue entry */
+	u32				tx_next;
+
+	/* Next buffer to transmit */
+	u32				tx_queue;
+
+	/* Start of current packet chain */
+	struct nb8800_tx_buf		*tx_chain;
+
+	/* Next buffer to reclaim */
+	u32				tx_done;
+
+	/* Lock for DMA activation */
+	spinlock_t			tx_lock;
+
+	struct mii_bus			*mii_bus;
+	struct device_node		*phy_node;
+	struct phy_device		*phydev;
+
+	/* PHY connection type from DT */
+	int				phy_mode;
+
+	/* Current link status */
+	int				speed;
+	int				duplex;
+	int				link;
+
+	/* Pause settings */
+	bool				pause_aneg;
+	bool				pause_rx;
+	bool				pause_tx;
+
+	/* DMA base address of rx descriptors, see rx_descs above */
+	dma_addr_t			rx_desc_dma;
+
+	/* DMA base address of tx descriptors, see tx_descs above */
+	dma_addr_t			tx_desc_dma;
+
+	struct clk			*clk;
+};
+
+struct nb8800_ops {
+	int				(*init)(struct net_device *dev);
+	int				(*reset)(struct net_device *dev);
+};
+
+#endif /* _NB8800_H_ */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index f1d62d5..2e611dc 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -10139,8 +10139,8 @@
 		DP(BNX2X_MSG_SP, "Invalid vxlan port\n");
 		return;
 	}
-	bp->vxlan_dst_port--;
-	if (bp->vxlan_dst_port)
+	bp->vxlan_dst_port_count--;
+	if (bp->vxlan_dst_port_count)
 		return;
 
 	if (netif_running(bp->dev)) {
@@ -13207,7 +13207,7 @@
 
 	/* VF with OLD Hypervisor or old PF do not support filtering */
 	if (IS_PF(bp)) {
-		if (CHIP_IS_E1x(bp))
+		if (chip_is_e1x)
 			bp->accept_any_vlan = true;
 		else
 			dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index db15c5e..bdf094f 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -3625,6 +3625,7 @@
 		pf->fw_fid = le16_to_cpu(resp->fid);
 		pf->port_id = le16_to_cpu(resp->port_id);
 		memcpy(pf->mac_addr, resp->perm_mac_address, ETH_ALEN);
+		memcpy(bp->dev->dev_addr, pf->mac_addr, ETH_ALEN);
 		pf->max_rsscos_ctxs = le16_to_cpu(resp->max_rsscos_ctx);
 		pf->max_cp_rings = le16_to_cpu(resp->max_cmpl_rings);
 		pf->max_tx_rings = le16_to_cpu(resp->max_tx_rings);
@@ -3648,8 +3649,11 @@
 
 		vf->fw_fid = le16_to_cpu(resp->fid);
 		memcpy(vf->mac_addr, resp->perm_mac_address, ETH_ALEN);
-		if (!is_valid_ether_addr(vf->mac_addr))
-			random_ether_addr(vf->mac_addr);
+		if (is_valid_ether_addr(vf->mac_addr))
+			/* overwrite netdev dev_adr with admin VF MAC */
+			memcpy(bp->dev->dev_addr, vf->mac_addr, ETH_ALEN);
+		else
+			random_ether_addr(bp->dev->dev_addr);
 
 		vf->max_rsscos_ctxs = le16_to_cpu(resp->max_rsscos_ctx);
 		vf->max_cp_rings = le16_to_cpu(resp->max_cmpl_rings);
@@ -3880,6 +3884,8 @@
 #endif
 }
 
+static int bnxt_cfg_rx_mode(struct bnxt *);
+
 static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init)
 {
 	int rc = 0;
@@ -3946,11 +3952,9 @@
 		bp->vnic_info[0].rx_mask |=
 				CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS;
 
-	rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, 0);
-	if (rc) {
-		netdev_err(bp->dev, "HWRM cfa l2 rx mask failure rc: %x\n", rc);
+	rc = bnxt_cfg_rx_mode(bp);
+	if (rc)
 		goto err_out;
-	}
 
 	rc = bnxt_hwrm_set_coal(bp);
 	if (rc)
@@ -4865,7 +4869,7 @@
 	}
 }
 
-static void bnxt_cfg_rx_mode(struct bnxt *bp)
+static int bnxt_cfg_rx_mode(struct bnxt *bp)
 {
 	struct net_device *dev = bp->dev;
 	struct bnxt_vnic_info *vnic = &bp->vnic_info[0];
@@ -4914,6 +4918,7 @@
 			netdev_err(bp->dev, "HWRM vnic filter failure rc: %x\n",
 				   rc);
 			vnic->uc_filter_count = i;
+			return rc;
 		}
 	}
 
@@ -4922,6 +4927,8 @@
 	if (rc)
 		netdev_err(bp->dev, "HWRM cfa l2 rx mask failure rc: %x\n",
 			   rc);
+
+	return rc;
 }
 
 static netdev_features_t bnxt_fix_features(struct net_device *dev,
@@ -5212,13 +5219,27 @@
 static int bnxt_change_mac_addr(struct net_device *dev, void *p)
 {
 	struct sockaddr *addr = p;
+	struct bnxt *bp = netdev_priv(dev);
+	int rc = 0;
 
 	if (!is_valid_ether_addr(addr->sa_data))
 		return -EADDRNOTAVAIL;
 
-	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
+#ifdef CONFIG_BNXT_SRIOV
+	if (BNXT_VF(bp) && is_valid_ether_addr(bp->vf.mac_addr))
+		return -EADDRNOTAVAIL;
+#endif
 
-	return 0;
+	if (ether_addr_equal(addr->sa_data, dev->dev_addr))
+		return 0;
+
+	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
+	if (netif_running(dev)) {
+		bnxt_close_nic(bp, false, false);
+		rc = bnxt_open_nic(bp, false, false);
+	}
+
+	return rc;
 }
 
 /* rtnl_lock held */
@@ -5686,15 +5707,12 @@
 	bnxt_set_tpa_flags(bp);
 	bnxt_set_ring_params(bp);
 	dflt_rings = netif_get_num_default_rss_queues();
-	if (BNXT_PF(bp)) {
-		memcpy(dev->dev_addr, bp->pf.mac_addr, ETH_ALEN);
+	if (BNXT_PF(bp))
 		bp->pf.max_irqs = max_irqs;
-	} else {
 #if defined(CONFIG_BNXT_SRIOV)
-		memcpy(dev->dev_addr, bp->vf.mac_addr, ETH_ALEN);
+	else
 		bp->vf.max_irqs = max_irqs;
 #endif
-	}
 	bnxt_get_max_rings(bp, &max_rx_rings, &max_tx_rings);
 	bp->rx_nr_rings = min_t(int, dflt_rings, max_rx_rings);
 	bp->tx_nr_rings_per_tc = min_t(int, dflt_rings, max_tx_rings);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index f4cf688..7a9af28 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -804,10 +804,9 @@
 	if (!is_valid_ether_addr(resp->perm_mac_address))
 		goto update_vf_mac_exit;
 
-	if (ether_addr_equal(resp->perm_mac_address, bp->vf.mac_addr))
-		goto update_vf_mac_exit;
-
-	memcpy(bp->vf.mac_addr, resp->perm_mac_address, ETH_ALEN);
+	if (!ether_addr_equal(resp->perm_mac_address, bp->vf.mac_addr))
+		memcpy(bp->vf.mac_addr, resp->perm_mac_address, ETH_ALEN);
+	/* overwrite netdev dev_adr with admin VF MAC */
 	memcpy(bp->dev->dev_addr, bp->vf.mac_addr, ETH_ALEN);
 update_vf_mac_exit:
 	mutex_unlock(&bp->hwrm_cmd_lock);
diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
index 88c1e1a..169059c 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c
@@ -1682,6 +1682,8 @@
 	macb_set_hwaddr(bp);
 
 	config = macb_mdc_clk_div(bp);
+	if (bp->phy_interface == PHY_INTERFACE_MODE_SGMII)
+		config |= GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL);
 	config |= MACB_BF(RBOF, NET_IP_ALIGN);	/* Make eth data aligned */
 	config |= MACB_BIT(PAE);		/* PAuse Enable */
 	config |= MACB_BIT(DRFCS);		/* Discard Rx FCS */
@@ -2416,6 +2418,8 @@
 	/* Set MII management clock divider */
 	val = macb_mdc_clk_div(bp);
 	val |= macb_dbw(bp);
+	if (bp->phy_interface == PHY_INTERFACE_MODE_SGMII)
+		val |= GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL);
 	macb_writel(bp, NCFGR, val);
 
 	return 0;
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index 6e1faea..d83b0db 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -215,12 +215,17 @@
 /* GEM specific NCFGR bitfields. */
 #define GEM_GBE_OFFSET		10 /* Gigabit mode enable */
 #define GEM_GBE_SIZE		1
+#define GEM_PCSSEL_OFFSET	11
+#define GEM_PCSSEL_SIZE		1
 #define GEM_CLK_OFFSET		18 /* MDC clock division */
 #define GEM_CLK_SIZE		3
 #define GEM_DBW_OFFSET		21 /* Data bus width */
 #define GEM_DBW_SIZE		2
 #define GEM_RXCOEN_OFFSET	24
 #define GEM_RXCOEN_SIZE		1
+#define GEM_SGMIIEN_OFFSET	27
+#define GEM_SGMIIEN_SIZE	1
+
 
 /* Constants for data bus width. */
 #define GEM_DBW32		0 /* 32 bit AMBA AHB data bus width */
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index f683d97..b895044 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -560,7 +560,7 @@
 #endif
 
 /* For PCI-E Advanced Error Recovery (AER) Interface */
-static struct pci_error_handlers liquidio_err_handler = {
+static const struct pci_error_handlers liquidio_err_handler = {
 	.error_detected = liquidio_pcie_error_detected,
 	.mmio_enabled	= liquidio_pcie_mmio_enabled,
 	.slot_reset	= liquidio_pcie_slot_reset,
diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
index d3950b2..39ca674 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -120,10 +120,9 @@
  * Calculated for SCLK of 700Mhz
  * value written should be a 1/16th of what is expected
  *
- * 1 tick per 0.05usec = value of 2.2
- * This 10% would be covered in CQ timer thresh value
+ * 1 tick per 0.025usec
  */
-#define NICPF_CLK_PER_INT_TICK		2
+#define NICPF_CLK_PER_INT_TICK		1
 
 /* Time to wait before we decide that a SQ is stuck.
  *
diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c
index c561fdc..4b7fd63 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nic_main.c
@@ -37,6 +37,7 @@
 #define	NIC_GET_BGX_FROM_VF_LMAC_MAP(map)	((map >> 4) & 0xF)
 #define	NIC_GET_LMAC_FROM_VF_LMAC_MAP(map)	(map & 0xF)
 	u8			vf_lmac_map[MAX_LMAC];
+	u8			lmac_cnt;
 	struct delayed_work     dwork;
 	struct workqueue_struct *check_link;
 	u8			link[MAX_LMAC];
@@ -279,6 +280,7 @@
 	u64 lmac_credit;
 
 	nic->num_vf_en = 0;
+	nic->lmac_cnt = 0;
 
 	for (bgx = 0; bgx < NIC_MAX_BGX; bgx++) {
 		if (!(bgx_map & (1 << bgx)))
@@ -288,6 +290,7 @@
 			nic->vf_lmac_map[next_bgx_lmac++] =
 						NIC_SET_VF_LMAC_MAP(bgx, lmac);
 		nic->num_vf_en += lmac_cnt;
+		nic->lmac_cnt += lmac_cnt;
 
 		/* Program LMAC credits */
 		lmac_credit = (1ull << 1); /* channel credit enable */
@@ -715,6 +718,13 @@
 	case NIC_MBOX_MSG_CFG_DONE:
 		/* Last message of VF config msg sequence */
 		nic->vf_enabled[vf] = true;
+		if (vf >= nic->lmac_cnt)
+			goto unlock;
+
+		bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+		lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+
+		bgx_lmac_rx_tx_enable(nic->node, bgx, lmac, true);
 		goto unlock;
 	case NIC_MBOX_MSG_SHUTDOWN:
 		/* First msg in VF teardown sequence */
@@ -722,6 +732,14 @@
 		if (vf >= nic->num_vf_en)
 			nic->sqs_used[vf - nic->num_vf_en] = false;
 		nic->pqs_vf[vf] = 0;
+
+		if (vf >= nic->lmac_cnt)
+			break;
+
+		bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+		lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+
+		bgx_lmac_rx_tx_enable(nic->node, bgx, lmac, false);
 		break;
 	case NIC_MBOX_MSG_ALLOC_SQS:
 		nic_alloc_sqs(nic, &mbx.sqs_alloc);
@@ -940,7 +958,7 @@
 
 	mbx.link_status.msg = NIC_MBOX_MSG_BGX_LINK_CHANGE;
 
-	for (vf = 0; vf < nic->num_vf_en; vf++) {
+	for (vf = 0; vf < nic->lmac_cnt; vf++) {
 		/* Poll only if VF is UP */
 		if (!nic->vf_enabled[vf])
 			continue;
@@ -1074,8 +1092,7 @@
 
 	if (nic->check_link) {
 		/* Destroy work Queue */
-		cancel_delayed_work(&nic->dwork);
-		flush_workqueue(nic->check_link);
+		cancel_delayed_work_sync(&nic->dwork);
 		destroy_workqueue(nic->check_link);
 	}
 
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
index af54c10..a12b2e3 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
@@ -112,6 +112,13 @@
 
 	cmd->supported = 0;
 	cmd->transceiver = XCVR_EXTERNAL;
+
+	if (!nic->link_up) {
+		cmd->duplex = DUPLEX_UNKNOWN;
+		ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN);
+		return 0;
+	}
+
 	if (nic->speed <= 1000) {
 		cmd->port = PORT_MII;
 		cmd->autoneg = AUTONEG_ENABLE;
@@ -125,6 +132,13 @@
 	return 0;
 }
 
+static u32 nicvf_get_link(struct net_device *netdev)
+{
+	struct nicvf *nic = netdev_priv(netdev);
+
+	return nic->link_up;
+}
+
 static void nicvf_get_drvinfo(struct net_device *netdev,
 			      struct ethtool_drvinfo *info)
 {
@@ -660,7 +674,7 @@
 
 static const struct ethtool_ops nicvf_ethtool_ops = {
 	.get_settings		= nicvf_get_settings,
-	.get_link		= ethtool_op_get_link,
+	.get_link		= nicvf_get_link,
 	.get_drvinfo		= nicvf_get_drvinfo,
 	.get_msglevel		= nicvf_get_msglevel,
 	.set_msglevel		= nicvf_set_msglevel,
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index a937772..dde8dc7 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -1057,6 +1057,7 @@
 
 	netif_carrier_off(netdev);
 	netif_tx_stop_all_queues(nic->netdev);
+	nic->link_up = false;
 
 	/* Teardown secondary qsets first */
 	if (!nic->sqs_mode) {
@@ -1211,9 +1212,6 @@
 	nic->drv_stats.txq_stop = 0;
 	nic->drv_stats.txq_wake = 0;
 
-	netif_carrier_on(netdev);
-	netif_tx_start_all_queues(netdev);
-
 	return 0;
 cleanup:
 	nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0);
@@ -1583,8 +1581,14 @@
 static void nicvf_remove(struct pci_dev *pdev)
 {
 	struct net_device *netdev = pci_get_drvdata(pdev);
-	struct nicvf *nic = netdev_priv(netdev);
-	struct net_device *pnetdev = nic->pnicvf->netdev;
+	struct nicvf *nic;
+	struct net_device *pnetdev;
+
+	if (!netdev)
+		return;
+
+	nic = netdev_priv(netdev);
+	pnetdev = nic->pnicvf->netdev;
 
 	/* Check if this Qset is assigned to different VF.
 	 * If yes, clean primary and all secondary Qsets.
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index e404ea8..206b6a7 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -592,7 +592,7 @@
 	/* Set threshold value for interrupt generation */
 	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_THRESH, qidx, cq->thresh);
 	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG2,
-			      qidx, nic->cq_coalesce_usecs);
+			      qidx, CMP_QUEUE_TIMER_THRESH);
 }
 
 /* Configures transmit queue */
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
index fb4957d..033e830 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
@@ -76,7 +76,7 @@
 #define CMP_QSIZE		CMP_QUEUE_SIZE2
 #define CMP_QUEUE_LEN		(1ULL << (CMP_QSIZE + 10))
 #define CMP_QUEUE_CQE_THRESH	0
-#define CMP_QUEUE_TIMER_THRESH	220 /* 10usec */
+#define CMP_QUEUE_TIMER_THRESH	80 /* ~2usec */
 
 #define RBDR_SIZE		RBDR_SIZE0
 #define RCV_BUF_COUNT		(1ULL << (RBDR_SIZE + 13))
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index 180aa9f..9df26c2 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -186,6 +186,23 @@
 }
 EXPORT_SYMBOL(bgx_set_lmac_mac);
 
+void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable)
+{
+	struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+	u64 cfg;
+
+	if (!bgx)
+		return;
+
+	cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG);
+	if (enable)
+		cfg |= CMR_PKT_RX_EN | CMR_PKT_TX_EN;
+	else
+		cfg &= ~(CMR_PKT_RX_EN | CMR_PKT_TX_EN);
+	bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg);
+}
+EXPORT_SYMBOL(bgx_lmac_rx_tx_enable);
+
 static void bgx_sgmii_change_link_state(struct lmac *lmac)
 {
 	struct bgx *bgx = lmac->bgx;
@@ -612,6 +629,8 @@
 		lmac->last_duplex = 1;
 	} else {
 		lmac->link_up = 0;
+		lmac->last_speed = SPEED_UNKNOWN;
+		lmac->last_duplex = DUPLEX_UNKNOWN;
 	}
 
 	if (lmac->last_link != lmac->link_up) {
@@ -654,8 +673,7 @@
 	}
 
 	/* Enable lmac */
-	bgx_reg_modify(bgx, lmacid, BGX_CMRX_CFG,
-		       CMR_EN | CMR_PKT_RX_EN | CMR_PKT_TX_EN);
+	bgx_reg_modify(bgx, lmacid, BGX_CMRX_CFG, CMR_EN);
 
 	/* Restore default cfg, incase low level firmware changed it */
 	bgx_reg_write(bgx, lmacid, BGX_CMRX_RX_DMAC_CTL, 0x03);
@@ -695,8 +713,7 @@
 	lmac = &bgx->lmac[lmacid];
 	if (lmac->check_link) {
 		/* Destroy work queue */
-		cancel_delayed_work(&lmac->dwork);
-		flush_workqueue(lmac->check_link);
+		cancel_delayed_work_sync(&lmac->dwork);
 		destroy_workqueue(lmac->check_link);
 	}
 
@@ -1009,6 +1026,9 @@
 	struct bgx *bgx = NULL;
 	u8 lmac;
 
+	/* Load octeon mdio driver */
+	octeon_mdiobus_force_mod_depencency();
+
 	bgx = devm_kzalloc(dev, sizeof(*bgx), GFP_KERNEL);
 	if (!bgx)
 		return -ENOMEM;
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
index 07b7ec66..149e179 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
@@ -182,6 +182,8 @@
 #define BCAST_ACCEPT	1
 #define CAM_ACCEPT	1
 
+void octeon_mdiobus_force_mod_depencency(void);
+void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable);
 void bgx_add_dmac_addr(u64 dmac, int node, int bgx_idx, int lmac);
 unsigned bgx_get_map(int node);
 int bgx_get_lmac_count(int node, int bgx);
diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c
index ed41559..b553409 100644
--- a/drivers/net/ethernet/dec/tulip/tulip_core.c
+++ b/drivers/net/ethernet/dec/tulip/tulip_core.c
@@ -98,8 +98,7 @@
 #elif defined(__mips__)
 static int csr0 = 0x00200000 | 0x4000;
 #else
-#warning Processor architecture undefined!
-static int csr0 = 0x00A00000 | 0x4800;
+static int csr0;
 #endif
 
 /* Operational parameters that usually are not changed. */
@@ -1982,6 +1981,12 @@
 	pr_info("%s", version);
 #endif
 
+	if (!csr0) {
+		pr_warn("tulip: unknown CPU architecture, using default csr0\n");
+		/* default to 8 longword cache line alignment */
+		csr0 = 0x00A00000 | 0x4800;
+	}
+
 	/* copy module parms into globals */
 	tulip_rx_copybreak = rx_copybreak;
 	tulip_max_interrupt_work = max_interrupt_work;
diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c
index 9beb3d3..3c0e4d5 100644
--- a/drivers/net/ethernet/dec/tulip/winbond-840.c
+++ b/drivers/net/ethernet/dec/tulip/winbond-840.c
@@ -907,7 +907,7 @@
 #elif defined(CONFIG_SPARC) || defined (CONFIG_PARISC) || defined(CONFIG_ARM)
 	i |= 0x4800;
 #else
-#warning Processor architecture undefined
+	dev_warn(&dev->dev, "unknown CPU architecture, using default csr0 setting\n");
 	i |= 0x4800;
 #endif
 	iowrite32(i, ioaddr + PCIBusCfg);
diff --git a/drivers/net/ethernet/dlink/Kconfig b/drivers/net/ethernet/dlink/Kconfig
index f6e858d..ebdc832 100644
--- a/drivers/net/ethernet/dlink/Kconfig
+++ b/drivers/net/ethernet/dlink/Kconfig
@@ -17,15 +17,16 @@
 if NET_VENDOR_DLINK
 
 config DL2K
-	tristate "DL2000/TC902x-based Gigabit Ethernet support"
+	tristate "DL2000/TC902x/IP1000A-based Gigabit Ethernet support"
 	depends on PCI
 	select CRC32
 	---help---
-	  This driver supports DL2000/TC902x-based Gigabit ethernet cards,
+	  This driver supports DL2000/TC902x/IP1000A-based Gigabit ethernet cards,
 	  which includes
 	  D-Link DGE-550T Gigabit Ethernet Adapter.
 	  D-Link DL2000-based Gigabit Ethernet Adapter.
 	  Sundance/Tamarack TC902x Gigabit Ethernet Adapter.
+	  ICPlus IP1000A-based cards
 
 	  To compile this driver as a module, choose M here: the
 	  module will be called dl2k.
diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c
index cf0a5fc..ccca479 100644
--- a/drivers/net/ethernet/dlink/dl2k.c
+++ b/drivers/net/ethernet/dlink/dl2k.c
@@ -253,6 +253,19 @@
 	if (err)
 		goto err_out_unmap_rx;
 
+	if (np->chip_id == CHIP_IP1000A &&
+	    (np->pdev->revision == 0x40 || np->pdev->revision == 0x41)) {
+		/* PHY magic taken from ipg driver, undocumented registers */
+		mii_write(dev, np->phy_addr, 31, 0x0001);
+		mii_write(dev, np->phy_addr, 27, 0x01e0);
+		mii_write(dev, np->phy_addr, 31, 0x0002);
+		mii_write(dev, np->phy_addr, 27, 0xeb8e);
+		mii_write(dev, np->phy_addr, 31, 0x0000);
+		mii_write(dev, np->phy_addr, 30, 0x005e);
+		/* advertise 1000BASE-T half & full duplex, prefer MASTER */
+		mii_write(dev, np->phy_addr, MII_CTRL1000, 0x0700);
+	}
+
 	/* Fiber device? */
 	np->phy_media = (dr16(ASICCtrl) & PhyMedia) ? 1 : 0;
 	np->link_status = 0;
@@ -361,6 +374,11 @@
 	for (i = 0; i < 6; i++)
 		dev->dev_addr[i] = psrom->mac_addr[i];
 
+	if (np->chip_id == CHIP_IP1000A) {
+		np->led_mode = psrom->led_mode;
+		return 0;
+	}
+
 	if (np->pdev->vendor != PCI_VENDOR_ID_DLINK) {
 		return 0;
 	}
@@ -406,6 +424,28 @@
 	return 0;
 }
 
+static void rio_set_led_mode(struct net_device *dev)
+{
+	struct netdev_private *np = netdev_priv(dev);
+	void __iomem *ioaddr = np->ioaddr;
+	u32 mode;
+
+	if (np->chip_id != CHIP_IP1000A)
+		return;
+
+	mode = dr32(ASICCtrl);
+	mode &= ~(IPG_AC_LED_MODE_BIT_1 | IPG_AC_LED_MODE | IPG_AC_LED_SPEED);
+
+	if (np->led_mode & 0x01)
+		mode |= IPG_AC_LED_MODE;
+	if (np->led_mode & 0x02)
+		mode |= IPG_AC_LED_MODE_BIT_1;
+	if (np->led_mode & 0x08)
+		mode |= IPG_AC_LED_SPEED;
+
+	dw32(ASICCtrl, mode);
+}
+
 static int
 rio_open (struct net_device *dev)
 {
@@ -424,6 +464,8 @@
 	     GlobalReset | DMAReset | FIFOReset | NetworkReset | HostReset);
 	mdelay(10);
 
+	rio_set_led_mode(dev);
+
 	/* DebugCtrl bit 4, 5, 9 must set */
 	dw32(DebugCtrl, dr32(DebugCtrl) | 0x0230);
 
@@ -433,9 +475,13 @@
 
 	alloc_list (dev);
 
-	/* Get station address */
-	for (i = 0; i < 6; i++)
-		dw8(StationAddr0 + i, dev->dev_addr[i]);
+	/* Set station address */
+	/* 16 or 32-bit access is required by TC9020 datasheet but 8-bit works
+	 * too. However, it doesn't work on IP1000A so we use 16-bit access.
+	 */
+	for (i = 0; i < 3; i++)
+		dw16(StationAddr0 + 2 * i,
+		     cpu_to_le16(((u16 *)dev->dev_addr)[i]));
 
 	set_multicast (dev);
 	if (np->coalesce) {
@@ -780,6 +826,7 @@
 				break;
 			mdelay (1);
 		}
+		rio_set_led_mode(dev);
 		rio_free_tx (dev, 1);
 		/* Reset TFDListPtr */
 		dw32(TFDListPtr0, np->tx_ring_dma +
@@ -799,6 +846,7 @@
 				break;
 			mdelay (1);
 		}
+		rio_set_led_mode(dev);
 		/* Let TxStartThresh stay default value */
 	}
 	/* Maximum Collisions */
@@ -965,6 +1013,7 @@
 			dev->name, int_status);
 		dw16(ASICCtrl + 2, GlobalReset | HostReset);
 		mdelay (500);
+		rio_set_led_mode(dev);
 	}
 }
 
diff --git a/drivers/net/ethernet/dlink/dl2k.h b/drivers/net/ethernet/dlink/dl2k.h
index 23c07b0..8f4f612 100644
--- a/drivers/net/ethernet/dlink/dl2k.h
+++ b/drivers/net/ethernet/dlink/dl2k.h
@@ -211,6 +211,10 @@
 	ResetBusy = 0x0400,
 };
 
+#define IPG_AC_LED_MODE		BIT(14)
+#define IPG_AC_LED_SPEED	BIT(27)
+#define IPG_AC_LED_MODE_BIT_1	BIT(29)
+
 /* Transmit Frame Control bits */
 enum TFC_bits {
 	DwordAlign = 0x00000000,
@@ -332,7 +336,10 @@
 	u16 asic_ctrl;		/* 0x02 */
 	u16 sub_vendor_id;	/* 0x04 */
 	u16 sub_system_id;	/* 0x06 */
-	u16 reserved1[12];	/* 0x08-0x1f */
+	u16 pci_base_1;		/* 0x08 (IP1000A only) */
+	u16 pci_base_2;		/* 0x0a (IP1000A only) */
+	u16 led_mode;		/* 0x0c (IP1000A only) */
+	u16 reserved1[9];	/* 0x0e-0x1f */
 	u8 mac_addr[6];		/* 0x20-0x25 */
 	u8 reserved2[10];	/* 0x26-0x2f */
 	u8 sib[204];		/* 0x30-0xfb */
@@ -397,6 +404,7 @@
 	u16 advertising;	/* NWay media advertisement */
 	u16 negotiate;		/* Negotiated media */
 	int phy_addr;		/* PHY addresses. */
+	u16 led_mode;		/* LED mode read from EEPROM (IP1000A only) */
 };
 
 /* The station address location in the EEPROM. */
@@ -407,10 +415,15 @@
         class_mask              of the class are honored during the comparison.
         driver_data             Data private to the driver.
 */
+#define CHIP_IP1000A	1
 
 static const struct pci_device_id rio_pci_tbl[] = {
 	{0x1186, 0x4000, PCI_ANY_ID, PCI_ANY_ID, },
 	{0x13f0, 0x1021, PCI_ANY_ID, PCI_ANY_ID, },
+	{ PCI_VDEVICE(SUNDANCE,	0x1023), CHIP_IP1000A },
+	{ PCI_VDEVICE(SUNDANCE,	0x2021), CHIP_IP1000A },
+	{ PCI_VDEVICE(DLINK,	0x9021), CHIP_IP1000A },
+	{ PCI_VDEVICE(DLINK,	0x4020), CHIP_IP1000A },
 	{ }
 };
 MODULE_DEVICE_TABLE (pci, rio_pci_tbl);
diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c
index f4cb8e4..734f655 100644
--- a/drivers/net/ethernet/emulex/benet/be_ethtool.c
+++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c
@@ -1062,9 +1062,7 @@
 static int be_set_rss_hash_opts(struct be_adapter *adapter,
 				struct ethtool_rxnfc *cmd)
 {
-	struct be_rx_obj *rxo;
-	int status = 0, i, j;
-	u8 rsstable[128];
+	int status;
 	u32 rss_flags = adapter->rss_info.rss_flags;
 
 	if (cmd->data != L3_RSS_FLAGS &&
@@ -1113,20 +1111,11 @@
 	}
 
 	if (rss_flags == adapter->rss_info.rss_flags)
-		return status;
-
-	if (be_multi_rxq(adapter)) {
-		for (j = 0; j < 128; j += adapter->num_rss_qs) {
-			for_all_rss_queues(adapter, rxo, i) {
-				if ((j + i) >= 128)
-					break;
-				rsstable[j + i] = rxo->rss_id;
-			}
-		}
-	}
+		return 0;
 
 	status = be_cmd_rss_config(adapter, adapter->rss_info.rsstable,
-				   rss_flags, 128, adapter->rss_info.rss_hkey);
+				   rss_flags, RSS_INDIR_TABLE_LEN,
+				   adapter->rss_info.rss_hkey);
 	if (!status)
 		adapter->rss_info.rss_flags = rss_flags;
 
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index eb48a97..b6ad029 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -3518,7 +3518,7 @@
 
 	netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
 	rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
-			       128, rss_key);
+			       RSS_INDIR_TABLE_LEN, rss_key);
 	if (rc) {
 		rss->rss_flags = RSS_ENABLE_NONE;
 		return rc;
diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig
index ff76d4e..bee32a9 100644
--- a/drivers/net/ethernet/freescale/Kconfig
+++ b/drivers/net/ethernet/freescale/Kconfig
@@ -7,7 +7,8 @@
 	default y
 	depends on FSL_SOC || QUICC_ENGINE || CPM1 || CPM2 || PPC_MPC512x || \
 		   M523x || M527x || M5272 || M528x || M520x || M532x || \
-		   ARCH_MXC || ARCH_MXS || (PPC_MPC52xx && PPC_BESTCOMM)
+		   ARCH_MXC || ARCH_MXS || (PPC_MPC52xx && PPC_BESTCOMM) || \
+		   ARCH_LAYERSCAPE
 	---help---
 	  If you have a network (Ethernet) card belonging to this class, say Y.
 
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index 3e6b9b4..7cf8984 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -647,9 +647,9 @@
 	if (model && strcasecmp(model, "FEC")) {
 		gfar_irq(grp, RX)->irq = irq_of_parse_and_map(np, 1);
 		gfar_irq(grp, ER)->irq = irq_of_parse_and_map(np, 2);
-		if (gfar_irq(grp, TX)->irq == NO_IRQ ||
-		    gfar_irq(grp, RX)->irq == NO_IRQ ||
-		    gfar_irq(grp, ER)->irq == NO_IRQ)
+		if (!gfar_irq(grp, TX)->irq ||
+		    !gfar_irq(grp, RX)->irq ||
+		    !gfar_irq(grp, ER)->irq)
 			return -EINVAL;
 	}
 
diff --git a/drivers/net/ethernet/freescale/gianfar_ptp.c b/drivers/net/ethernet/freescale/gianfar_ptp.c
index 664d0c2..b40fba9 100644
--- a/drivers/net/ethernet/freescale/gianfar_ptp.c
+++ b/drivers/net/ethernet/freescale/gianfar_ptp.c
@@ -467,7 +467,7 @@
 
 	etsects->irq = platform_get_irq(dev, 0);
 
-	if (etsects->irq == NO_IRQ) {
+	if (etsects->irq < 0) {
 		pr_err("irq not in device tree\n");
 		goto no_node;
 	}
diff --git a/drivers/net/ethernet/icplus/Kconfig b/drivers/net/ethernet/icplus/Kconfig
deleted file mode 100644
index 14a66e9..0000000
--- a/drivers/net/ethernet/icplus/Kconfig
+++ /dev/null
@@ -1,13 +0,0 @@
-#
-# IC Plus device configuration
-#
-
-config IP1000
-	tristate "IP1000 Gigabit Ethernet support"
-	depends on PCI
-	select MII
-	---help---
-	  This driver supports IP1000 gigabit Ethernet cards.
-
-	  To compile this driver as a module, choose M here: the module
-	  will be called ipg.  This is recommended.
diff --git a/drivers/net/ethernet/icplus/Makefile b/drivers/net/ethernet/icplus/Makefile
deleted file mode 100644
index 5bc87c1..0000000
--- a/drivers/net/ethernet/icplus/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-#
-# Makefile for the IC Plus device drivers
-#
-
-obj-$(CONFIG_IP1000) += ipg.o
diff --git a/drivers/net/ethernet/icplus/ipg.c b/drivers/net/ethernet/icplus/ipg.c
deleted file mode 100644
index c3b6af8..0000000
--- a/drivers/net/ethernet/icplus/ipg.c
+++ /dev/null
@@ -1,2300 +0,0 @@
-/*
- * ipg.c: Device Driver for the IP1000 Gigabit Ethernet Adapter
- *
- * Copyright (C) 2003, 2007  IC Plus Corp
- *
- * Original Author:
- *
- *   Craig Rich
- *   Sundance Technology, Inc.
- *   www.sundanceti.com
- *   craig_rich@sundanceti.com
- *
- * Current Maintainer:
- *
- *   Sorbica Shieh.
- *   http://www.icplus.com.tw
- *   sorbica@icplus.com.tw
- *
- *   Jesse Huang
- *   http://www.icplus.com.tw
- *   jesse@icplus.com.tw
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/crc32.h>
-#include <linux/ethtool.h>
-#include <linux/interrupt.h>
-#include <linux/gfp.h>
-#include <linux/mii.h>
-#include <linux/mutex.h>
-
-#include <asm/div64.h>
-
-#define IPG_RX_RING_BYTES	(sizeof(struct ipg_rx) * IPG_RFDLIST_LENGTH)
-#define IPG_TX_RING_BYTES	(sizeof(struct ipg_tx) * IPG_TFDLIST_LENGTH)
-#define IPG_RESET_MASK \
-	(IPG_AC_GLOBAL_RESET | IPG_AC_RX_RESET | IPG_AC_TX_RESET | \
-	 IPG_AC_DMA | IPG_AC_FIFO | IPG_AC_NETWORK | IPG_AC_HOST | \
-	 IPG_AC_AUTO_INIT)
-
-#define ipg_w32(val32, reg)	iowrite32((val32), ioaddr + (reg))
-#define ipg_w16(val16, reg)	iowrite16((val16), ioaddr + (reg))
-#define ipg_w8(val8, reg)	iowrite8((val8), ioaddr + (reg))
-
-#define ipg_r32(reg)		ioread32(ioaddr + (reg))
-#define ipg_r16(reg)		ioread16(ioaddr + (reg))
-#define ipg_r8(reg)		ioread8(ioaddr + (reg))
-
-enum {
-	netdev_io_size = 128
-};
-
-#include "ipg.h"
-#define DRV_NAME	"ipg"
-
-MODULE_AUTHOR("IC Plus Corp. 2003");
-MODULE_DESCRIPTION("IC Plus IP1000 Gigabit Ethernet Adapter Linux Driver");
-MODULE_LICENSE("GPL");
-
-/*
- * Defaults
- */
-#define IPG_MAX_RXFRAME_SIZE	0x0600
-#define IPG_RXFRAG_SIZE		0x0600
-#define IPG_RXSUPPORT_SIZE	0x0600
-#define IPG_IS_JUMBO		false
-
-/*
- * Variable record -- index by leading revision/length
- * Revision/Length(=N*4), Address1, Data1, Address2, Data2,...,AddressN,DataN
- */
-static const unsigned short DefaultPhyParam[] = {
-	/* 11/12/03 IP1000A v1-3 rev=0x40 */
-	/*--------------------------------------------------------------------------
-	(0x4000|(15*4)), 31, 0x0001, 27, 0x01e0, 31, 0x0002, 22, 0x85bd, 24, 0xfff2,
-				 27, 0x0c10, 28, 0x0c10, 29, 0x2c10, 31, 0x0003, 23, 0x92f6,
-				 31, 0x0000, 23, 0x003d, 30, 0x00de, 20, 0x20e7,  9, 0x0700,
-	  --------------------------------------------------------------------------*/
-	/* 12/17/03 IP1000A v1-4 rev=0x40 */
-	(0x4000 | (07 * 4)), 31, 0x0001, 27, 0x01e0, 31, 0x0002, 27, 0xeb8e, 31,
-	    0x0000,
-	30, 0x005e, 9, 0x0700,
-	/* 01/09/04 IP1000A v1-5 rev=0x41 */
-	(0x4100 | (07 * 4)), 31, 0x0001, 27, 0x01e0, 31, 0x0002, 27, 0xeb8e, 31,
-	    0x0000,
-	30, 0x005e, 9, 0x0700,
-	0x0000
-};
-
-static const char * const ipg_brand_name[] = {
-	"IC PLUS IP1000 1000/100/10 based NIC",
-	"Sundance Technology ST2021 based NIC",
-	"Tamarack Microelectronics TC9020/9021 based NIC",
-	"D-Link NIC IP1000A"
-};
-
-static const struct pci_device_id ipg_pci_tbl[] = {
-	{ PCI_VDEVICE(SUNDANCE,	0x1023), 0 },
-	{ PCI_VDEVICE(SUNDANCE,	0x2021), 1 },
-	{ PCI_VDEVICE(DLINK,	0x9021), 2 },
-	{ PCI_VDEVICE(DLINK,	0x4020), 3 },
-	{ 0, }
-};
-
-MODULE_DEVICE_TABLE(pci, ipg_pci_tbl);
-
-static inline void __iomem *ipg_ioaddr(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	return sp->ioaddr;
-}
-
-#ifdef IPG_DEBUG
-static void ipg_dump_rfdlist(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	void __iomem *ioaddr = sp->ioaddr;
-	unsigned int i;
-	u32 offset;
-
-	IPG_DEBUG_MSG("_dump_rfdlist\n");
-
-	netdev_info(dev, "rx_current = %02x\n", sp->rx_current);
-	netdev_info(dev, "rx_dirty   = %02x\n", sp->rx_dirty);
-	netdev_info(dev, "RFDList start address = %016lx\n",
-		    (unsigned long)sp->rxd_map);
-	netdev_info(dev, "RFDListPtr register   = %08x%08x\n",
-		    ipg_r32(IPG_RFDLISTPTR1), ipg_r32(IPG_RFDLISTPTR0));
-
-	for (i = 0; i < IPG_RFDLIST_LENGTH; i++) {
-		offset = (u32) &sp->rxd[i].next_desc - (u32) sp->rxd;
-		netdev_info(dev, "%02x %04x RFDNextPtr = %016lx\n",
-			    i, offset, (unsigned long)sp->rxd[i].next_desc);
-		offset = (u32) &sp->rxd[i].rfs - (u32) sp->rxd;
-		netdev_info(dev, "%02x %04x RFS        = %016lx\n",
-			    i, offset, (unsigned long)sp->rxd[i].rfs);
-		offset = (u32) &sp->rxd[i].frag_info - (u32) sp->rxd;
-		netdev_info(dev, "%02x %04x frag_info   = %016lx\n",
-			    i, offset, (unsigned long)sp->rxd[i].frag_info);
-	}
-}
-
-static void ipg_dump_tfdlist(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	void __iomem *ioaddr = sp->ioaddr;
-	unsigned int i;
-	u32 offset;
-
-	IPG_DEBUG_MSG("_dump_tfdlist\n");
-
-	netdev_info(dev, "tx_current         = %02x\n", sp->tx_current);
-	netdev_info(dev, "tx_dirty = %02x\n", sp->tx_dirty);
-	netdev_info(dev, "TFDList start address = %016lx\n",
-		    (unsigned long) sp->txd_map);
-	netdev_info(dev, "TFDListPtr register   = %08x%08x\n",
-		    ipg_r32(IPG_TFDLISTPTR1), ipg_r32(IPG_TFDLISTPTR0));
-
-	for (i = 0; i < IPG_TFDLIST_LENGTH; i++) {
-		offset = (u32) &sp->txd[i].next_desc - (u32) sp->txd;
-		netdev_info(dev, "%02x %04x TFDNextPtr = %016lx\n",
-			    i, offset, (unsigned long)sp->txd[i].next_desc);
-
-		offset = (u32) &sp->txd[i].tfc - (u32) sp->txd;
-		netdev_info(dev, "%02x %04x TFC        = %016lx\n",
-			    i, offset, (unsigned long) sp->txd[i].tfc);
-		offset = (u32) &sp->txd[i].frag_info - (u32) sp->txd;
-		netdev_info(dev, "%02x %04x frag_info   = %016lx\n",
-			    i, offset, (unsigned long) sp->txd[i].frag_info);
-	}
-}
-#endif
-
-static void ipg_write_phy_ctl(void __iomem *ioaddr, u8 data)
-{
-	ipg_w8(IPG_PC_RSVD_MASK & data, PHY_CTRL);
-	ndelay(IPG_PC_PHYCTRLWAIT_NS);
-}
-
-static void ipg_drive_phy_ctl_low_high(void __iomem *ioaddr, u8 data)
-{
-	ipg_write_phy_ctl(ioaddr, IPG_PC_MGMTCLK_LO | data);
-	ipg_write_phy_ctl(ioaddr, IPG_PC_MGMTCLK_HI | data);
-}
-
-static void send_three_state(void __iomem *ioaddr, u8 phyctrlpolarity)
-{
-	phyctrlpolarity |= (IPG_PC_MGMTDATA & 0) | IPG_PC_MGMTDIR;
-
-	ipg_drive_phy_ctl_low_high(ioaddr, phyctrlpolarity);
-}
-
-static void send_end(void __iomem *ioaddr, u8 phyctrlpolarity)
-{
-	ipg_w8((IPG_PC_MGMTCLK_LO | (IPG_PC_MGMTDATA & 0) | IPG_PC_MGMTDIR |
-		phyctrlpolarity) & IPG_PC_RSVD_MASK, PHY_CTRL);
-}
-
-static u16 read_phy_bit(void __iomem *ioaddr, u8 phyctrlpolarity)
-{
-	u16 bit_data;
-
-	ipg_write_phy_ctl(ioaddr, IPG_PC_MGMTCLK_LO | phyctrlpolarity);
-
-	bit_data = ((ipg_r8(PHY_CTRL) & IPG_PC_MGMTDATA) >> 1) & 1;
-
-	ipg_write_phy_ctl(ioaddr, IPG_PC_MGMTCLK_HI | phyctrlpolarity);
-
-	return bit_data;
-}
-
-/*
- * Read a register from the Physical Layer device located
- * on the IPG NIC, using the IPG PHYCTRL register.
- */
-static int mdio_read(struct net_device *dev, int phy_id, int phy_reg)
-{
-	void __iomem *ioaddr = ipg_ioaddr(dev);
-	/*
-	 * The GMII mangement frame structure for a read is as follows:
-	 *
-	 * |Preamble|st|op|phyad|regad|ta|      data      |idle|
-	 * |< 32 1s>|01|10|AAAAA|RRRRR|z0|DDDDDDDDDDDDDDDD|z   |
-	 *
-	 * <32 1s> = 32 consecutive logic 1 values
-	 * A = bit of Physical Layer device address (MSB first)
-	 * R = bit of register address (MSB first)
-	 * z = High impedance state
-	 * D = bit of read data (MSB first)
-	 *
-	 * Transmission order is 'Preamble' field first, bits transmitted
-	 * left to right (first to last).
-	 */
-	struct {
-		u32 field;
-		unsigned int len;
-	} p[] = {
-		{ GMII_PREAMBLE,	32 },	/* Preamble */
-		{ GMII_ST,		2  },	/* ST */
-		{ GMII_READ,		2  },	/* OP */
-		{ phy_id,		5  },	/* PHYAD */
-		{ phy_reg,		5  },	/* REGAD */
-		{ 0x0000,		2  },	/* TA */
-		{ 0x0000,		16 },	/* DATA */
-		{ 0x0000,		1  }	/* IDLE */
-	};
-	unsigned int i, j;
-	u8 polarity, data;
-
-	polarity  = ipg_r8(PHY_CTRL);
-	polarity &= (IPG_PC_DUPLEX_POLARITY | IPG_PC_LINK_POLARITY);
-
-	/* Create the Preamble, ST, OP, PHYAD, and REGAD field. */
-	for (j = 0; j < 5; j++) {
-		for (i = 0; i < p[j].len; i++) {
-			/* For each variable length field, the MSB must be
-			 * transmitted first. Rotate through the field bits,
-			 * starting with the MSB, and move each bit into the
-			 * the 1st (2^1) bit position (this is the bit position
-			 * corresponding to the MgmtData bit of the PhyCtrl
-			 * register for the IPG).
-			 *
-			 * Example: ST = 01;
-			 *
-			 *          First write a '0' to bit 1 of the PhyCtrl
-			 *          register, then write a '1' to bit 1 of the
-			 *          PhyCtrl register.
-			 *
-			 * To do this, right shift the MSB of ST by the value:
-			 * [field length - 1 - #ST bits already written]
-			 * then left shift this result by 1.
-			 */
-			data  = (p[j].field >> (p[j].len - 1 - i)) << 1;
-			data &= IPG_PC_MGMTDATA;
-			data |= polarity | IPG_PC_MGMTDIR;
-
-			ipg_drive_phy_ctl_low_high(ioaddr, data);
-		}
-	}
-
-	send_three_state(ioaddr, polarity);
-
-	read_phy_bit(ioaddr, polarity);
-
-	/*
-	 * For a read cycle, the bits for the next two fields (TA and
-	 * DATA) are driven by the PHY (the IPG reads these bits).
-	 */
-	for (i = 0; i < p[6].len; i++) {
-		p[6].field |=
-		    (read_phy_bit(ioaddr, polarity) << (p[6].len - 1 - i));
-	}
-
-	send_three_state(ioaddr, polarity);
-	send_three_state(ioaddr, polarity);
-	send_three_state(ioaddr, polarity);
-	send_end(ioaddr, polarity);
-
-	/* Return the value of the DATA field. */
-	return p[6].field;
-}
-
-/*
- * Write to a register from the Physical Layer device located
- * on the IPG NIC, using the IPG PHYCTRL register.
- */
-static void mdio_write(struct net_device *dev, int phy_id, int phy_reg, int val)
-{
-	void __iomem *ioaddr = ipg_ioaddr(dev);
-	/*
-	 * The GMII mangement frame structure for a read is as follows:
-	 *
-	 * |Preamble|st|op|phyad|regad|ta|      data      |idle|
-	 * |< 32 1s>|01|10|AAAAA|RRRRR|z0|DDDDDDDDDDDDDDDD|z   |
-	 *
-	 * <32 1s> = 32 consecutive logic 1 values
-	 * A = bit of Physical Layer device address (MSB first)
-	 * R = bit of register address (MSB first)
-	 * z = High impedance state
-	 * D = bit of write data (MSB first)
-	 *
-	 * Transmission order is 'Preamble' field first, bits transmitted
-	 * left to right (first to last).
-	 */
-	struct {
-		u32 field;
-		unsigned int len;
-	} p[] = {
-		{ GMII_PREAMBLE,	32 },	/* Preamble */
-		{ GMII_ST,		2  },	/* ST */
-		{ GMII_WRITE,		2  },	/* OP */
-		{ phy_id,		5  },	/* PHYAD */
-		{ phy_reg,		5  },	/* REGAD */
-		{ 0x0002,		2  },	/* TA */
-		{ val & 0xffff,		16 },	/* DATA */
-		{ 0x0000,		1  }	/* IDLE */
-	};
-	unsigned int i, j;
-	u8 polarity, data;
-
-	polarity  = ipg_r8(PHY_CTRL);
-	polarity &= (IPG_PC_DUPLEX_POLARITY | IPG_PC_LINK_POLARITY);
-
-	/* Create the Preamble, ST, OP, PHYAD, and REGAD field. */
-	for (j = 0; j < 7; j++) {
-		for (i = 0; i < p[j].len; i++) {
-			/* For each variable length field, the MSB must be
-			 * transmitted first. Rotate through the field bits,
-			 * starting with the MSB, and move each bit into the
-			 * the 1st (2^1) bit position (this is the bit position
-			 * corresponding to the MgmtData bit of the PhyCtrl
-			 * register for the IPG).
-			 *
-			 * Example: ST = 01;
-			 *
-			 *          First write a '0' to bit 1 of the PhyCtrl
-			 *          register, then write a '1' to bit 1 of the
-			 *          PhyCtrl register.
-			 *
-			 * To do this, right shift the MSB of ST by the value:
-			 * [field length - 1 - #ST bits already written]
-			 * then left shift this result by 1.
-			 */
-			data  = (p[j].field >> (p[j].len - 1 - i)) << 1;
-			data &= IPG_PC_MGMTDATA;
-			data |= polarity | IPG_PC_MGMTDIR;
-
-			ipg_drive_phy_ctl_low_high(ioaddr, data);
-		}
-	}
-
-	/* The last cycle is a tri-state, so read from the PHY. */
-	ipg_write_phy_ctl(ioaddr, IPG_PC_MGMTCLK_LO | polarity);
-	ipg_r8(PHY_CTRL);
-	ipg_write_phy_ctl(ioaddr, IPG_PC_MGMTCLK_HI | polarity);
-}
-
-static void ipg_set_led_mode(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	void __iomem *ioaddr = sp->ioaddr;
-	u32 mode;
-
-	mode = ipg_r32(ASIC_CTRL);
-	mode &= ~(IPG_AC_LED_MODE_BIT_1 | IPG_AC_LED_MODE | IPG_AC_LED_SPEED);
-
-	if ((sp->led_mode & 0x03) > 1)
-		mode |= IPG_AC_LED_MODE_BIT_1;	/* Write Asic Control Bit 29 */
-
-	if ((sp->led_mode & 0x01) == 1)
-		mode |= IPG_AC_LED_MODE;	/* Write Asic Control Bit 14 */
-
-	if ((sp->led_mode & 0x08) == 8)
-		mode |= IPG_AC_LED_SPEED;	/* Write Asic Control Bit 27 */
-
-	ipg_w32(mode, ASIC_CTRL);
-}
-
-static void ipg_set_phy_set(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	void __iomem *ioaddr = sp->ioaddr;
-	int physet;
-
-	physet = ipg_r8(PHY_SET);
-	physet &= ~(IPG_PS_MEM_LENB9B | IPG_PS_MEM_LEN9 | IPG_PS_NON_COMPDET);
-	physet |= ((sp->led_mode & 0x70) >> 4);
-	ipg_w8(physet, PHY_SET);
-}
-
-static int ipg_reset(struct net_device *dev, u32 resetflags)
-{
-	/* Assert functional resets via the IPG AsicCtrl
-	 * register as specified by the 'resetflags' input
-	 * parameter.
-	 */
-	void __iomem *ioaddr = ipg_ioaddr(dev);
-	unsigned int timeout_count = 0;
-
-	IPG_DEBUG_MSG("_reset\n");
-
-	ipg_w32(ipg_r32(ASIC_CTRL) | resetflags, ASIC_CTRL);
-
-	/* Delay added to account for problem with 10Mbps reset. */
-	mdelay(IPG_AC_RESETWAIT);
-
-	while (IPG_AC_RESET_BUSY & ipg_r32(ASIC_CTRL)) {
-		mdelay(IPG_AC_RESETWAIT);
-		if (++timeout_count > IPG_AC_RESET_TIMEOUT)
-			return -ETIME;
-	}
-	/* Set LED Mode in Asic Control */
-	ipg_set_led_mode(dev);
-
-	/* Set PHYSet Register Value */
-	ipg_set_phy_set(dev);
-	return 0;
-}
-
-/* Find the GMII PHY address. */
-static int ipg_find_phyaddr(struct net_device *dev)
-{
-	unsigned int phyaddr, i;
-
-	for (i = 0; i < 32; i++) {
-		u32 status;
-
-		/* Search for the correct PHY address among 32 possible. */
-		phyaddr = (IPG_NIC_PHY_ADDRESS + i) % 32;
-
-		/* 10/22/03 Grace change verify from GMII_PHY_STATUS to
-		   GMII_PHY_ID1
-		 */
-
-		status = mdio_read(dev, phyaddr, MII_BMSR);
-
-		if ((status != 0xFFFF) && (status != 0))
-			return phyaddr;
-	}
-
-	return 0x1f;
-}
-
-/*
- * Configure IPG based on result of IEEE 802.3 PHY
- * auto-negotiation.
- */
-static int ipg_config_autoneg(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	void __iomem *ioaddr = sp->ioaddr;
-	unsigned int txflowcontrol;
-	unsigned int rxflowcontrol;
-	unsigned int fullduplex;
-	u32 mac_ctrl_val;
-	u32 asicctrl;
-	u8 phyctrl;
-	const char *speed;
-	const char *duplex;
-	const char *tx_desc;
-	const char *rx_desc;
-
-	IPG_DEBUG_MSG("_config_autoneg\n");
-
-	asicctrl = ipg_r32(ASIC_CTRL);
-	phyctrl = ipg_r8(PHY_CTRL);
-	mac_ctrl_val = ipg_r32(MAC_CTRL);
-
-	/* Set flags for use in resolving auto-negotiation, assuming
-	 * non-1000Mbps, half duplex, no flow control.
-	 */
-	fullduplex = 0;
-	txflowcontrol = 0;
-	rxflowcontrol = 0;
-
-	/* To accommodate a problem in 10Mbps operation,
-	 * set a global flag if PHY running in 10Mbps mode.
-	 */
-	sp->tenmbpsmode = 0;
-
-	/* Determine actual speed of operation. */
-	switch (phyctrl & IPG_PC_LINK_SPEED) {
-	case IPG_PC_LINK_SPEED_10MBPS:
-		speed = "10Mbps";
-		sp->tenmbpsmode = 1;
-		break;
-	case IPG_PC_LINK_SPEED_100MBPS:
-		speed = "100Mbps";
-		break;
-	case IPG_PC_LINK_SPEED_1000MBPS:
-		speed = "1000Mbps";
-		break;
-	default:
-		speed = "undefined!";
-		return 0;
-	}
-
-	netdev_info(dev, "Link speed = %s\n", speed);
-	if (sp->tenmbpsmode == 1)
-		netdev_info(dev, "10Mbps operational mode enabled\n");
-
-	if (phyctrl & IPG_PC_DUPLEX_STATUS) {
-		fullduplex = 1;
-		txflowcontrol = 1;
-		rxflowcontrol = 1;
-	}
-
-	/* Configure full duplex, and flow control. */
-	if (fullduplex == 1) {
-
-		/* Configure IPG for full duplex operation. */
-
-		duplex = "full";
-
-		mac_ctrl_val |= IPG_MC_DUPLEX_SELECT_FD;
-
-		if (txflowcontrol == 1) {
-			tx_desc  = "";
-			mac_ctrl_val |= IPG_MC_TX_FLOW_CONTROL_ENABLE;
-		} else {
-			tx_desc = "no ";
-			mac_ctrl_val &= ~IPG_MC_TX_FLOW_CONTROL_ENABLE;
-		}
-
-		if (rxflowcontrol == 1) {
-			rx_desc = "";
-			mac_ctrl_val |= IPG_MC_RX_FLOW_CONTROL_ENABLE;
-		} else {
-			rx_desc = "no ";
-			mac_ctrl_val &= ~IPG_MC_RX_FLOW_CONTROL_ENABLE;
-		}
-	} else {
-		duplex = "half";
-		tx_desc = "no ";
-		rx_desc = "no ";
-		mac_ctrl_val &= (~IPG_MC_DUPLEX_SELECT_FD &
-				 ~IPG_MC_TX_FLOW_CONTROL_ENABLE &
-				 ~IPG_MC_RX_FLOW_CONTROL_ENABLE);
-	}
-
-	netdev_info(dev, "setting %s duplex, %sTX, %sRX flow control\n",
-		    duplex, tx_desc, rx_desc);
-	ipg_w32(mac_ctrl_val, MAC_CTRL);
-
-	return 0;
-}
-
-/* Determine and configure multicast operation and set
- * receive mode for IPG.
- */
-static void ipg_nic_set_multicast_list(struct net_device *dev)
-{
-	void __iomem *ioaddr = ipg_ioaddr(dev);
-	struct netdev_hw_addr *ha;
-	unsigned int hashindex;
-	u32 hashtable[2];
-	u8 receivemode;
-
-	IPG_DEBUG_MSG("_nic_set_multicast_list\n");
-
-	receivemode = IPG_RM_RECEIVEUNICAST | IPG_RM_RECEIVEBROADCAST;
-
-	if (dev->flags & IFF_PROMISC) {
-		/* NIC to be configured in promiscuous mode. */
-		receivemode = IPG_RM_RECEIVEALLFRAMES;
-	} else if ((dev->flags & IFF_ALLMULTI) ||
-		   ((dev->flags & IFF_MULTICAST) &&
-		    (netdev_mc_count(dev) > IPG_MULTICAST_HASHTABLE_SIZE))) {
-		/* NIC to be configured to receive all multicast
-		 * frames. */
-		receivemode |= IPG_RM_RECEIVEMULTICAST;
-	} else if ((dev->flags & IFF_MULTICAST) && !netdev_mc_empty(dev)) {
-		/* NIC to be configured to receive selected
-		 * multicast addresses. */
-		receivemode |= IPG_RM_RECEIVEMULTICASTHASH;
-	}
-
-	/* Calculate the bits to set for the 64 bit, IPG HASHTABLE.
-	 * The IPG applies a cyclic-redundancy-check (the same CRC
-	 * used to calculate the frame data FCS) to the destination
-	 * address all incoming multicast frames whose destination
-	 * address has the multicast bit set. The least significant
-	 * 6 bits of the CRC result are used as an addressing index
-	 * into the hash table. If the value of the bit addressed by
-	 * this index is a 1, the frame is passed to the host system.
-	 */
-
-	/* Clear hashtable. */
-	hashtable[0] = 0x00000000;
-	hashtable[1] = 0x00000000;
-
-	/* Cycle through all multicast addresses to filter. */
-	netdev_for_each_mc_addr(ha, dev) {
-		/* Calculate CRC result for each multicast address. */
-		hashindex = crc32_le(0xffffffff, ha->addr,
-				     ETH_ALEN);
-
-		/* Use only the least significant 6 bits. */
-		hashindex = hashindex & 0x3F;
-
-		/* Within "hashtable", set bit number "hashindex"
-		 * to a logic 1.
-		 */
-		set_bit(hashindex, (void *)hashtable);
-	}
-
-	/* Write the value of the hashtable, to the 4, 16 bit
-	 * HASHTABLE IPG registers.
-	 */
-	ipg_w32(hashtable[0], HASHTABLE_0);
-	ipg_w32(hashtable[1], HASHTABLE_1);
-
-	ipg_w8(IPG_RM_RSVD_MASK & receivemode, RECEIVE_MODE);
-
-	IPG_DEBUG_MSG("ReceiveMode = %x\n", ipg_r8(RECEIVE_MODE));
-}
-
-static int ipg_io_config(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	void __iomem *ioaddr = ipg_ioaddr(dev);
-	u32 origmacctrl;
-	u32 restoremacctrl;
-
-	IPG_DEBUG_MSG("_io_config\n");
-
-	origmacctrl = ipg_r32(MAC_CTRL);
-
-	restoremacctrl = origmacctrl | IPG_MC_STATISTICS_ENABLE;
-
-	/* Based on compilation option, determine if FCS is to be
-	 * stripped on receive frames by IPG.
-	 */
-	if (!IPG_STRIP_FCS_ON_RX)
-		restoremacctrl |= IPG_MC_RCV_FCS;
-
-	/* Determine if transmitter and/or receiver are
-	 * enabled so we may restore MACCTRL correctly.
-	 */
-	if (origmacctrl & IPG_MC_TX_ENABLED)
-		restoremacctrl |= IPG_MC_TX_ENABLE;
-
-	if (origmacctrl & IPG_MC_RX_ENABLED)
-		restoremacctrl |= IPG_MC_RX_ENABLE;
-
-	/* Transmitter and receiver must be disabled before setting
-	 * IFSSelect.
-	 */
-	ipg_w32((origmacctrl & (IPG_MC_RX_DISABLE | IPG_MC_TX_DISABLE)) &
-		IPG_MC_RSVD_MASK, MAC_CTRL);
-
-	/* Now that transmitter and receiver are disabled, write
-	 * to IFSSelect.
-	 */
-	ipg_w32((origmacctrl & IPG_MC_IFS_96BIT) & IPG_MC_RSVD_MASK, MAC_CTRL);
-
-	/* Set RECEIVEMODE register. */
-	ipg_nic_set_multicast_list(dev);
-
-	ipg_w16(sp->max_rxframe_size, MAX_FRAME_SIZE);
-
-	ipg_w8(IPG_RXDMAPOLLPERIOD_VALUE,   RX_DMA_POLL_PERIOD);
-	ipg_w8(IPG_RXDMAURGENTTHRESH_VALUE, RX_DMA_URGENT_THRESH);
-	ipg_w8(IPG_RXDMABURSTTHRESH_VALUE,  RX_DMA_BURST_THRESH);
-	ipg_w8(IPG_TXDMAPOLLPERIOD_VALUE,   TX_DMA_POLL_PERIOD);
-	ipg_w8(IPG_TXDMAURGENTTHRESH_VALUE, TX_DMA_URGENT_THRESH);
-	ipg_w8(IPG_TXDMABURSTTHRESH_VALUE,  TX_DMA_BURST_THRESH);
-	ipg_w16((IPG_IE_HOST_ERROR | IPG_IE_TX_DMA_COMPLETE |
-		 IPG_IE_TX_COMPLETE | IPG_IE_INT_REQUESTED |
-		 IPG_IE_UPDATE_STATS | IPG_IE_LINK_EVENT |
-		 IPG_IE_RX_DMA_COMPLETE | IPG_IE_RX_DMA_PRIORITY), INT_ENABLE);
-	ipg_w16(IPG_FLOWONTHRESH_VALUE,  FLOW_ON_THRESH);
-	ipg_w16(IPG_FLOWOFFTHRESH_VALUE, FLOW_OFF_THRESH);
-
-	/* IPG multi-frag frame bug workaround.
-	 * Per silicon revision B3 eratta.
-	 */
-	ipg_w16(ipg_r16(DEBUG_CTRL) | 0x0200, DEBUG_CTRL);
-
-	/* IPG TX poll now bug workaround.
-	 * Per silicon revision B3 eratta.
-	 */
-	ipg_w16(ipg_r16(DEBUG_CTRL) | 0x0010, DEBUG_CTRL);
-
-	/* IPG RX poll now bug workaround.
-	 * Per silicon revision B3 eratta.
-	 */
-	ipg_w16(ipg_r16(DEBUG_CTRL) | 0x0020, DEBUG_CTRL);
-
-	/* Now restore MACCTRL to original setting. */
-	ipg_w32(IPG_MC_RSVD_MASK & restoremacctrl, MAC_CTRL);
-
-	/* Disable unused RMON statistics. */
-	ipg_w32(IPG_RZ_ALL, RMON_STATISTICS_MASK);
-
-	/* Disable unused MIB statistics. */
-	ipg_w32(IPG_SM_MACCONTROLFRAMESXMTD | IPG_SM_MACCONTROLFRAMESRCVD |
-		IPG_SM_BCSTOCTETXMTOK_BCSTFRAMESXMTDOK | IPG_SM_TXJUMBOFRAMES |
-		IPG_SM_MCSTOCTETXMTOK_MCSTFRAMESXMTDOK | IPG_SM_RXJUMBOFRAMES |
-		IPG_SM_BCSTOCTETRCVDOK_BCSTFRAMESRCVDOK |
-		IPG_SM_UDPCHECKSUMERRORS | IPG_SM_TCPCHECKSUMERRORS |
-		IPG_SM_IPCHECKSUMERRORS, STATISTICS_MASK);
-
-	return 0;
-}
-
-/*
- * Create a receive buffer within system memory and update
- * NIC private structure appropriately.
- */
-static int ipg_get_rxbuff(struct net_device *dev, int entry)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	struct ipg_rx *rxfd = sp->rxd + entry;
-	struct sk_buff *skb;
-	u64 rxfragsize;
-
-	IPG_DEBUG_MSG("_get_rxbuff\n");
-
-	skb = netdev_alloc_skb_ip_align(dev, sp->rxsupport_size);
-	if (!skb) {
-		sp->rx_buff[entry] = NULL;
-		return -ENOMEM;
-	}
-
-	/* Save the address of the sk_buff structure. */
-	sp->rx_buff[entry] = skb;
-
-	rxfd->frag_info = cpu_to_le64(pci_map_single(sp->pdev, skb->data,
-		sp->rx_buf_sz, PCI_DMA_FROMDEVICE));
-
-	/* Set the RFD fragment length. */
-	rxfragsize = sp->rxfrag_size;
-	rxfd->frag_info |= cpu_to_le64((rxfragsize << 48) & IPG_RFI_FRAGLEN);
-
-	return 0;
-}
-
-static int init_rfdlist(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	void __iomem *ioaddr = sp->ioaddr;
-	unsigned int i;
-
-	IPG_DEBUG_MSG("_init_rfdlist\n");
-
-	for (i = 0; i < IPG_RFDLIST_LENGTH; i++) {
-		struct ipg_rx *rxfd = sp->rxd + i;
-
-		if (sp->rx_buff[i]) {
-			pci_unmap_single(sp->pdev,
-				le64_to_cpu(rxfd->frag_info) & ~IPG_RFI_FRAGLEN,
-				sp->rx_buf_sz, PCI_DMA_FROMDEVICE);
-			dev_kfree_skb_irq(sp->rx_buff[i]);
-			sp->rx_buff[i] = NULL;
-		}
-
-		/* Clear out the RFS field. */
-		rxfd->rfs = 0x0000000000000000;
-
-		if (ipg_get_rxbuff(dev, i) < 0) {
-			/*
-			 * A receive buffer was not ready, break the
-			 * RFD list here.
-			 */
-			IPG_DEBUG_MSG("Cannot allocate Rx buffer\n");
-
-			/* Just in case we cannot allocate a single RFD.
-			 * Should not occur.
-			 */
-			if (i == 0) {
-				netdev_err(dev, "No memory available for RFD list\n");
-				return -ENOMEM;
-			}
-		}
-
-		rxfd->next_desc = cpu_to_le64(sp->rxd_map +
-			sizeof(struct ipg_rx)*(i + 1));
-	}
-	sp->rxd[i - 1].next_desc = cpu_to_le64(sp->rxd_map);
-
-	sp->rx_current = 0;
-	sp->rx_dirty = 0;
-
-	/* Write the location of the RFDList to the IPG. */
-	ipg_w32((u32) sp->rxd_map, RFD_LIST_PTR_0);
-	ipg_w32(0x00000000, RFD_LIST_PTR_1);
-
-	return 0;
-}
-
-static void init_tfdlist(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	void __iomem *ioaddr = sp->ioaddr;
-	unsigned int i;
-
-	IPG_DEBUG_MSG("_init_tfdlist\n");
-
-	for (i = 0; i < IPG_TFDLIST_LENGTH; i++) {
-		struct ipg_tx *txfd = sp->txd + i;
-
-		txfd->tfc = cpu_to_le64(IPG_TFC_TFDDONE);
-
-		if (sp->tx_buff[i]) {
-			dev_kfree_skb_irq(sp->tx_buff[i]);
-			sp->tx_buff[i] = NULL;
-		}
-
-		txfd->next_desc = cpu_to_le64(sp->txd_map +
-			sizeof(struct ipg_tx)*(i + 1));
-	}
-	sp->txd[i - 1].next_desc = cpu_to_le64(sp->txd_map);
-
-	sp->tx_current = 0;
-	sp->tx_dirty = 0;
-
-	/* Write the location of the TFDList to the IPG. */
-	IPG_DDEBUG_MSG("Starting TFDListPtr = %08x\n",
-		       (u32) sp->txd_map);
-	ipg_w32((u32) sp->txd_map, TFD_LIST_PTR_0);
-	ipg_w32(0x00000000, TFD_LIST_PTR_1);
-
-	sp->reset_current_tfd = 1;
-}
-
-/*
- * Free all transmit buffers which have already been transferred
- * via DMA to the IPG.
- */
-static void ipg_nic_txfree(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	unsigned int released, pending, dirty;
-
-	IPG_DEBUG_MSG("_nic_txfree\n");
-
-	pending = sp->tx_current - sp->tx_dirty;
-	dirty = sp->tx_dirty % IPG_TFDLIST_LENGTH;
-
-	for (released = 0; released < pending; released++) {
-		struct sk_buff *skb = sp->tx_buff[dirty];
-		struct ipg_tx *txfd = sp->txd + dirty;
-
-		IPG_DEBUG_MSG("TFC = %016lx\n", (unsigned long) txfd->tfc);
-
-		/* Look at each TFD's TFC field beginning
-		 * at the last freed TFD up to the current TFD.
-		 * If the TFDDone bit is set, free the associated
-		 * buffer.
-		 */
-		if (!(txfd->tfc & cpu_to_le64(IPG_TFC_TFDDONE)))
-                        break;
-
-		/* Free the transmit buffer. */
-		if (skb) {
-			pci_unmap_single(sp->pdev,
-				le64_to_cpu(txfd->frag_info) & ~IPG_TFI_FRAGLEN,
-				skb->len, PCI_DMA_TODEVICE);
-
-			dev_kfree_skb_irq(skb);
-
-			sp->tx_buff[dirty] = NULL;
-		}
-		dirty = (dirty + 1) % IPG_TFDLIST_LENGTH;
-	}
-
-	sp->tx_dirty += released;
-
-	if (netif_queue_stopped(dev) &&
-	    (sp->tx_current != (sp->tx_dirty + IPG_TFDLIST_LENGTH))) {
-		netif_wake_queue(dev);
-	}
-}
-
-static void ipg_tx_timeout(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	void __iomem *ioaddr = sp->ioaddr;
-
-	ipg_reset(dev, IPG_AC_TX_RESET | IPG_AC_DMA | IPG_AC_NETWORK |
-		  IPG_AC_FIFO);
-
-	spin_lock_irq(&sp->lock);
-
-	/* Re-configure after DMA reset. */
-	if (ipg_io_config(dev) < 0)
-		netdev_info(dev, "Error during re-configuration\n");
-
-	init_tfdlist(dev);
-
-	spin_unlock_irq(&sp->lock);
-
-	ipg_w32((ipg_r32(MAC_CTRL) | IPG_MC_TX_ENABLE) & IPG_MC_RSVD_MASK,
-		MAC_CTRL);
-}
-
-/*
- * For TxComplete interrupts, free all transmit
- * buffers which have already been transferred via DMA
- * to the IPG.
- */
-static void ipg_nic_txcleanup(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	void __iomem *ioaddr = sp->ioaddr;
-	unsigned int i;
-
-	IPG_DEBUG_MSG("_nic_txcleanup\n");
-
-	for (i = 0; i < IPG_TFDLIST_LENGTH; i++) {
-		/* Reading the TXSTATUS register clears the
-		 * TX_COMPLETE interrupt.
-		 */
-		u32 txstatusdword = ipg_r32(TX_STATUS);
-
-		IPG_DEBUG_MSG("TxStatus = %08x\n", txstatusdword);
-
-		/* Check for Transmit errors. Error bits only valid if
-		 * TX_COMPLETE bit in the TXSTATUS register is a 1.
-		 */
-		if (!(txstatusdword & IPG_TS_TX_COMPLETE))
-			break;
-
-		/* If in 10Mbps mode, indicate transmit is ready. */
-		if (sp->tenmbpsmode) {
-			netif_wake_queue(dev);
-		}
-
-		/* Transmit error, increment stat counters. */
-		if (txstatusdword & IPG_TS_TX_ERROR) {
-			IPG_DEBUG_MSG("Transmit error\n");
-			sp->stats.tx_errors++;
-		}
-
-		/* Late collision, re-enable transmitter. */
-		if (txstatusdword & IPG_TS_LATE_COLLISION) {
-			IPG_DEBUG_MSG("Late collision on transmit\n");
-			ipg_w32((ipg_r32(MAC_CTRL) | IPG_MC_TX_ENABLE) &
-				IPG_MC_RSVD_MASK, MAC_CTRL);
-		}
-
-		/* Maximum collisions, re-enable transmitter. */
-		if (txstatusdword & IPG_TS_TX_MAX_COLL) {
-			IPG_DEBUG_MSG("Maximum collisions on transmit\n");
-			ipg_w32((ipg_r32(MAC_CTRL) | IPG_MC_TX_ENABLE) &
-				IPG_MC_RSVD_MASK, MAC_CTRL);
-		}
-
-		/* Transmit underrun, reset and re-enable
-		 * transmitter.
-		 */
-		if (txstatusdword & IPG_TS_TX_UNDERRUN) {
-			IPG_DEBUG_MSG("Transmitter underrun\n");
-			sp->stats.tx_fifo_errors++;
-			ipg_reset(dev, IPG_AC_TX_RESET | IPG_AC_DMA |
-				  IPG_AC_NETWORK | IPG_AC_FIFO);
-
-			/* Re-configure after DMA reset. */
-			if (ipg_io_config(dev) < 0) {
-				netdev_info(dev, "Error during re-configuration\n");
-			}
-			init_tfdlist(dev);
-
-			ipg_w32((ipg_r32(MAC_CTRL) | IPG_MC_TX_ENABLE) &
-				IPG_MC_RSVD_MASK, MAC_CTRL);
-		}
-	}
-
-	ipg_nic_txfree(dev);
-}
-
-/* Provides statistical information about the IPG NIC. */
-static struct net_device_stats *ipg_nic_get_stats(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	void __iomem *ioaddr = sp->ioaddr;
-	u16 temp1;
-	u16 temp2;
-
-	IPG_DEBUG_MSG("_nic_get_stats\n");
-
-	/* Check to see if the NIC has been initialized via nic_open,
-	 * before trying to read statistic registers.
-	 */
-	if (!netif_running(dev))
-		return &sp->stats;
-
-	sp->stats.rx_packets += ipg_r32(IPG_FRAMESRCVDOK);
-	sp->stats.tx_packets += ipg_r32(IPG_FRAMESXMTDOK);
-	sp->stats.rx_bytes += ipg_r32(IPG_OCTETRCVOK);
-	sp->stats.tx_bytes += ipg_r32(IPG_OCTETXMTOK);
-	temp1 = ipg_r16(IPG_FRAMESLOSTRXERRORS);
-	sp->stats.rx_errors += temp1;
-	sp->stats.rx_missed_errors += temp1;
-	temp1 = ipg_r32(IPG_SINGLECOLFRAMES) + ipg_r32(IPG_MULTICOLFRAMES) +
-		ipg_r32(IPG_LATECOLLISIONS);
-	temp2 = ipg_r16(IPG_CARRIERSENSEERRORS);
-	sp->stats.collisions += temp1;
-	sp->stats.tx_dropped += ipg_r16(IPG_FRAMESABORTXSCOLLS);
-	sp->stats.tx_errors += ipg_r16(IPG_FRAMESWEXDEFERRAL) +
-		ipg_r32(IPG_FRAMESWDEFERREDXMT) + temp1 + temp2;
-	sp->stats.multicast += ipg_r32(IPG_MCSTOCTETRCVDOK);
-
-	/* detailed tx_errors */
-	sp->stats.tx_carrier_errors += temp2;
-
-	/* detailed rx_errors */
-	sp->stats.rx_length_errors += ipg_r16(IPG_INRANGELENGTHERRORS) +
-		ipg_r16(IPG_FRAMETOOLONGERRORS);
-	sp->stats.rx_crc_errors += ipg_r16(IPG_FRAMECHECKSEQERRORS);
-
-	/* Unutilized IPG statistic registers. */
-	ipg_r32(IPG_MCSTFRAMESRCVDOK);
-
-	return &sp->stats;
-}
-
-/* Restore used receive buffers. */
-static int ipg_nic_rxrestore(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	const unsigned int curr = sp->rx_current;
-	unsigned int dirty = sp->rx_dirty;
-
-	IPG_DEBUG_MSG("_nic_rxrestore\n");
-
-	for (dirty = sp->rx_dirty; curr - dirty > 0; dirty++) {
-		unsigned int entry = dirty % IPG_RFDLIST_LENGTH;
-
-		/* rx_copybreak may poke hole here and there. */
-		if (sp->rx_buff[entry])
-			continue;
-
-		/* Generate a new receive buffer to replace the
-		 * current buffer (which will be released by the
-		 * Linux system).
-		 */
-		if (ipg_get_rxbuff(dev, entry) < 0) {
-			IPG_DEBUG_MSG("Cannot allocate new Rx buffer\n");
-
-			break;
-		}
-
-		/* Reset the RFS field. */
-		sp->rxd[entry].rfs = 0x0000000000000000;
-	}
-	sp->rx_dirty = dirty;
-
-	return 0;
-}
-
-/* use jumboindex and jumbosize to control jumbo frame status
- * initial status is jumboindex=-1 and jumbosize=0
- * 1. jumboindex = -1 and jumbosize=0 : previous jumbo frame has been done.
- * 2. jumboindex != -1 and jumbosize != 0 : jumbo frame is not over size and receiving
- * 3. jumboindex = -1 and jumbosize != 0 : jumbo frame is over size, already dump
- *               previous receiving and need to continue dumping the current one
- */
-enum {
-	NORMAL_PACKET,
-	ERROR_PACKET
-};
-
-enum {
-	FRAME_NO_START_NO_END	= 0,
-	FRAME_WITH_START		= 1,
-	FRAME_WITH_END		= 10,
-	FRAME_WITH_START_WITH_END = 11
-};
-
-static void ipg_nic_rx_free_skb(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	unsigned int entry = sp->rx_current % IPG_RFDLIST_LENGTH;
-
-	if (sp->rx_buff[entry]) {
-		struct ipg_rx *rxfd = sp->rxd + entry;
-
-		pci_unmap_single(sp->pdev,
-			le64_to_cpu(rxfd->frag_info) & ~IPG_RFI_FRAGLEN,
-			sp->rx_buf_sz, PCI_DMA_FROMDEVICE);
-		dev_kfree_skb_irq(sp->rx_buff[entry]);
-		sp->rx_buff[entry] = NULL;
-	}
-}
-
-static int ipg_nic_rx_check_frame_type(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	struct ipg_rx *rxfd = sp->rxd + (sp->rx_current % IPG_RFDLIST_LENGTH);
-	int type = FRAME_NO_START_NO_END;
-
-	if (le64_to_cpu(rxfd->rfs) & IPG_RFS_FRAMESTART)
-		type += FRAME_WITH_START;
-	if (le64_to_cpu(rxfd->rfs) & IPG_RFS_FRAMEEND)
-		type += FRAME_WITH_END;
-	return type;
-}
-
-static int ipg_nic_rx_check_error(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	unsigned int entry = sp->rx_current % IPG_RFDLIST_LENGTH;
-	struct ipg_rx *rxfd = sp->rxd + entry;
-
-	if (IPG_DROP_ON_RX_ETH_ERRORS && (le64_to_cpu(rxfd->rfs) &
-	     (IPG_RFS_RXFIFOOVERRUN | IPG_RFS_RXRUNTFRAME |
-	      IPG_RFS_RXALIGNMENTERROR | IPG_RFS_RXFCSERROR |
-	      IPG_RFS_RXOVERSIZEDFRAME | IPG_RFS_RXLENGTHERROR))) {
-		IPG_DEBUG_MSG("Rx error, RFS = %016lx\n",
-			      (unsigned long) rxfd->rfs);
-
-		/* Increment general receive error statistic. */
-		sp->stats.rx_errors++;
-
-		/* Increment detailed receive error statistics. */
-		if (le64_to_cpu(rxfd->rfs) & IPG_RFS_RXFIFOOVERRUN) {
-			IPG_DEBUG_MSG("RX FIFO overrun occurred\n");
-
-			sp->stats.rx_fifo_errors++;
-		}
-
-		if (le64_to_cpu(rxfd->rfs) & IPG_RFS_RXRUNTFRAME) {
-			IPG_DEBUG_MSG("RX runt occurred\n");
-			sp->stats.rx_length_errors++;
-		}
-
-		/* Do nothing for IPG_RFS_RXOVERSIZEDFRAME,
-		 * error count handled by a IPG statistic register.
-		 */
-
-		if (le64_to_cpu(rxfd->rfs) & IPG_RFS_RXALIGNMENTERROR) {
-			IPG_DEBUG_MSG("RX alignment error occurred\n");
-			sp->stats.rx_frame_errors++;
-		}
-
-		/* Do nothing for IPG_RFS_RXFCSERROR, error count
-		 * handled by a IPG statistic register.
-		 */
-
-		/* Free the memory associated with the RX
-		 * buffer since it is erroneous and we will
-		 * not pass it to higher layer processes.
-		 */
-		if (sp->rx_buff[entry]) {
-			pci_unmap_single(sp->pdev,
-				le64_to_cpu(rxfd->frag_info) & ~IPG_RFI_FRAGLEN,
-				sp->rx_buf_sz, PCI_DMA_FROMDEVICE);
-
-			dev_kfree_skb_irq(sp->rx_buff[entry]);
-			sp->rx_buff[entry] = NULL;
-		}
-		return ERROR_PACKET;
-	}
-	return NORMAL_PACKET;
-}
-
-static void ipg_nic_rx_with_start_and_end(struct net_device *dev,
-					  struct ipg_nic_private *sp,
-					  struct ipg_rx *rxfd, unsigned entry)
-{
-	struct ipg_jumbo *jumbo = &sp->jumbo;
-	struct sk_buff *skb;
-	int framelen;
-
-	if (jumbo->found_start) {
-		dev_kfree_skb_irq(jumbo->skb);
-		jumbo->found_start = 0;
-		jumbo->current_size = 0;
-		jumbo->skb = NULL;
-	}
-
-	/* 1: found error, 0 no error */
-	if (ipg_nic_rx_check_error(dev) != NORMAL_PACKET)
-		return;
-
-	skb = sp->rx_buff[entry];
-	if (!skb)
-		return;
-
-	/* accept this frame and send to upper layer */
-	framelen = le64_to_cpu(rxfd->rfs) & IPG_RFS_RXFRAMELEN;
-	if (framelen > sp->rxfrag_size)
-		framelen = sp->rxfrag_size;
-
-	skb_put(skb, framelen);
-	skb->protocol = eth_type_trans(skb, dev);
-	skb_checksum_none_assert(skb);
-	netif_rx(skb);
-	sp->rx_buff[entry] = NULL;
-}
-
-static void ipg_nic_rx_with_start(struct net_device *dev,
-				  struct ipg_nic_private *sp,
-				  struct ipg_rx *rxfd, unsigned entry)
-{
-	struct ipg_jumbo *jumbo = &sp->jumbo;
-	struct pci_dev *pdev = sp->pdev;
-	struct sk_buff *skb;
-
-	/* 1: found error, 0 no error */
-	if (ipg_nic_rx_check_error(dev) != NORMAL_PACKET)
-		return;
-
-	/* accept this frame and send to upper layer */
-	skb = sp->rx_buff[entry];
-	if (!skb)
-		return;
-
-	if (jumbo->found_start)
-		dev_kfree_skb_irq(jumbo->skb);
-
-	pci_unmap_single(pdev, le64_to_cpu(rxfd->frag_info) & ~IPG_RFI_FRAGLEN,
-			 sp->rx_buf_sz, PCI_DMA_FROMDEVICE);
-
-	skb_put(skb, sp->rxfrag_size);
-
-	jumbo->found_start = 1;
-	jumbo->current_size = sp->rxfrag_size;
-	jumbo->skb = skb;
-
-	sp->rx_buff[entry] = NULL;
-}
-
-static void ipg_nic_rx_with_end(struct net_device *dev,
-				struct ipg_nic_private *sp,
-				struct ipg_rx *rxfd, unsigned entry)
-{
-	struct ipg_jumbo *jumbo = &sp->jumbo;
-
-	/* 1: found error, 0 no error */
-	if (ipg_nic_rx_check_error(dev) == NORMAL_PACKET) {
-		struct sk_buff *skb = sp->rx_buff[entry];
-
-		if (!skb)
-			return;
-
-		if (jumbo->found_start) {
-			int framelen, endframelen;
-
-			framelen = le64_to_cpu(rxfd->rfs) & IPG_RFS_RXFRAMELEN;
-
-			endframelen = framelen - jumbo->current_size;
-			if (framelen > sp->rxsupport_size)
-				dev_kfree_skb_irq(jumbo->skb);
-			else {
-				memcpy(skb_put(jumbo->skb, endframelen),
-				       skb->data, endframelen);
-
-				jumbo->skb->protocol =
-				    eth_type_trans(jumbo->skb, dev);
-
-				skb_checksum_none_assert(jumbo->skb);
-				netif_rx(jumbo->skb);
-			}
-		}
-
-		jumbo->found_start = 0;
-		jumbo->current_size = 0;
-		jumbo->skb = NULL;
-
-		ipg_nic_rx_free_skb(dev);
-	} else {
-		dev_kfree_skb_irq(jumbo->skb);
-		jumbo->found_start = 0;
-		jumbo->current_size = 0;
-		jumbo->skb = NULL;
-	}
-}
-
-static void ipg_nic_rx_no_start_no_end(struct net_device *dev,
-				       struct ipg_nic_private *sp,
-				       struct ipg_rx *rxfd, unsigned entry)
-{
-	struct ipg_jumbo *jumbo = &sp->jumbo;
-
-	/* 1: found error, 0 no error */
-	if (ipg_nic_rx_check_error(dev) == NORMAL_PACKET) {
-		struct sk_buff *skb = sp->rx_buff[entry];
-
-		if (skb) {
-			if (jumbo->found_start) {
-				jumbo->current_size += sp->rxfrag_size;
-				if (jumbo->current_size <= sp->rxsupport_size) {
-					memcpy(skb_put(jumbo->skb,
-						       sp->rxfrag_size),
-					       skb->data, sp->rxfrag_size);
-				}
-			}
-			ipg_nic_rx_free_skb(dev);
-		}
-	} else {
-		dev_kfree_skb_irq(jumbo->skb);
-		jumbo->found_start = 0;
-		jumbo->current_size = 0;
-		jumbo->skb = NULL;
-	}
-}
-
-static int ipg_nic_rx_jumbo(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	unsigned int curr = sp->rx_current;
-	void __iomem *ioaddr = sp->ioaddr;
-	unsigned int i;
-
-	IPG_DEBUG_MSG("_nic_rx\n");
-
-	for (i = 0; i < IPG_MAXRFDPROCESS_COUNT; i++, curr++) {
-		unsigned int entry = curr % IPG_RFDLIST_LENGTH;
-		struct ipg_rx *rxfd = sp->rxd + entry;
-
-		if (!(rxfd->rfs & cpu_to_le64(IPG_RFS_RFDDONE)))
-			break;
-
-		switch (ipg_nic_rx_check_frame_type(dev)) {
-		case FRAME_WITH_START_WITH_END:
-			ipg_nic_rx_with_start_and_end(dev, sp, rxfd, entry);
-			break;
-		case FRAME_WITH_START:
-			ipg_nic_rx_with_start(dev, sp, rxfd, entry);
-			break;
-		case FRAME_WITH_END:
-			ipg_nic_rx_with_end(dev, sp, rxfd, entry);
-			break;
-		case FRAME_NO_START_NO_END:
-			ipg_nic_rx_no_start_no_end(dev, sp, rxfd, entry);
-			break;
-		}
-	}
-
-	sp->rx_current = curr;
-
-	if (i == IPG_MAXRFDPROCESS_COUNT) {
-		/* There are more RFDs to process, however the
-		 * allocated amount of RFD processing time has
-		 * expired. Assert Interrupt Requested to make
-		 * sure we come back to process the remaining RFDs.
-		 */
-		ipg_w32(ipg_r32(ASIC_CTRL) | IPG_AC_INT_REQUEST, ASIC_CTRL);
-	}
-
-	ipg_nic_rxrestore(dev);
-
-	return 0;
-}
-
-static int ipg_nic_rx(struct net_device *dev)
-{
-	/* Transfer received Ethernet frames to higher network layers. */
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	unsigned int curr = sp->rx_current;
-	void __iomem *ioaddr = sp->ioaddr;
-	struct ipg_rx *rxfd;
-	unsigned int i;
-
-	IPG_DEBUG_MSG("_nic_rx\n");
-
-#define __RFS_MASK \
-	cpu_to_le64(IPG_RFS_RFDDONE | IPG_RFS_FRAMESTART | IPG_RFS_FRAMEEND)
-
-	for (i = 0; i < IPG_MAXRFDPROCESS_COUNT; i++, curr++) {
-		unsigned int entry = curr % IPG_RFDLIST_LENGTH;
-		struct sk_buff *skb = sp->rx_buff[entry];
-		unsigned int framelen;
-
-		rxfd = sp->rxd + entry;
-
-		if (((rxfd->rfs & __RFS_MASK) != __RFS_MASK) || !skb)
-			break;
-
-		/* Get received frame length. */
-		framelen = le64_to_cpu(rxfd->rfs) & IPG_RFS_RXFRAMELEN;
-
-		/* Check for jumbo frame arrival with too small
-		 * RXFRAG_SIZE.
-		 */
-		if (framelen > sp->rxfrag_size) {
-			IPG_DEBUG_MSG
-			    ("RFS FrameLen > allocated fragment size\n");
-
-			framelen = sp->rxfrag_size;
-		}
-
-		if ((IPG_DROP_ON_RX_ETH_ERRORS && (le64_to_cpu(rxfd->rfs) &
-		       (IPG_RFS_RXFIFOOVERRUN | IPG_RFS_RXRUNTFRAME |
-			IPG_RFS_RXALIGNMENTERROR | IPG_RFS_RXFCSERROR |
-			IPG_RFS_RXOVERSIZEDFRAME | IPG_RFS_RXLENGTHERROR)))) {
-
-			IPG_DEBUG_MSG("Rx error, RFS = %016lx\n",
-				      (unsigned long int) rxfd->rfs);
-
-			/* Increment general receive error statistic. */
-			sp->stats.rx_errors++;
-
-			/* Increment detailed receive error statistics. */
-			if (le64_to_cpu(rxfd->rfs) & IPG_RFS_RXFIFOOVERRUN) {
-				IPG_DEBUG_MSG("RX FIFO overrun occurred\n");
-				sp->stats.rx_fifo_errors++;
-			}
-
-			if (le64_to_cpu(rxfd->rfs) & IPG_RFS_RXRUNTFRAME) {
-				IPG_DEBUG_MSG("RX runt occurred\n");
-				sp->stats.rx_length_errors++;
-			}
-
-			if (le64_to_cpu(rxfd->rfs) & IPG_RFS_RXOVERSIZEDFRAME) ;
-			/* Do nothing, error count handled by a IPG
-			 * statistic register.
-			 */
-
-			if (le64_to_cpu(rxfd->rfs) & IPG_RFS_RXALIGNMENTERROR) {
-				IPG_DEBUG_MSG("RX alignment error occurred\n");
-				sp->stats.rx_frame_errors++;
-			}
-
-			if (le64_to_cpu(rxfd->rfs) & IPG_RFS_RXFCSERROR) ;
-			/* Do nothing, error count handled by a IPG
-			 * statistic register.
-			 */
-
-			/* Free the memory associated with the RX
-			 * buffer since it is erroneous and we will
-			 * not pass it to higher layer processes.
-			 */
-			if (skb) {
-				__le64 info = rxfd->frag_info;
-
-				pci_unmap_single(sp->pdev,
-					le64_to_cpu(info) & ~IPG_RFI_FRAGLEN,
-					sp->rx_buf_sz, PCI_DMA_FROMDEVICE);
-
-				dev_kfree_skb_irq(skb);
-			}
-		} else {
-
-			/* Adjust the new buffer length to accommodate the size
-			 * of the received frame.
-			 */
-			skb_put(skb, framelen);
-
-			/* Set the buffer's protocol field to Ethernet. */
-			skb->protocol = eth_type_trans(skb, dev);
-
-			/* The IPG encountered an error with (or
-			 * there were no) IP/TCP/UDP checksums.
-			 * This may or may not indicate an invalid
-			 * IP/TCP/UDP frame was received. Let the
-			 * upper layer decide.
-			 */
-			skb_checksum_none_assert(skb);
-
-			/* Hand off frame for higher layer processing.
-			 * The function netif_rx() releases the sk_buff
-			 * when processing completes.
-			 */
-			netif_rx(skb);
-		}
-
-		/* Assure RX buffer is not reused by IPG. */
-		sp->rx_buff[entry] = NULL;
-	}
-
-	/*
-	 * If there are more RFDs to process and the allocated amount of RFD
-	 * processing time has expired, assert Interrupt Requested to make
-	 * sure we come back to process the remaining RFDs.
-	 */
-	if (i == IPG_MAXRFDPROCESS_COUNT)
-		ipg_w32(ipg_r32(ASIC_CTRL) | IPG_AC_INT_REQUEST, ASIC_CTRL);
-
-#ifdef IPG_DEBUG
-	/* Check if the RFD list contained no receive frame data. */
-	if (!i)
-		sp->EmptyRFDListCount++;
-#endif
-	while ((le64_to_cpu(rxfd->rfs) & IPG_RFS_RFDDONE) &&
-	       !((le64_to_cpu(rxfd->rfs) & IPG_RFS_FRAMESTART) &&
-		 (le64_to_cpu(rxfd->rfs) & IPG_RFS_FRAMEEND))) {
-		unsigned int entry = curr++ % IPG_RFDLIST_LENGTH;
-
-		rxfd = sp->rxd + entry;
-
-		IPG_DEBUG_MSG("Frame requires multiple RFDs\n");
-
-		/* An unexpected event, additional code needed to handle
-		 * properly. So for the time being, just disregard the
-		 * frame.
-		 */
-
-		/* Free the memory associated with the RX
-		 * buffer since it is erroneous and we will
-		 * not pass it to higher layer processes.
-		 */
-		if (sp->rx_buff[entry]) {
-			pci_unmap_single(sp->pdev,
-				le64_to_cpu(rxfd->frag_info) & ~IPG_RFI_FRAGLEN,
-				sp->rx_buf_sz, PCI_DMA_FROMDEVICE);
-			dev_kfree_skb_irq(sp->rx_buff[entry]);
-		}
-
-		/* Assure RX buffer is not reused by IPG. */
-		sp->rx_buff[entry] = NULL;
-	}
-
-	sp->rx_current = curr;
-
-	/* Check to see if there are a minimum number of used
-	 * RFDs before restoring any (should improve performance.)
-	 */
-	if ((curr - sp->rx_dirty) >= IPG_MINUSEDRFDSTOFREE)
-		ipg_nic_rxrestore(dev);
-
-	return 0;
-}
-
-static void ipg_reset_after_host_error(struct work_struct *work)
-{
-	struct ipg_nic_private *sp =
-		container_of(work, struct ipg_nic_private, task.work);
-	struct net_device *dev = sp->dev;
-
-	/*
-	 * Acknowledge HostError interrupt by resetting
-	 * IPG DMA and HOST.
-	 */
-	ipg_reset(dev, IPG_AC_GLOBAL_RESET | IPG_AC_HOST | IPG_AC_DMA);
-
-	init_rfdlist(dev);
-	init_tfdlist(dev);
-
-	if (ipg_io_config(dev) < 0) {
-		netdev_info(dev, "Cannot recover from PCI error\n");
-		schedule_delayed_work(&sp->task, HZ);
-	}
-}
-
-static irqreturn_t ipg_interrupt_handler(int irq, void *dev_inst)
-{
-	struct net_device *dev = dev_inst;
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	void __iomem *ioaddr = sp->ioaddr;
-	unsigned int handled = 0;
-	u16 status;
-
-	IPG_DEBUG_MSG("_interrupt_handler\n");
-
-	if (sp->is_jumbo)
-		ipg_nic_rxrestore(dev);
-
-	spin_lock(&sp->lock);
-
-	/* Get interrupt source information, and acknowledge
-	 * some (i.e. TxDMAComplete, RxDMAComplete, RxEarly,
-	 * IntRequested, MacControlFrame, LinkEvent) interrupts
-	 * if issued. Also, all IPG interrupts are disabled by
-	 * reading IntStatusAck.
-	 */
-	status = ipg_r16(INT_STATUS_ACK);
-
-	IPG_DEBUG_MSG("IntStatusAck = %04x\n", status);
-
-	/* Shared IRQ of remove event. */
-	if (!(status & IPG_IS_RSVD_MASK))
-		goto out_enable;
-
-	handled = 1;
-
-	if (unlikely(!netif_running(dev)))
-		goto out_unlock;
-
-	/* If RFDListEnd interrupt, restore all used RFDs. */
-	if (status & IPG_IS_RFD_LIST_END) {
-		IPG_DEBUG_MSG("RFDListEnd Interrupt\n");
-
-		/* The RFD list end indicates an RFD was encountered
-		 * with a 0 NextPtr, or with an RFDDone bit set to 1
-		 * (indicating the RFD is not read for use by the
-		 * IPG.) Try to restore all RFDs.
-		 */
-		ipg_nic_rxrestore(dev);
-
-#ifdef IPG_DEBUG
-		/* Increment the RFDlistendCount counter. */
-		sp->RFDlistendCount++;
-#endif
-	}
-
-	/* If RFDListEnd, RxDMAPriority, RxDMAComplete, or
-	 * IntRequested interrupt, process received frames. */
-	if ((status & IPG_IS_RX_DMA_PRIORITY) ||
-	    (status & IPG_IS_RFD_LIST_END) ||
-	    (status & IPG_IS_RX_DMA_COMPLETE) ||
-	    (status & IPG_IS_INT_REQUESTED)) {
-#ifdef IPG_DEBUG
-		/* Increment the RFD list checked counter if interrupted
-		 * only to check the RFD list. */
-		if (status & (~(IPG_IS_RX_DMA_PRIORITY | IPG_IS_RFD_LIST_END |
-				IPG_IS_RX_DMA_COMPLETE | IPG_IS_INT_REQUESTED) &
-			       (IPG_IS_HOST_ERROR | IPG_IS_TX_DMA_COMPLETE |
-				IPG_IS_LINK_EVENT | IPG_IS_TX_COMPLETE |
-				IPG_IS_UPDATE_STATS)))
-			sp->RFDListCheckedCount++;
-#endif
-
-		if (sp->is_jumbo)
-			ipg_nic_rx_jumbo(dev);
-		else
-			ipg_nic_rx(dev);
-	}
-
-	/* If TxDMAComplete interrupt, free used TFDs. */
-	if (status & IPG_IS_TX_DMA_COMPLETE)
-		ipg_nic_txfree(dev);
-
-	/* TxComplete interrupts indicate one of numerous actions.
-	 * Determine what action to take based on TXSTATUS register.
-	 */
-	if (status & IPG_IS_TX_COMPLETE)
-		ipg_nic_txcleanup(dev);
-
-	/* If UpdateStats interrupt, update Linux Ethernet statistics */
-	if (status & IPG_IS_UPDATE_STATS)
-		ipg_nic_get_stats(dev);
-
-	/* If HostError interrupt, reset IPG. */
-	if (status & IPG_IS_HOST_ERROR) {
-		IPG_DDEBUG_MSG("HostError Interrupt\n");
-
-		schedule_delayed_work(&sp->task, 0);
-	}
-
-	/* If LinkEvent interrupt, resolve autonegotiation. */
-	if (status & IPG_IS_LINK_EVENT) {
-		if (ipg_config_autoneg(dev) < 0)
-			netdev_info(dev, "Auto-negotiation error\n");
-	}
-
-	/* If MACCtrlFrame interrupt, do nothing. */
-	if (status & IPG_IS_MAC_CTRL_FRAME)
-		IPG_DEBUG_MSG("MACCtrlFrame interrupt\n");
-
-	/* If RxComplete interrupt, do nothing. */
-	if (status & IPG_IS_RX_COMPLETE)
-		IPG_DEBUG_MSG("RxComplete interrupt\n");
-
-	/* If RxEarly interrupt, do nothing. */
-	if (status & IPG_IS_RX_EARLY)
-		IPG_DEBUG_MSG("RxEarly interrupt\n");
-
-out_enable:
-	/* Re-enable IPG interrupts. */
-	ipg_w16(IPG_IE_TX_DMA_COMPLETE | IPG_IE_RX_DMA_COMPLETE |
-		IPG_IE_HOST_ERROR | IPG_IE_INT_REQUESTED | IPG_IE_TX_COMPLETE |
-		IPG_IE_LINK_EVENT | IPG_IE_UPDATE_STATS, INT_ENABLE);
-out_unlock:
-	spin_unlock(&sp->lock);
-
-	return IRQ_RETVAL(handled);
-}
-
-static void ipg_rx_clear(struct ipg_nic_private *sp)
-{
-	unsigned int i;
-
-	for (i = 0; i < IPG_RFDLIST_LENGTH; i++) {
-		if (sp->rx_buff[i]) {
-			struct ipg_rx *rxfd = sp->rxd + i;
-
-			dev_kfree_skb_irq(sp->rx_buff[i]);
-			sp->rx_buff[i] = NULL;
-			pci_unmap_single(sp->pdev,
-				le64_to_cpu(rxfd->frag_info) & ~IPG_RFI_FRAGLEN,
-				sp->rx_buf_sz, PCI_DMA_FROMDEVICE);
-		}
-	}
-}
-
-static void ipg_tx_clear(struct ipg_nic_private *sp)
-{
-	unsigned int i;
-
-	for (i = 0; i < IPG_TFDLIST_LENGTH; i++) {
-		if (sp->tx_buff[i]) {
-			struct ipg_tx *txfd = sp->txd + i;
-
-			pci_unmap_single(sp->pdev,
-				le64_to_cpu(txfd->frag_info) & ~IPG_TFI_FRAGLEN,
-				sp->tx_buff[i]->len, PCI_DMA_TODEVICE);
-
-			dev_kfree_skb_irq(sp->tx_buff[i]);
-
-			sp->tx_buff[i] = NULL;
-		}
-	}
-}
-
-static int ipg_nic_open(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	void __iomem *ioaddr = sp->ioaddr;
-	struct pci_dev *pdev = sp->pdev;
-	int rc;
-
-	IPG_DEBUG_MSG("_nic_open\n");
-
-	sp->rx_buf_sz = sp->rxsupport_size;
-
-	/* Check for interrupt line conflicts, and request interrupt
-	 * line for IPG.
-	 *
-	 * IMPORTANT: Disable IPG interrupts prior to registering
-	 *            IRQ.
-	 */
-	ipg_w16(0x0000, INT_ENABLE);
-
-	/* Register the interrupt line to be used by the IPG within
-	 * the Linux system.
-	 */
-	rc = request_irq(pdev->irq, ipg_interrupt_handler, IRQF_SHARED,
-			 dev->name, dev);
-	if (rc < 0) {
-		netdev_info(dev, "Error when requesting interrupt\n");
-		goto out;
-	}
-
-	dev->irq = pdev->irq;
-
-	rc = -ENOMEM;
-
-	sp->rxd = dma_alloc_coherent(&pdev->dev, IPG_RX_RING_BYTES,
-				     &sp->rxd_map, GFP_KERNEL);
-	if (!sp->rxd)
-		goto err_free_irq_0;
-
-	sp->txd = dma_alloc_coherent(&pdev->dev, IPG_TX_RING_BYTES,
-				     &sp->txd_map, GFP_KERNEL);
-	if (!sp->txd)
-		goto err_free_rx_1;
-
-	rc = init_rfdlist(dev);
-	if (rc < 0) {
-		netdev_info(dev, "Error during configuration\n");
-		goto err_free_tx_2;
-	}
-
-	init_tfdlist(dev);
-
-	rc = ipg_io_config(dev);
-	if (rc < 0) {
-		netdev_info(dev, "Error during configuration\n");
-		goto err_release_tfdlist_3;
-	}
-
-	/* Resolve autonegotiation. */
-	if (ipg_config_autoneg(dev) < 0)
-		netdev_info(dev, "Auto-negotiation error\n");
-
-	/* initialize JUMBO Frame control variable */
-	sp->jumbo.found_start = 0;
-	sp->jumbo.current_size = 0;
-	sp->jumbo.skb = NULL;
-
-	/* Enable transmit and receive operation of the IPG. */
-	ipg_w32((ipg_r32(MAC_CTRL) | IPG_MC_RX_ENABLE | IPG_MC_TX_ENABLE) &
-		 IPG_MC_RSVD_MASK, MAC_CTRL);
-
-	netif_start_queue(dev);
-out:
-	return rc;
-
-err_release_tfdlist_3:
-	ipg_tx_clear(sp);
-	ipg_rx_clear(sp);
-err_free_tx_2:
-	dma_free_coherent(&pdev->dev, IPG_TX_RING_BYTES, sp->txd, sp->txd_map);
-err_free_rx_1:
-	dma_free_coherent(&pdev->dev, IPG_RX_RING_BYTES, sp->rxd, sp->rxd_map);
-err_free_irq_0:
-	free_irq(pdev->irq, dev);
-	goto out;
-}
-
-static int ipg_nic_stop(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	void __iomem *ioaddr = sp->ioaddr;
-	struct pci_dev *pdev = sp->pdev;
-
-	IPG_DEBUG_MSG("_nic_stop\n");
-
-	netif_stop_queue(dev);
-
-	IPG_DUMPTFDLIST(dev);
-
-	do {
-		(void) ipg_r16(INT_STATUS_ACK);
-
-		ipg_reset(dev, IPG_AC_GLOBAL_RESET | IPG_AC_HOST | IPG_AC_DMA);
-
-		synchronize_irq(pdev->irq);
-	} while (ipg_r16(INT_ENABLE) & IPG_IE_RSVD_MASK);
-
-	ipg_rx_clear(sp);
-
-	ipg_tx_clear(sp);
-
-	pci_free_consistent(pdev, IPG_RX_RING_BYTES, sp->rxd, sp->rxd_map);
-	pci_free_consistent(pdev, IPG_TX_RING_BYTES, sp->txd, sp->txd_map);
-
-	free_irq(pdev->irq, dev);
-
-	return 0;
-}
-
-static netdev_tx_t ipg_nic_hard_start_xmit(struct sk_buff *skb,
-					   struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	void __iomem *ioaddr = sp->ioaddr;
-	unsigned int entry = sp->tx_current % IPG_TFDLIST_LENGTH;
-	unsigned long flags;
-	struct ipg_tx *txfd;
-
-	IPG_DDEBUG_MSG("_nic_hard_start_xmit\n");
-
-	/* If in 10Mbps mode, stop the transmit queue so
-	 * no more transmit frames are accepted.
-	 */
-	if (sp->tenmbpsmode)
-		netif_stop_queue(dev);
-
-	if (sp->reset_current_tfd) {
-		sp->reset_current_tfd = 0;
-		entry = 0;
-	}
-
-	txfd = sp->txd + entry;
-
-	sp->tx_buff[entry] = skb;
-
-	/* Clear all TFC fields, except TFDDONE. */
-	txfd->tfc = cpu_to_le64(IPG_TFC_TFDDONE);
-
-	/* Specify the TFC field within the TFD. */
-	txfd->tfc |= cpu_to_le64(IPG_TFC_WORDALIGNDISABLED |
-		(IPG_TFC_FRAMEID & sp->tx_current) |
-		(IPG_TFC_FRAGCOUNT & (1 << 24)));
-	/*
-	 * 16--17 (WordAlign) <- 3 (disable),
-	 * 0--15 (FrameId) <- sp->tx_current,
-	 * 24--27 (FragCount) <- 1
-	 */
-
-	/* Request TxComplete interrupts at an interval defined
-	 * by the constant IPG_FRAMESBETWEENTXCOMPLETES.
-	 * Request TxComplete interrupt for every frame
-	 * if in 10Mbps mode to accommodate problem with 10Mbps
-	 * processing.
-	 */
-	if (sp->tenmbpsmode)
-		txfd->tfc |= cpu_to_le64(IPG_TFC_TXINDICATE);
-	txfd->tfc |= cpu_to_le64(IPG_TFC_TXDMAINDICATE);
-	/* Based on compilation option, determine if FCS is to be
-	 * appended to transmit frame by IPG.
-	 */
-	if (!(IPG_APPEND_FCS_ON_TX))
-		txfd->tfc |= cpu_to_le64(IPG_TFC_FCSAPPENDDISABLE);
-
-	/* Based on compilation option, determine if IP, TCP and/or
-	 * UDP checksums are to be added to transmit frame by IPG.
-	 */
-	if (IPG_ADD_IPCHECKSUM_ON_TX)
-		txfd->tfc |= cpu_to_le64(IPG_TFC_IPCHECKSUMENABLE);
-
-	if (IPG_ADD_TCPCHECKSUM_ON_TX)
-		txfd->tfc |= cpu_to_le64(IPG_TFC_TCPCHECKSUMENABLE);
-
-	if (IPG_ADD_UDPCHECKSUM_ON_TX)
-		txfd->tfc |= cpu_to_le64(IPG_TFC_UDPCHECKSUMENABLE);
-
-	/* Based on compilation option, determine if VLAN tag info is to be
-	 * inserted into transmit frame by IPG.
-	 */
-	if (IPG_INSERT_MANUAL_VLAN_TAG) {
-		txfd->tfc |= cpu_to_le64(IPG_TFC_VLANTAGINSERT |
-			((u64) IPG_MANUAL_VLAN_VID << 32) |
-			((u64) IPG_MANUAL_VLAN_CFI << 44) |
-			((u64) IPG_MANUAL_VLAN_USERPRIORITY << 45));
-	}
-
-	/* The fragment start location within system memory is defined
-	 * by the sk_buff structure's data field. The physical address
-	 * of this location within the system's virtual memory space
-	 * is determined using the IPG_HOST2BUS_MAP function.
-	 */
-	txfd->frag_info = cpu_to_le64(pci_map_single(sp->pdev, skb->data,
-		skb->len, PCI_DMA_TODEVICE));
-
-	/* The length of the fragment within system memory is defined by
-	 * the sk_buff structure's len field.
-	 */
-	txfd->frag_info |= cpu_to_le64(IPG_TFI_FRAGLEN &
-		((u64) (skb->len & 0xffff) << 48));
-
-	/* Clear the TFDDone bit last to indicate the TFD is ready
-	 * for transfer to the IPG.
-	 */
-	txfd->tfc &= cpu_to_le64(~IPG_TFC_TFDDONE);
-
-	spin_lock_irqsave(&sp->lock, flags);
-
-	sp->tx_current++;
-
-	mmiowb();
-
-	ipg_w32(IPG_DC_TX_DMA_POLL_NOW, DMA_CTRL);
-
-	if (sp->tx_current == (sp->tx_dirty + IPG_TFDLIST_LENGTH))
-		netif_stop_queue(dev);
-
-	spin_unlock_irqrestore(&sp->lock, flags);
-
-	return NETDEV_TX_OK;
-}
-
-static void ipg_set_phy_default_param(unsigned char rev,
-				      struct net_device *dev, int phy_address)
-{
-	unsigned short length;
-	unsigned char revision;
-	const unsigned short *phy_param;
-	unsigned short address, value;
-
-	phy_param = &DefaultPhyParam[0];
-	length = *phy_param & 0x00FF;
-	revision = (unsigned char)((*phy_param) >> 8);
-	phy_param++;
-	while (length != 0) {
-		if (rev == revision) {
-			while (length > 1) {
-				address = *phy_param;
-				value = *(phy_param + 1);
-				phy_param += 2;
-				mdio_write(dev, phy_address, address, value);
-				length -= 4;
-			}
-			break;
-		} else {
-			phy_param += length / 2;
-			length = *phy_param & 0x00FF;
-			revision = (unsigned char)((*phy_param) >> 8);
-			phy_param++;
-		}
-	}
-}
-
-static int read_eeprom(struct net_device *dev, int eep_addr)
-{
-	void __iomem *ioaddr = ipg_ioaddr(dev);
-	unsigned int i;
-	int ret = 0;
-	u16 value;
-
-	value = IPG_EC_EEPROM_READOPCODE | (eep_addr & 0xff);
-	ipg_w16(value, EEPROM_CTRL);
-
-	for (i = 0; i < 1000; i++) {
-		u16 data;
-
-		mdelay(10);
-		data = ipg_r16(EEPROM_CTRL);
-		if (!(data & IPG_EC_EEPROM_BUSY)) {
-			ret = ipg_r16(EEPROM_DATA);
-			break;
-		}
-	}
-	return ret;
-}
-
-static void ipg_init_mii(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	struct mii_if_info *mii_if = &sp->mii_if;
-	int phyaddr;
-
-	mii_if->dev          = dev;
-	mii_if->mdio_read    = mdio_read;
-	mii_if->mdio_write   = mdio_write;
-	mii_if->phy_id_mask  = 0x1f;
-	mii_if->reg_num_mask = 0x1f;
-
-	mii_if->phy_id = phyaddr = ipg_find_phyaddr(dev);
-
-	if (phyaddr != 0x1f) {
-		u16 mii_phyctrl, mii_1000cr;
-
-		mii_1000cr  = mdio_read(dev, phyaddr, MII_CTRL1000);
-		mii_1000cr |= ADVERTISE_1000FULL | ADVERTISE_1000HALF |
-			GMII_PHY_1000BASETCONTROL_PreferMaster;
-		mdio_write(dev, phyaddr, MII_CTRL1000, mii_1000cr);
-
-		mii_phyctrl = mdio_read(dev, phyaddr, MII_BMCR);
-
-		/* Set default phyparam */
-		ipg_set_phy_default_param(sp->pdev->revision, dev, phyaddr);
-
-		/* Reset PHY */
-		mii_phyctrl |= BMCR_RESET | BMCR_ANRESTART;
-		mdio_write(dev, phyaddr, MII_BMCR, mii_phyctrl);
-
-	}
-}
-
-static int ipg_hw_init(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	void __iomem *ioaddr = sp->ioaddr;
-	unsigned int i;
-	int rc;
-
-	/* Read/Write and Reset EEPROM Value */
-	/* Read LED Mode Configuration from EEPROM */
-	sp->led_mode = read_eeprom(dev, 6);
-
-	/* Reset all functions within the IPG. Do not assert
-	 * RST_OUT as not compatible with some PHYs.
-	 */
-	rc = ipg_reset(dev, IPG_RESET_MASK);
-	if (rc < 0)
-		goto out;
-
-	ipg_init_mii(dev);
-
-	/* Read MAC Address from EEPROM */
-	for (i = 0; i < 3; i++)
-		sp->station_addr[i] = read_eeprom(dev, 16 + i);
-
-	for (i = 0; i < 3; i++)
-		ipg_w16(sp->station_addr[i], STATION_ADDRESS_0 + 2*i);
-
-	/* Set station address in ethernet_device structure. */
-	dev->dev_addr[0] =  ipg_r16(STATION_ADDRESS_0) & 0x00ff;
-	dev->dev_addr[1] = (ipg_r16(STATION_ADDRESS_0) & 0xff00) >> 8;
-	dev->dev_addr[2] =  ipg_r16(STATION_ADDRESS_1) & 0x00ff;
-	dev->dev_addr[3] = (ipg_r16(STATION_ADDRESS_1) & 0xff00) >> 8;
-	dev->dev_addr[4] =  ipg_r16(STATION_ADDRESS_2) & 0x00ff;
-	dev->dev_addr[5] = (ipg_r16(STATION_ADDRESS_2) & 0xff00) >> 8;
-out:
-	return rc;
-}
-
-static int ipg_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	int rc;
-
-	mutex_lock(&sp->mii_mutex);
-	rc = generic_mii_ioctl(&sp->mii_if, if_mii(ifr), cmd, NULL);
-	mutex_unlock(&sp->mii_mutex);
-
-	return rc;
-}
-
-static int ipg_nic_change_mtu(struct net_device *dev, int new_mtu)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	int err;
-
-	/* Function to accommodate changes to Maximum Transfer Unit
-	 * (or MTU) of IPG NIC. Cannot use default function since
-	 * the default will not allow for MTU > 1500 bytes.
-	 */
-
-	IPG_DEBUG_MSG("_nic_change_mtu\n");
-
-	/*
-	 * Check that the new MTU value is between 68 (14 byte header, 46 byte
-	 * payload, 4 byte FCS) and 10 KB, which is the largest supported MTU.
-	 */
-	if (new_mtu < 68 || new_mtu > 10240)
-		return -EINVAL;
-
-	err = ipg_nic_stop(dev);
-	if (err)
-		return err;
-
-	dev->mtu = new_mtu;
-
-	sp->max_rxframe_size = new_mtu;
-
-	sp->rxfrag_size = new_mtu;
-	if (sp->rxfrag_size > 4088)
-		sp->rxfrag_size = 4088;
-
-	sp->rxsupport_size = sp->max_rxframe_size;
-
-	if (new_mtu > 0x0600)
-		sp->is_jumbo = true;
-	else
-		sp->is_jumbo = false;
-
-	return ipg_nic_open(dev);
-}
-
-static int ipg_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	int rc;
-
-	mutex_lock(&sp->mii_mutex);
-	rc = mii_ethtool_gset(&sp->mii_if, cmd);
-	mutex_unlock(&sp->mii_mutex);
-
-	return rc;
-}
-
-static int ipg_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	int rc;
-
-	mutex_lock(&sp->mii_mutex);
-	rc = mii_ethtool_sset(&sp->mii_if, cmd);
-	mutex_unlock(&sp->mii_mutex);
-
-	return rc;
-}
-
-static int ipg_nway_reset(struct net_device *dev)
-{
-	struct ipg_nic_private *sp = netdev_priv(dev);
-	int rc;
-
-	mutex_lock(&sp->mii_mutex);
-	rc = mii_nway_restart(&sp->mii_if);
-	mutex_unlock(&sp->mii_mutex);
-
-	return rc;
-}
-
-static const struct ethtool_ops ipg_ethtool_ops = {
-	.get_settings = ipg_get_settings,
-	.set_settings = ipg_set_settings,
-	.nway_reset   = ipg_nway_reset,
-};
-
-static void ipg_remove(struct pci_dev *pdev)
-{
-	struct net_device *dev = pci_get_drvdata(pdev);
-	struct ipg_nic_private *sp = netdev_priv(dev);
-
-	IPG_DEBUG_MSG("_remove\n");
-
-	/* Un-register Ethernet device. */
-	unregister_netdev(dev);
-
-	pci_iounmap(pdev, sp->ioaddr);
-
-	pci_release_regions(pdev);
-
-	free_netdev(dev);
-	pci_disable_device(pdev);
-}
-
-static const struct net_device_ops ipg_netdev_ops = {
-	.ndo_open		= ipg_nic_open,
-	.ndo_stop		= ipg_nic_stop,
-	.ndo_start_xmit		= ipg_nic_hard_start_xmit,
-	.ndo_get_stats		= ipg_nic_get_stats,
-	.ndo_set_rx_mode	= ipg_nic_set_multicast_list,
-	.ndo_do_ioctl		= ipg_ioctl,
-	.ndo_tx_timeout 	= ipg_tx_timeout,
-	.ndo_change_mtu 	= ipg_nic_change_mtu,
-	.ndo_set_mac_address	= eth_mac_addr,
-	.ndo_validate_addr	= eth_validate_addr,
-};
-
-static int ipg_probe(struct pci_dev *pdev, const struct pci_device_id *id)
-{
-	unsigned int i = id->driver_data;
-	struct ipg_nic_private *sp;
-	struct net_device *dev;
-	void __iomem *ioaddr;
-	int rc;
-
-	rc = pci_enable_device(pdev);
-	if (rc < 0)
-		goto out;
-
-	pr_info("%s: %s\n", pci_name(pdev), ipg_brand_name[i]);
-
-	pci_set_master(pdev);
-
-	rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(40));
-	if (rc < 0) {
-		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
-		if (rc < 0) {
-			pr_err("%s: DMA config failed\n", pci_name(pdev));
-			goto err_disable_0;
-		}
-	}
-
-	/*
-	 * Initialize net device.
-	 */
-	dev = alloc_etherdev(sizeof(struct ipg_nic_private));
-	if (!dev) {
-		rc = -ENOMEM;
-		goto err_disable_0;
-	}
-
-	sp = netdev_priv(dev);
-	spin_lock_init(&sp->lock);
-	mutex_init(&sp->mii_mutex);
-
-	sp->is_jumbo = IPG_IS_JUMBO;
-	sp->rxfrag_size = IPG_RXFRAG_SIZE;
-	sp->rxsupport_size = IPG_RXSUPPORT_SIZE;
-	sp->max_rxframe_size = IPG_MAX_RXFRAME_SIZE;
-
-	/* Declare IPG NIC functions for Ethernet device methods.
-	 */
-	dev->netdev_ops = &ipg_netdev_ops;
-	SET_NETDEV_DEV(dev, &pdev->dev);
-	dev->ethtool_ops = &ipg_ethtool_ops;
-
-	rc = pci_request_regions(pdev, DRV_NAME);
-	if (rc)
-		goto err_free_dev_1;
-
-	ioaddr = pci_iomap(pdev, 1, pci_resource_len(pdev, 1));
-	if (!ioaddr) {
-		pr_err("%s: cannot map MMIO\n", pci_name(pdev));
-		rc = -EIO;
-		goto err_release_regions_2;
-	}
-
-	/* Save the pointer to the PCI device information. */
-	sp->ioaddr = ioaddr;
-	sp->pdev = pdev;
-	sp->dev = dev;
-
-	INIT_DELAYED_WORK(&sp->task, ipg_reset_after_host_error);
-
-	pci_set_drvdata(pdev, dev);
-
-	rc = ipg_hw_init(dev);
-	if (rc < 0)
-		goto err_unmap_3;
-
-	rc = register_netdev(dev);
-	if (rc < 0)
-		goto err_unmap_3;
-
-	netdev_info(dev, "Ethernet device registered\n");
-out:
-	return rc;
-
-err_unmap_3:
-	pci_iounmap(pdev, ioaddr);
-err_release_regions_2:
-	pci_release_regions(pdev);
-err_free_dev_1:
-	free_netdev(dev);
-err_disable_0:
-	pci_disable_device(pdev);
-	goto out;
-}
-
-static struct pci_driver ipg_pci_driver = {
-	.name		= IPG_DRIVER_NAME,
-	.id_table	= ipg_pci_tbl,
-	.probe		= ipg_probe,
-	.remove		= ipg_remove,
-};
-
-module_pci_driver(ipg_pci_driver);
diff --git a/drivers/net/ethernet/icplus/ipg.h b/drivers/net/ethernet/icplus/ipg.h
deleted file mode 100644
index de60628..0000000
--- a/drivers/net/ethernet/icplus/ipg.h
+++ /dev/null
@@ -1,748 +0,0 @@
-/*
- * Include file for Gigabit Ethernet device driver for Network
- * Interface Cards (NICs) utilizing the Tamarack Microelectronics
- * Inc. IPG Gigabit or Triple Speed Ethernet Media Access
- * Controller.
- */
-#ifndef __LINUX_IPG_H
-#define __LINUX_IPG_H
-
-#include <linux/module.h>
-
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/ioport.h>
-#include <linux/errno.h>
-#include <asm/io.h>
-#include <linux/delay.h>
-#include <linux/types.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/skbuff.h>
-#include <asm/bitops.h>
-
-/*
- *	Constants
- */
-
-/* GMII based PHY IDs */
-#define		NS				0x2000
-#define		MARVELL				0x0141
-#define		ICPLUS_PHY			0x243
-
-/* NIC Physical Layer Device MII register fields. */
-#define         MII_PHY_SELECTOR_IEEE8023       0x0001
-#define         MII_PHY_TECHABILITYFIELD        0x1FE0
-
-/* GMII_PHY_1000 need to set to prefer master */
-#define         GMII_PHY_1000BASETCONTROL_PreferMaster 0x0400
-
-/* NIC Physical Layer Device GMII constants. */
-#define         GMII_PREAMBLE                    0xFFFFFFFF
-#define         GMII_ST                          0x1
-#define         GMII_READ                        0x2
-#define         GMII_WRITE                       0x1
-#define         GMII_TA_READ_MASK                0x1
-#define         GMII_TA_WRITE                    0x2
-
-/* I/O register offsets. */
-enum ipg_regs {
-	DMA_CTRL		= 0x00,
-	RX_DMA_STATUS		= 0x08, /* Unused + reserved */
-	TFD_LIST_PTR_0		= 0x10,
-	TFD_LIST_PTR_1		= 0x14,
-	TX_DMA_BURST_THRESH	= 0x18,
-	TX_DMA_URGENT_THRESH	= 0x19,
-	TX_DMA_POLL_PERIOD	= 0x1a,
-	RFD_LIST_PTR_0		= 0x1c,
-	RFD_LIST_PTR_1		= 0x20,
-	RX_DMA_BURST_THRESH	= 0x24,
-	RX_DMA_URGENT_THRESH	= 0x25,
-	RX_DMA_POLL_PERIOD	= 0x26,
-	DEBUG_CTRL		= 0x2c,
-	ASIC_CTRL		= 0x30,
-	FIFO_CTRL		= 0x38, /* Unused */
-	FLOW_OFF_THRESH		= 0x3c,
-	FLOW_ON_THRESH		= 0x3e,
-	EEPROM_DATA		= 0x48,
-	EEPROM_CTRL		= 0x4a,
-	EXPROM_ADDR		= 0x4c, /* Unused */
-	EXPROM_DATA		= 0x50, /* Unused */
-	WAKE_EVENT		= 0x51, /* Unused */
-	COUNTDOWN		= 0x54, /* Unused */
-	INT_STATUS_ACK		= 0x5a,
-	INT_ENABLE		= 0x5c,
-	INT_STATUS		= 0x5e, /* Unused */
-	TX_STATUS		= 0x60,
-	MAC_CTRL		= 0x6c,
-	VLAN_TAG		= 0x70, /* Unused */
-	PHY_SET			= 0x75,
-	PHY_CTRL		= 0x76,
-	STATION_ADDRESS_0	= 0x78,
-	STATION_ADDRESS_1	= 0x7a,
-	STATION_ADDRESS_2	= 0x7c,
-	MAX_FRAME_SIZE		= 0x86,
-	RECEIVE_MODE		= 0x88,
-	HASHTABLE_0		= 0x8c,
-	HASHTABLE_1		= 0x90,
-	RMON_STATISTICS_MASK	= 0x98,
-	STATISTICS_MASK		= 0x9c,
-	RX_JUMBO_FRAMES		= 0xbc, /* Unused */
-	TCP_CHECKSUM_ERRORS	= 0xc0, /* Unused */
-	IP_CHECKSUM_ERRORS	= 0xc2, /* Unused */
-	UDP_CHECKSUM_ERRORS	= 0xc4, /* Unused */
-	TX_JUMBO_FRAMES		= 0xf4  /* Unused */
-};
-
-/* Ethernet MIB statistic register offsets. */
-#define	IPG_OCTETRCVOK			0xA8
-#define	IPG_MCSTOCTETRCVDOK		0xAC
-#define	IPG_BCSTOCTETRCVOK		0xB0
-#define	IPG_FRAMESRCVDOK		0xB4
-#define	IPG_MCSTFRAMESRCVDOK		0xB8
-#define	IPG_BCSTFRAMESRCVDOK		0xBE
-#define	IPG_MACCONTROLFRAMESRCVD	0xC6
-#define	IPG_FRAMETOOLONGERRORS		0xC8
-#define	IPG_INRANGELENGTHERRORS		0xCA
-#define	IPG_FRAMECHECKSEQERRORS		0xCC
-#define	IPG_FRAMESLOSTRXERRORS		0xCE
-#define	IPG_OCTETXMTOK			0xD0
-#define	IPG_MCSTOCTETXMTOK		0xD4
-#define	IPG_BCSTOCTETXMTOK		0xD8
-#define	IPG_FRAMESXMTDOK		0xDC
-#define	IPG_MCSTFRAMESXMTDOK		0xE0
-#define	IPG_FRAMESWDEFERREDXMT		0xE4
-#define	IPG_LATECOLLISIONS		0xE8
-#define	IPG_MULTICOLFRAMES		0xEC
-#define	IPG_SINGLECOLFRAMES		0xF0
-#define	IPG_BCSTFRAMESXMTDOK		0xF6
-#define	IPG_CARRIERSENSEERRORS		0xF8
-#define	IPG_MACCONTROLFRAMESXMTDOK	0xFA
-#define	IPG_FRAMESABORTXSCOLLS		0xFC
-#define	IPG_FRAMESWEXDEFERRAL		0xFE
-
-/* RMON statistic register offsets. */
-#define	IPG_ETHERSTATSCOLLISIONS			0x100
-#define	IPG_ETHERSTATSOCTETSTRANSMIT			0x104
-#define	IPG_ETHERSTATSPKTSTRANSMIT			0x108
-#define	IPG_ETHERSTATSPKTS64OCTESTSTRANSMIT		0x10C
-#define	IPG_ETHERSTATSPKTS65TO127OCTESTSTRANSMIT	0x110
-#define	IPG_ETHERSTATSPKTS128TO255OCTESTSTRANSMIT	0x114
-#define	IPG_ETHERSTATSPKTS256TO511OCTESTSTRANSMIT	0x118
-#define	IPG_ETHERSTATSPKTS512TO1023OCTESTSTRANSMIT	0x11C
-#define	IPG_ETHERSTATSPKTS1024TO1518OCTESTSTRANSMIT	0x120
-#define	IPG_ETHERSTATSCRCALIGNERRORS			0x124
-#define	IPG_ETHERSTATSUNDERSIZEPKTS			0x128
-#define	IPG_ETHERSTATSFRAGMENTS				0x12C
-#define	IPG_ETHERSTATSJABBERS				0x130
-#define	IPG_ETHERSTATSOCTETS				0x134
-#define	IPG_ETHERSTATSPKTS				0x138
-#define	IPG_ETHERSTATSPKTS64OCTESTS			0x13C
-#define	IPG_ETHERSTATSPKTS65TO127OCTESTS		0x140
-#define	IPG_ETHERSTATSPKTS128TO255OCTESTS		0x144
-#define	IPG_ETHERSTATSPKTS256TO511OCTESTS		0x148
-#define	IPG_ETHERSTATSPKTS512TO1023OCTESTS		0x14C
-#define	IPG_ETHERSTATSPKTS1024TO1518OCTESTS		0x150
-
-/* RMON statistic register equivalents. */
-#define	IPG_ETHERSTATSMULTICASTPKTSTRANSMIT		0xE0
-#define	IPG_ETHERSTATSBROADCASTPKTSTRANSMIT		0xF6
-#define	IPG_ETHERSTATSMULTICASTPKTS			0xB8
-#define	IPG_ETHERSTATSBROADCASTPKTS			0xBE
-#define	IPG_ETHERSTATSOVERSIZEPKTS			0xC8
-#define	IPG_ETHERSTATSDROPEVENTS			0xCE
-
-/* Serial EEPROM offsets */
-#define	IPG_EEPROM_CONFIGPARAM		0x00
-#define	IPG_EEPROM_ASICCTRL		0x01
-#define	IPG_EEPROM_SUBSYSTEMVENDORID	0x02
-#define	IPG_EEPROM_SUBSYSTEMID		0x03
-#define	IPG_EEPROM_STATIONADDRESS0	0x10
-#define	IPG_EEPROM_STATIONADDRESS1	0x11
-#define	IPG_EEPROM_STATIONADDRESS2	0x12
-
-/* Register & data structure bit masks */
-
-/* PCI register masks. */
-
-/* IOBaseAddress */
-#define         IPG_PIB_RSVD_MASK		0xFFFFFE01
-#define         IPG_PIB_IOBASEADDRESS		0xFFFFFF00
-#define         IPG_PIB_IOBASEADDRIND		0x00000001
-
-/* MemBaseAddress */
-#define         IPG_PMB_RSVD_MASK		0xFFFFFE07
-#define         IPG_PMB_MEMBASEADDRIND		0x00000001
-#define         IPG_PMB_MEMMAPTYPE		0x00000006
-#define         IPG_PMB_MEMMAPTYPE0		0x00000002
-#define         IPG_PMB_MEMMAPTYPE1		0x00000004
-#define         IPG_PMB_MEMBASEADDRESS		0xFFFFFE00
-
-/* ConfigStatus */
-#define IPG_CS_RSVD_MASK                0xFFB0
-#define IPG_CS_CAPABILITIES             0x0010
-#define IPG_CS_66MHZCAPABLE             0x0020
-#define IPG_CS_FASTBACK2BACK            0x0080
-#define IPG_CS_DATAPARITYREPORTED       0x0100
-#define IPG_CS_DEVSELTIMING             0x0600
-#define IPG_CS_SIGNALEDTARGETABORT      0x0800
-#define IPG_CS_RECEIVEDTARGETABORT      0x1000
-#define IPG_CS_RECEIVEDMASTERABORT      0x2000
-#define IPG_CS_SIGNALEDSYSTEMERROR      0x4000
-#define IPG_CS_DETECTEDPARITYERROR      0x8000
-
-/* TFD data structure masks. */
-
-/* TFDList, TFC */
-#define	IPG_TFC_RSVD_MASK			0x0000FFFF9FFFFFFFULL
-#define	IPG_TFC_FRAMEID				0x000000000000FFFFULL
-#define	IPG_TFC_WORDALIGN			0x0000000000030000ULL
-#define	IPG_TFC_WORDALIGNTODWORD		0x0000000000000000ULL
-#define	IPG_TFC_WORDALIGNTOWORD			0x0000000000020000ULL
-#define	IPG_TFC_WORDALIGNDISABLED		0x0000000000030000ULL
-#define	IPG_TFC_TCPCHECKSUMENABLE		0x0000000000040000ULL
-#define	IPG_TFC_UDPCHECKSUMENABLE		0x0000000000080000ULL
-#define	IPG_TFC_IPCHECKSUMENABLE		0x0000000000100000ULL
-#define	IPG_TFC_FCSAPPENDDISABLE		0x0000000000200000ULL
-#define	IPG_TFC_TXINDICATE			0x0000000000400000ULL
-#define	IPG_TFC_TXDMAINDICATE			0x0000000000800000ULL
-#define	IPG_TFC_FRAGCOUNT			0x000000000F000000ULL
-#define	IPG_TFC_VLANTAGINSERT			0x0000000010000000ULL
-#define	IPG_TFC_TFDDONE				0x0000000080000000ULL
-#define	IPG_TFC_VID				0x00000FFF00000000ULL
-#define	IPG_TFC_CFI				0x0000100000000000ULL
-#define	IPG_TFC_USERPRIORITY			0x0000E00000000000ULL
-
-/* TFDList, FragInfo */
-#define	IPG_TFI_RSVD_MASK			0xFFFF00FFFFFFFFFFULL
-#define	IPG_TFI_FRAGADDR			0x000000FFFFFFFFFFULL
-#define	IPG_TFI_FRAGLEN				0xFFFF000000000000ULL
-
-/* RFD data structure masks. */
-
-/* RFDList, RFS */
-#define	IPG_RFS_RSVD_MASK			0x0000FFFFFFFFFFFFULL
-#define	IPG_RFS_RXFRAMELEN			0x000000000000FFFFULL
-#define	IPG_RFS_RXFIFOOVERRUN			0x0000000000010000ULL
-#define	IPG_RFS_RXRUNTFRAME			0x0000000000020000ULL
-#define	IPG_RFS_RXALIGNMENTERROR		0x0000000000040000ULL
-#define	IPG_RFS_RXFCSERROR			0x0000000000080000ULL
-#define	IPG_RFS_RXOVERSIZEDFRAME		0x0000000000100000ULL
-#define	IPG_RFS_RXLENGTHERROR			0x0000000000200000ULL
-#define	IPG_RFS_VLANDETECTED			0x0000000000400000ULL
-#define	IPG_RFS_TCPDETECTED			0x0000000000800000ULL
-#define	IPG_RFS_TCPERROR			0x0000000001000000ULL
-#define	IPG_RFS_UDPDETECTED			0x0000000002000000ULL
-#define	IPG_RFS_UDPERROR			0x0000000004000000ULL
-#define	IPG_RFS_IPDETECTED			0x0000000008000000ULL
-#define	IPG_RFS_IPERROR				0x0000000010000000ULL
-#define	IPG_RFS_FRAMESTART			0x0000000020000000ULL
-#define	IPG_RFS_FRAMEEND			0x0000000040000000ULL
-#define	IPG_RFS_RFDDONE				0x0000000080000000ULL
-#define	IPG_RFS_TCI				0x0000FFFF00000000ULL
-
-/* RFDList, FragInfo */
-#define	IPG_RFI_RSVD_MASK			0xFFFF00FFFFFFFFFFULL
-#define	IPG_RFI_FRAGADDR			0x000000FFFFFFFFFFULL
-#define	IPG_RFI_FRAGLEN				0xFFFF000000000000ULL
-
-/* I/O Register masks. */
-
-/* RMON Statistics Mask */
-#define	IPG_RZ_ALL					0x0FFFFFFF
-
-/* Statistics Mask */
-#define	IPG_SM_ALL					0x0FFFFFFF
-#define	IPG_SM_OCTETRCVOK_FRAMESRCVDOK			0x00000001
-#define	IPG_SM_MCSTOCTETRCVDOK_MCSTFRAMESRCVDOK		0x00000002
-#define	IPG_SM_BCSTOCTETRCVDOK_BCSTFRAMESRCVDOK		0x00000004
-#define	IPG_SM_RXJUMBOFRAMES				0x00000008
-#define	IPG_SM_TCPCHECKSUMERRORS			0x00000010
-#define	IPG_SM_IPCHECKSUMERRORS				0x00000020
-#define	IPG_SM_UDPCHECKSUMERRORS			0x00000040
-#define	IPG_SM_MACCONTROLFRAMESRCVD			0x00000080
-#define	IPG_SM_FRAMESTOOLONGERRORS			0x00000100
-#define	IPG_SM_INRANGELENGTHERRORS			0x00000200
-#define	IPG_SM_FRAMECHECKSEQERRORS			0x00000400
-#define	IPG_SM_FRAMESLOSTRXERRORS			0x00000800
-#define	IPG_SM_OCTETXMTOK_FRAMESXMTOK			0x00001000
-#define	IPG_SM_MCSTOCTETXMTOK_MCSTFRAMESXMTDOK		0x00002000
-#define	IPG_SM_BCSTOCTETXMTOK_BCSTFRAMESXMTDOK		0x00004000
-#define	IPG_SM_FRAMESWDEFERREDXMT			0x00008000
-#define	IPG_SM_LATECOLLISIONS				0x00010000
-#define	IPG_SM_MULTICOLFRAMES				0x00020000
-#define	IPG_SM_SINGLECOLFRAMES				0x00040000
-#define	IPG_SM_TXJUMBOFRAMES				0x00080000
-#define	IPG_SM_CARRIERSENSEERRORS			0x00100000
-#define	IPG_SM_MACCONTROLFRAMESXMTD			0x00200000
-#define	IPG_SM_FRAMESABORTXSCOLLS			0x00400000
-#define	IPG_SM_FRAMESWEXDEFERAL				0x00800000
-
-/* Countdown */
-#define	IPG_CD_RSVD_MASK		0x0700FFFF
-#define	IPG_CD_COUNT			0x0000FFFF
-#define	IPG_CD_COUNTDOWNSPEED		0x01000000
-#define	IPG_CD_COUNTDOWNMODE		0x02000000
-#define	IPG_CD_COUNTINTENABLED		0x04000000
-
-/* TxDMABurstThresh */
-#define IPG_TB_RSVD_MASK                0xFF
-
-/* TxDMAUrgentThresh */
-#define IPG_TU_RSVD_MASK                0xFF
-
-/* TxDMAPollPeriod */
-#define IPG_TP_RSVD_MASK                0xFF
-
-/* RxDMAUrgentThresh */
-#define IPG_RU_RSVD_MASK                0xFF
-
-/* RxDMAPollPeriod */
-#define IPG_RP_RSVD_MASK                0xFF
-
-/* ReceiveMode */
-#define IPG_RM_RSVD_MASK                0x3F
-#define IPG_RM_RECEIVEUNICAST           0x01
-#define IPG_RM_RECEIVEMULTICAST         0x02
-#define IPG_RM_RECEIVEBROADCAST         0x04
-#define IPG_RM_RECEIVEALLFRAMES         0x08
-#define IPG_RM_RECEIVEMULTICASTHASH     0x10
-#define IPG_RM_RECEIVEIPMULTICAST       0x20
-
-/* PhySet */
-#define IPG_PS_MEM_LENB9B               0x01
-#define IPG_PS_MEM_LEN9                 0x02
-#define IPG_PS_NON_COMPDET              0x04
-
-/* PhyCtrl */
-#define IPG_PC_RSVD_MASK                0xFF
-#define IPG_PC_MGMTCLK_LO               0x00
-#define IPG_PC_MGMTCLK_HI               0x01
-#define IPG_PC_MGMTCLK                  0x01
-#define IPG_PC_MGMTDATA                 0x02
-#define IPG_PC_MGMTDIR                  0x04
-#define IPG_PC_DUPLEX_POLARITY          0x08
-#define IPG_PC_DUPLEX_STATUS            0x10
-#define IPG_PC_LINK_POLARITY            0x20
-#define IPG_PC_LINK_SPEED               0xC0
-#define IPG_PC_LINK_SPEED_10MBPS        0x40
-#define IPG_PC_LINK_SPEED_100MBPS       0x80
-#define IPG_PC_LINK_SPEED_1000MBPS      0xC0
-
-/* DMACtrl */
-#define IPG_DC_RSVD_MASK                0xC07D9818
-#define IPG_DC_RX_DMA_COMPLETE          0x00000008
-#define IPG_DC_RX_DMA_POLL_NOW          0x00000010
-#define IPG_DC_TX_DMA_COMPLETE          0x00000800
-#define IPG_DC_TX_DMA_POLL_NOW          0x00001000
-#define IPG_DC_TX_DMA_IN_PROG           0x00008000
-#define IPG_DC_RX_EARLY_DISABLE         0x00010000
-#define IPG_DC_MWI_DISABLE              0x00040000
-#define IPG_DC_TX_WRITE_BACK_DISABLE    0x00080000
-#define IPG_DC_TX_BURST_LIMIT           0x00700000
-#define IPG_DC_TARGET_ABORT             0x40000000
-#define IPG_DC_MASTER_ABORT             0x80000000
-
-/* ASICCtrl */
-#define IPG_AC_RSVD_MASK                0x07FFEFF2
-#define IPG_AC_EXP_ROM_SIZE             0x00000002
-#define IPG_AC_PHY_SPEED10              0x00000010
-#define IPG_AC_PHY_SPEED100             0x00000020
-#define IPG_AC_PHY_SPEED1000            0x00000040
-#define IPG_AC_PHY_MEDIA                0x00000080
-#define IPG_AC_FORCED_CFG               0x00000700
-#define IPG_AC_D3RESETDISABLE           0x00000800
-#define IPG_AC_SPEED_UP_MODE            0x00002000
-#define IPG_AC_LED_MODE                 0x00004000
-#define IPG_AC_RST_OUT_POLARITY         0x00008000
-#define IPG_AC_GLOBAL_RESET             0x00010000
-#define IPG_AC_RX_RESET                 0x00020000
-#define IPG_AC_TX_RESET                 0x00040000
-#define IPG_AC_DMA                      0x00080000
-#define IPG_AC_FIFO                     0x00100000
-#define IPG_AC_NETWORK                  0x00200000
-#define IPG_AC_HOST                     0x00400000
-#define IPG_AC_AUTO_INIT                0x00800000
-#define IPG_AC_RST_OUT                  0x01000000
-#define IPG_AC_INT_REQUEST              0x02000000
-#define IPG_AC_RESET_BUSY               0x04000000
-#define IPG_AC_LED_SPEED                0x08000000
-#define IPG_AC_LED_MODE_BIT_1           0x20000000
-
-/* EepromCtrl */
-#define IPG_EC_RSVD_MASK                0x83FF
-#define IPG_EC_EEPROM_ADDR              0x00FF
-#define IPG_EC_EEPROM_OPCODE            0x0300
-#define IPG_EC_EEPROM_SUBCOMMAD         0x0000
-#define IPG_EC_EEPROM_WRITEOPCODE       0x0100
-#define IPG_EC_EEPROM_READOPCODE        0x0200
-#define IPG_EC_EEPROM_ERASEOPCODE       0x0300
-#define IPG_EC_EEPROM_BUSY              0x8000
-
-/* FIFOCtrl */
-#define IPG_FC_RSVD_MASK                0xC001
-#define IPG_FC_RAM_TEST_MODE            0x0001
-#define IPG_FC_TRANSMITTING             0x4000
-#define IPG_FC_RECEIVING                0x8000
-
-/* TxStatus */
-#define IPG_TS_RSVD_MASK                0xFFFF00DD
-#define IPG_TS_TX_ERROR                 0x00000001
-#define IPG_TS_LATE_COLLISION           0x00000004
-#define IPG_TS_TX_MAX_COLL              0x00000008
-#define IPG_TS_TX_UNDERRUN              0x00000010
-#define IPG_TS_TX_IND_REQD              0x00000040
-#define IPG_TS_TX_COMPLETE              0x00000080
-#define IPG_TS_TX_FRAMEID               0xFFFF0000
-
-/* WakeEvent */
-#define IPG_WE_WAKE_PKT_ENABLE          0x01
-#define IPG_WE_MAGIC_PKT_ENABLE         0x02
-#define IPG_WE_LINK_EVT_ENABLE          0x04
-#define IPG_WE_WAKE_POLARITY            0x08
-#define IPG_WE_WAKE_PKT_EVT             0x10
-#define IPG_WE_MAGIC_PKT_EVT            0x20
-#define IPG_WE_LINK_EVT                 0x40
-#define IPG_WE_WOL_ENABLE               0x80
-
-/* IntEnable */
-#define IPG_IE_RSVD_MASK                0x1FFE
-#define IPG_IE_HOST_ERROR               0x0002
-#define IPG_IE_TX_COMPLETE              0x0004
-#define IPG_IE_MAC_CTRL_FRAME           0x0008
-#define IPG_IE_RX_COMPLETE              0x0010
-#define IPG_IE_RX_EARLY                 0x0020
-#define IPG_IE_INT_REQUESTED            0x0040
-#define IPG_IE_UPDATE_STATS             0x0080
-#define IPG_IE_LINK_EVENT               0x0100
-#define IPG_IE_TX_DMA_COMPLETE          0x0200
-#define IPG_IE_RX_DMA_COMPLETE          0x0400
-#define IPG_IE_RFD_LIST_END             0x0800
-#define IPG_IE_RX_DMA_PRIORITY          0x1000
-
-/* IntStatus */
-#define IPG_IS_RSVD_MASK                0x1FFF
-#define IPG_IS_INTERRUPT_STATUS         0x0001
-#define IPG_IS_HOST_ERROR               0x0002
-#define IPG_IS_TX_COMPLETE              0x0004
-#define IPG_IS_MAC_CTRL_FRAME           0x0008
-#define IPG_IS_RX_COMPLETE              0x0010
-#define IPG_IS_RX_EARLY                 0x0020
-#define IPG_IS_INT_REQUESTED            0x0040
-#define IPG_IS_UPDATE_STATS             0x0080
-#define IPG_IS_LINK_EVENT               0x0100
-#define IPG_IS_TX_DMA_COMPLETE          0x0200
-#define IPG_IS_RX_DMA_COMPLETE          0x0400
-#define IPG_IS_RFD_LIST_END             0x0800
-#define IPG_IS_RX_DMA_PRIORITY          0x1000
-
-/* MACCtrl */
-#define IPG_MC_RSVD_MASK                0x7FE33FA3
-#define IPG_MC_IFS_SELECT               0x00000003
-#define IPG_MC_IFS_4352BIT              0x00000003
-#define IPG_MC_IFS_1792BIT              0x00000002
-#define IPG_MC_IFS_1024BIT              0x00000001
-#define IPG_MC_IFS_96BIT                0x00000000
-#define IPG_MC_DUPLEX_SELECT            0x00000020
-#define IPG_MC_DUPLEX_SELECT_FD         0x00000020
-#define IPG_MC_DUPLEX_SELECT_HD         0x00000000
-#define IPG_MC_TX_FLOW_CONTROL_ENABLE   0x00000080
-#define IPG_MC_RX_FLOW_CONTROL_ENABLE   0x00000100
-#define IPG_MC_RCV_FCS                  0x00000200
-#define IPG_MC_FIFO_LOOPBACK            0x00000400
-#define IPG_MC_MAC_LOOPBACK             0x00000800
-#define IPG_MC_AUTO_VLAN_TAGGING        0x00001000
-#define IPG_MC_AUTO_VLAN_UNTAGGING      0x00002000
-#define IPG_MC_COLLISION_DETECT         0x00010000
-#define IPG_MC_CARRIER_SENSE            0x00020000
-#define IPG_MC_STATISTICS_ENABLE        0x00200000
-#define IPG_MC_STATISTICS_DISABLE       0x00400000
-#define IPG_MC_STATISTICS_ENABLED       0x00800000
-#define IPG_MC_TX_ENABLE                0x01000000
-#define IPG_MC_TX_DISABLE               0x02000000
-#define IPG_MC_TX_ENABLED               0x04000000
-#define IPG_MC_RX_ENABLE                0x08000000
-#define IPG_MC_RX_DISABLE               0x10000000
-#define IPG_MC_RX_ENABLED               0x20000000
-#define IPG_MC_PAUSED                   0x40000000
-
-/*
- *	Tune
- */
-
-/* Assign IPG_APPEND_FCS_ON_TX > 0 for auto FCS append on TX. */
-#define         IPG_APPEND_FCS_ON_TX         1
-
-/* Assign IPG_APPEND_FCS_ON_TX > 0 for auto FCS strip on RX. */
-#define         IPG_STRIP_FCS_ON_RX          1
-
-/* Assign IPG_DROP_ON_RX_ETH_ERRORS > 0 to drop RX frames with
- * Ethernet errors.
- */
-#define         IPG_DROP_ON_RX_ETH_ERRORS    1
-
-/* Assign IPG_INSERT_MANUAL_VLAN_TAG > 0 to insert VLAN tags manually
- * (via TFC).
- */
-#define		IPG_INSERT_MANUAL_VLAN_TAG   0
-
-/* Assign IPG_ADD_IPCHECKSUM_ON_TX > 0 for auto IP checksum on TX. */
-#define         IPG_ADD_IPCHECKSUM_ON_TX     0
-
-/* Assign IPG_ADD_TCPCHECKSUM_ON_TX > 0 for auto TCP checksum on TX.
- * DO NOT USE FOR SILICON REVISIONS B3 AND EARLIER.
- */
-#define         IPG_ADD_TCPCHECKSUM_ON_TX    0
-
-/* Assign IPG_ADD_UDPCHECKSUM_ON_TX > 0 for auto UDP checksum on TX.
- * DO NOT USE FOR SILICON REVISIONS B3 AND EARLIER.
- */
-#define         IPG_ADD_UDPCHECKSUM_ON_TX    0
-
-/* If inserting VLAN tags manually, assign the IPG_MANUAL_VLAN_xx
- * constants as desired.
- */
-#define		IPG_MANUAL_VLAN_VID		0xABC
-#define		IPG_MANUAL_VLAN_CFI		0x1
-#define		IPG_MANUAL_VLAN_USERPRIORITY 0x5
-
-#define         IPG_IO_REG_RANGE		0xFF
-#define         IPG_MEM_REG_RANGE		0x154
-#define         IPG_DRIVER_NAME		"Sundance Technology IPG Triple-Speed Ethernet"
-#define         IPG_NIC_PHY_ADDRESS          0x01
-#define		IPG_DMALIST_ALIGN_PAD	0x07
-#define		IPG_MULTICAST_HASHTABLE_SIZE	0x40
-
-/* Number of milliseconds to wait after issuing a software reset.
- * 0x05 <= IPG_AC_RESETWAIT to account for proper 10Mbps operation.
- */
-#define         IPG_AC_RESETWAIT             0x05
-
-/* Number of IPG_AC_RESETWAIT timeperiods before declaring timeout. */
-#define         IPG_AC_RESET_TIMEOUT         0x0A
-
-/* Minimum number of nanoseconds used to toggle MDC clock during
- * MII/GMII register access.
- */
-#define		IPG_PC_PHYCTRLWAIT_NS		200
-
-#define		IPG_TFDLIST_LENGTH		0x100
-
-/* Number of frames between TxDMAComplete interrupt.
- * 0 < IPG_FRAMESBETWEENTXDMACOMPLETES <= IPG_TFDLIST_LENGTH
- */
-#define		IPG_FRAMESBETWEENTXDMACOMPLETES 0x1
-
-#define		IPG_RFDLIST_LENGTH		0x100
-
-/* Maximum number of RFDs to process per interrupt.
- * 1 < IPG_MAXRFDPROCESS_COUNT < IPG_RFDLIST_LENGTH
- */
-#define		IPG_MAXRFDPROCESS_COUNT	0x80
-
-/* Minimum margin between last freed RFD, and current RFD.
- * 1 < IPG_MINUSEDRFDSTOFREE < IPG_RFDLIST_LENGTH
- */
-#define		IPG_MINUSEDRFDSTOFREE	0x80
-
-/* specify the jumbo frame maximum size
- * per unit is 0x600 (the rx_buffer size that one RFD can carry)
- */
-#define     MAX_JUMBOSIZE	        0x8	/* max is 12K */
-
-/* Key register values loaded at driver start up. */
-
-/* TXDMAPollPeriod is specified in 320ns increments.
- *
- * Value	Time
- * ---------------------
- * 0x00-0x01	320ns
- * 0x03		~1us
- * 0x1F		~10us
- * 0xFF		~82us
- */
-#define		IPG_TXDMAPOLLPERIOD_VALUE	0x26
-
-/* TxDMAUrgentThresh specifies the minimum amount of
- * data in the transmit FIFO before asserting an
- * urgent transmit DMA request.
- *
- * Value	Min TxFIFO occupied space before urgent TX request
- * ---------------------------------------------------------------
- * 0x00-0x04	128 bytes (1024 bits)
- * 0x27		1248 bytes (~10000 bits)
- * 0x30		1536 bytes (12288 bits)
- * 0xFF		8192 bytes (65535 bits)
- */
-#define		IPG_TXDMAURGENTTHRESH_VALUE	0x04
-
-/* TxDMABurstThresh specifies the minimum amount of
- * free space in the transmit FIFO before asserting an
- * transmit DMA request.
- *
- * Value	Min TxFIFO free space before TX request
- * ----------------------------------------------------
- * 0x00-0x08	256 bytes
- * 0x30		1536 bytes
- * 0xFF		8192 bytes
- */
-#define		IPG_TXDMABURSTTHRESH_VALUE	0x30
-
-/* RXDMAPollPeriod is specified in 320ns increments.
- *
- * Value	Time
- * ---------------------
- * 0x00-0x01	320ns
- * 0x03		~1us
- * 0x1F		~10us
- * 0xFF		~82us
- */
-#define		IPG_RXDMAPOLLPERIOD_VALUE	0x01
-
-/* RxDMAUrgentThresh specifies the minimum amount of
- * free space within the receive FIFO before asserting
- * a urgent receive DMA request.
- *
- * Value	Min RxFIFO free space before urgent RX request
- * ---------------------------------------------------------------
- * 0x00-0x04	128 bytes (1024 bits)
- * 0x27		1248 bytes (~10000 bits)
- * 0x30		1536 bytes (12288 bits)
- * 0xFF		8192 bytes (65535 bits)
- */
-#define		IPG_RXDMAURGENTTHRESH_VALUE	0x30
-
-/* RxDMABurstThresh specifies the minimum amount of
- * occupied space within the receive FIFO before asserting
- * a receive DMA request.
- *
- * Value	Min TxFIFO free space before TX request
- * ----------------------------------------------------
- * 0x00-0x08	256 bytes
- * 0x30		1536 bytes
- * 0xFF		8192 bytes
- */
-#define		IPG_RXDMABURSTTHRESH_VALUE	0x30
-
-/* FlowOnThresh specifies the maximum amount of occupied
- * space in the receive FIFO before a PAUSE frame with
- * maximum pause time transmitted.
- *
- * Value	Max RxFIFO occupied space before PAUSE
- * ---------------------------------------------------
- * 0x0000	0 bytes
- * 0x0740	29,696 bytes
- * 0x07FF	32,752 bytes
- */
-#define		IPG_FLOWONTHRESH_VALUE	0x0740
-
-/* FlowOffThresh specifies the minimum amount of occupied
- * space in the receive FIFO before a PAUSE frame with
- * zero pause time is transmitted.
- *
- * Value	Max RxFIFO occupied space before PAUSE
- * ---------------------------------------------------
- * 0x0000	0 bytes
- * 0x00BF	3056 bytes
- * 0x07FF	32,752 bytes
- */
-#define		IPG_FLOWOFFTHRESH_VALUE	0x00BF
-
-/*
- * Miscellaneous macros.
- */
-
-/* Macros for printing debug statements. */
-#ifdef IPG_DEBUG
-#  define IPG_DEBUG_MSG(fmt, args...)			\
-do {							\
-	if (0)						\
-		printk(KERN_DEBUG "IPG: " fmt, ##args);	\
-} while (0)
-#  define IPG_DDEBUG_MSG(fmt, args...)			\
-	printk(KERN_DEBUG "IPG: " fmt, ##args)
-#  define IPG_DUMPRFDLIST(args) ipg_dump_rfdlist(args)
-#  define IPG_DUMPTFDLIST(args) ipg_dump_tfdlist(args)
-#else
-#  define IPG_DEBUG_MSG(fmt, args...)			\
-do {							\
-	if (0)						\
-		printk(KERN_DEBUG "IPG: " fmt, ##args);	\
-} while (0)
-#  define IPG_DDEBUG_MSG(fmt, args...)			\
-do {							\
-	if (0)						\
-		printk(KERN_DEBUG "IPG: " fmt, ##args);	\
-} while (0)
-#  define IPG_DUMPRFDLIST(args)
-#  define IPG_DUMPTFDLIST(args)
-#endif
-
-/*
- * End miscellaneous macros.
- */
-
-/* Transmit Frame Descriptor. The IPG supports 15 fragments,
- * however Linux requires only a single fragment. Note, each
- * TFD field is 64 bits wide.
- */
-struct ipg_tx {
-	__le64 next_desc;
-	__le64 tfc;
-	__le64 frag_info;
-};
-
-/* Receive Frame Descriptor. Note, each RFD field is 64 bits wide.
- */
-struct ipg_rx {
-	__le64 next_desc;
-	__le64 rfs;
-	__le64 frag_info;
-};
-
-struct ipg_jumbo {
-	int found_start;
-	int current_size;
-	struct sk_buff *skb;
-};
-
-/* Structure of IPG NIC specific data. */
-struct ipg_nic_private {
-	void __iomem *ioaddr;
-	struct ipg_tx *txd;
-	struct ipg_rx *rxd;
-	dma_addr_t txd_map;
-	dma_addr_t rxd_map;
-	struct sk_buff *tx_buff[IPG_TFDLIST_LENGTH];
-	struct sk_buff *rx_buff[IPG_RFDLIST_LENGTH];
-	unsigned int tx_current;
-	unsigned int tx_dirty;
-	unsigned int rx_current;
-	unsigned int rx_dirty;
-	bool is_jumbo;
-	struct ipg_jumbo jumbo;
-	unsigned long rxfrag_size;
-	unsigned long rxsupport_size;
-	unsigned long max_rxframe_size;
-	unsigned int rx_buf_sz;
-	struct pci_dev *pdev;
-	struct net_device *dev;
-	struct net_device_stats stats;
-	spinlock_t lock;
-	int tenmbpsmode;
-
-	u16 led_mode;
-	u16 station_addr[3];	/* Station Address in EEPROM Reg 0x10..0x12 */
-
-	struct mutex		mii_mutex;
-	struct mii_if_info	mii_if;
-	int reset_current_tfd;
-#ifdef IPG_DEBUG
-	int RFDlistendCount;
-	int RFDListCheckedCount;
-	int EmptyRFDListCount;
-#endif
-	struct delayed_work task;
-};
-
-#endif				/* __LINUX_IPG_H */
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
index 639263d..7781e80 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
@@ -627,8 +627,10 @@
 
 		/* verify the skb head is not shared */
 		err = skb_cow_head(skb, 0);
-		if (err)
+		if (err) {
+			dev_kfree_skb(skb);
 			return NETDEV_TX_OK;
+		}
 
 		/* locate vlan header */
 		vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN);
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index e84c7f2..ed622fa 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -36,7 +36,7 @@
 
 /* Registers */
 #define MVNETA_RXQ_CONFIG_REG(q)                (0x1400 + ((q) << 2))
-#define      MVNETA_RXQ_HW_BUF_ALLOC            BIT(1)
+#define      MVNETA_RXQ_HW_BUF_ALLOC            BIT(0)
 #define      MVNETA_RXQ_PKT_OFFSET_ALL_MASK     (0xf    << 8)
 #define      MVNETA_RXQ_PKT_OFFSET_MASK(offs)   ((offs) << 8)
 #define MVNETA_RXQ_THRESHOLD_REG(q)             (0x14c0 + ((q) << 2))
@@ -62,6 +62,7 @@
 #define MVNETA_WIN_SIZE(w)                      (0x2204 + ((w) << 3))
 #define MVNETA_WIN_REMAP(w)                     (0x2280 + ((w) << 2))
 #define MVNETA_BASE_ADDR_ENABLE                 0x2290
+#define MVNETA_ACCESS_PROTECT_ENABLE            0x2294
 #define MVNETA_PORT_CONFIG                      0x2400
 #define      MVNETA_UNI_PROMISC_MODE            BIT(0)
 #define      MVNETA_DEF_RXQ(q)                  ((q) << 1)
@@ -159,7 +160,7 @@
 
 #define MVNETA_INTR_ENABLE                       0x25b8
 #define      MVNETA_TXQ_INTR_ENABLE_ALL_MASK     0x0000ff00
-#define      MVNETA_RXQ_INTR_ENABLE_ALL_MASK     0xff000000  // note: neta says it's 0x000000FF
+#define      MVNETA_RXQ_INTR_ENABLE_ALL_MASK     0x000000ff
 
 #define MVNETA_RXQ_CMD                           0x2680
 #define      MVNETA_RXQ_DISABLE_SHIFT            8
@@ -242,6 +243,7 @@
 #define MVNETA_VLAN_TAG_LEN             4
 
 #define MVNETA_CPU_D_CACHE_LINE_SIZE    32
+#define MVNETA_TX_CSUM_DEF_SIZE		1600
 #define MVNETA_TX_CSUM_MAX_SIZE		9800
 #define MVNETA_ACC_MODE_EXT		1
 
@@ -1579,12 +1581,16 @@
 		}
 
 		skb = build_skb(data, pp->frag_size > PAGE_SIZE ? 0 : pp->frag_size);
-		if (!skb)
-			goto err_drop_frame;
 
+		/* After refill old buffer has to be unmapped regardless
+		 * the skb is successfully built or not.
+		 */
 		dma_unmap_single(dev->dev.parent, phys_addr,
 				 MVNETA_RX_BUF_SIZE(pp->pkt_size), DMA_FROM_DEVICE);
 
+		if (!skb)
+			goto err_drop_frame;
+
 		rcvd_pkts++;
 		rcvd_bytes += rx_bytes;
 
@@ -3191,6 +3197,7 @@
 	}
 
 	mvreg_write(pp, MVNETA_BASE_ADDR_ENABLE, win_enable);
+	mvreg_write(pp, MVNETA_ACCESS_PROTECT_ENABLE, win_protect);
 }
 
 /* Power up the port */
@@ -3250,6 +3257,7 @@
 	char hw_mac_addr[ETH_ALEN];
 	const char *mac_from;
 	const char *managed;
+	int tx_csum_limit;
 	int phy_mode;
 	int err;
 	int cpu;
@@ -3350,8 +3358,21 @@
 		}
 	}
 
-	if (of_device_is_compatible(dn, "marvell,armada-370-neta"))
-		pp->tx_csum_limit = 1600;
+	if (!of_property_read_u32(dn, "tx-csum-limit", &tx_csum_limit)) {
+		if (tx_csum_limit < 0 ||
+		    tx_csum_limit > MVNETA_TX_CSUM_MAX_SIZE) {
+			tx_csum_limit = MVNETA_TX_CSUM_DEF_SIZE;
+			dev_info(&pdev->dev,
+				 "Wrong TX csum limit in DT, set to %dB\n",
+				 MVNETA_TX_CSUM_DEF_SIZE);
+		}
+	} else if (of_device_is_compatible(dn, "marvell,armada-370-neta")) {
+		tx_csum_limit = MVNETA_TX_CSUM_DEF_SIZE;
+	} else {
+		tx_csum_limit = MVNETA_TX_CSUM_MAX_SIZE;
+	}
+
+	pp->tx_csum_limit = tx_csum_limit;
 
 	pp->tx_ring_size = MVNETA_MAX_TXD;
 	pp->rx_ring_size = MVNETA_MAX_RXD;
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 85f1b1e..31c491e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -892,9 +892,10 @@
 		dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn;
 		dev->caps.port_mask[i] = dev->caps.port_type[i];
 		dev->caps.phys_port_id[i] = func_cap.phys_port_id;
-		if (mlx4_get_slave_pkey_gid_tbl_len(dev, i,
-						    &dev->caps.gid_table_len[i],
-						    &dev->caps.pkey_table_len[i]))
+		err = mlx4_get_slave_pkey_gid_tbl_len(dev, i,
+						      &dev->caps.gid_table_len[i],
+						      &dev->caps.pkey_table_len[i]);
+		if (err)
 			goto err_mem;
 	}
 
@@ -906,6 +907,7 @@
 			 dev->caps.uar_page_size * dev->caps.num_uars,
 			 (unsigned long long)
 			 pci_resource_len(dev->persist->pdev, 2));
+		err = -ENOMEM;
 		goto err_mem;
 	}
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 9813d34..6fec3e9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -4952,26 +4952,41 @@
 	struct res_counter *counter;
 	struct res_counter *tmp;
 	int err;
-	int index;
+	int *counters_arr = NULL;
+	int i, j;
 
 	err = move_all_busy(dev, slave, RES_COUNTER);
 	if (err)
 		mlx4_warn(dev, "rem_slave_counters: Could not move all counters - too busy for slave %d\n",
 			  slave);
 
-	spin_lock_irq(mlx4_tlock(dev));
-	list_for_each_entry_safe(counter, tmp, counter_list, com.list) {
-		if (counter->com.owner == slave) {
-			index = counter->com.res_id;
-			rb_erase(&counter->com.node,
-				 &tracker->res_tree[RES_COUNTER]);
-			list_del(&counter->com.list);
-			kfree(counter);
-			__mlx4_counter_free(dev, index);
+	counters_arr = kmalloc_array(dev->caps.max_counters,
+				     sizeof(*counters_arr), GFP_KERNEL);
+	if (!counters_arr)
+		return;
+
+	do {
+		i = 0;
+		j = 0;
+		spin_lock_irq(mlx4_tlock(dev));
+		list_for_each_entry_safe(counter, tmp, counter_list, com.list) {
+			if (counter->com.owner == slave) {
+				counters_arr[i++] = counter->com.res_id;
+				rb_erase(&counter->com.node,
+					 &tracker->res_tree[RES_COUNTER]);
+				list_del(&counter->com.list);
+				kfree(counter);
+			}
+		}
+		spin_unlock_irq(mlx4_tlock(dev));
+
+		while (j < i) {
+			__mlx4_counter_free(dev, counters_arr[j++]);
 			mlx4_release_resource(dev, slave, RES_COUNTER, 1, 0);
 		}
-	}
-	spin_unlock_irq(mlx4_tlock(dev));
+	} while (i);
+
+	kfree(counters_arr);
 }
 
 static void rem_slave_xrcdns(struct mlx4_dev *dev, int slave)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index f2ae62d..22e72bf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -334,9 +334,15 @@
 
 #define MLX5E_TX_SKB_CB(__skb) ((struct mlx5e_tx_skb_cb *)__skb->cb)
 
+enum mlx5e_dma_map_type {
+	MLX5E_DMA_MAP_SINGLE,
+	MLX5E_DMA_MAP_PAGE
+};
+
 struct mlx5e_sq_dma {
-	dma_addr_t addr;
-	u32        size;
+	dma_addr_t              addr;
+	u32                     size;
+	enum mlx5e_dma_map_type type;
 };
 
 enum {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 5fc4d2d..1e52db3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1332,6 +1332,42 @@
 	return err;
 }
 
+static int mlx5e_refresh_tir_self_loopback_enable(struct mlx5_core_dev *mdev,
+						  u32 tirn)
+{
+	void *in;
+	int inlen;
+	int err;
+
+	inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
+	in = mlx5_vzalloc(inlen);
+	if (!in)
+		return -ENOMEM;
+
+	MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1);
+
+	err = mlx5_core_modify_tir(mdev, tirn, in, inlen);
+
+	kvfree(in);
+
+	return err;
+}
+
+static int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5e_priv *priv)
+{
+	int err;
+	int i;
+
+	for (i = 0; i < MLX5E_NUM_TT; i++) {
+		err = mlx5e_refresh_tir_self_loopback_enable(priv->mdev,
+							     priv->tirn[i]);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 static int mlx5e_set_dev_port_mtu(struct net_device *netdev)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -1376,6 +1412,13 @@
 		goto err_clear_state_opened_flag;
 	}
 
+	err = mlx5e_refresh_tirs_self_loopback_enable(priv);
+	if (err) {
+		netdev_err(netdev, "%s: mlx5e_refresh_tirs_self_loopback_enable failed, %d\n",
+			   __func__, err);
+		goto err_close_channels;
+	}
+
 	mlx5e_update_carrier(priv);
 	mlx5e_redirect_rqts(priv);
 
@@ -1383,6 +1426,8 @@
 
 	return 0;
 
+err_close_channels:
+	mlx5e_close_channels(priv);
 err_clear_state_opened_flag:
 	clear_bit(MLX5E_STATE_OPENED, &priv->state);
 	return err;
@@ -1856,6 +1901,8 @@
 
 	mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
 
+	max_mtu = MLX5E_HW2SW_MTU(max_mtu);
+
 	if (new_mtu > max_mtu) {
 		netdev_err(netdev,
 			   "%s: Bad MTU (%d) > (%d) Max\n",
@@ -1909,6 +1956,9 @@
 			       "Not creating net device, some required device capabilities are missing\n");
 		return -ENOTSUPP;
 	}
+	if (!MLX5_CAP_ETH(mdev, self_lb_en_modifiable))
+		mlx5_core_warn(mdev, "Self loop back prevention is not supported\n");
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index cd8f85a..1341b1d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -61,41 +61,49 @@
 	}
 }
 
-static void mlx5e_dma_pop_last_pushed(struct mlx5e_sq *sq, dma_addr_t *addr,
-				      u32 *size)
+static inline void mlx5e_tx_dma_unmap(struct device *pdev,
+				      struct mlx5e_sq_dma *dma)
 {
-	sq->dma_fifo_pc--;
-	*addr = sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].addr;
-	*size = sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].size;
+	switch (dma->type) {
+	case MLX5E_DMA_MAP_SINGLE:
+		dma_unmap_single(pdev, dma->addr, dma->size, DMA_TO_DEVICE);
+		break;
+	case MLX5E_DMA_MAP_PAGE:
+		dma_unmap_page(pdev, dma->addr, dma->size, DMA_TO_DEVICE);
+		break;
+	default:
+		WARN_ONCE(true, "mlx5e_tx_dma_unmap unknown DMA type!\n");
+	}
+}
+
+static inline void mlx5e_dma_push(struct mlx5e_sq *sq,
+				  dma_addr_t addr,
+				  u32 size,
+				  enum mlx5e_dma_map_type map_type)
+{
+	sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].addr = addr;
+	sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].size = size;
+	sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].type = map_type;
+	sq->dma_fifo_pc++;
+}
+
+static inline struct mlx5e_sq_dma *mlx5e_dma_get(struct mlx5e_sq *sq, u32 i)
+{
+	return &sq->dma_fifo[i & sq->dma_fifo_mask];
 }
 
 static void mlx5e_dma_unmap_wqe_err(struct mlx5e_sq *sq, struct sk_buff *skb)
 {
-	dma_addr_t addr;
-	u32 size;
 	int i;
 
 	for (i = 0; i < MLX5E_TX_SKB_CB(skb)->num_dma; i++) {
-		mlx5e_dma_pop_last_pushed(sq, &addr, &size);
-		dma_unmap_single(sq->pdev, addr, size, DMA_TO_DEVICE);
+		struct mlx5e_sq_dma *last_pushed_dma =
+			mlx5e_dma_get(sq, --sq->dma_fifo_pc);
+
+		mlx5e_tx_dma_unmap(sq->pdev, last_pushed_dma);
 	}
 }
 
-static inline void mlx5e_dma_push(struct mlx5e_sq *sq, dma_addr_t addr,
-				  u32 size)
-{
-	sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].addr = addr;
-	sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].size = size;
-	sq->dma_fifo_pc++;
-}
-
-static inline void mlx5e_dma_get(struct mlx5e_sq *sq, u32 i, dma_addr_t *addr,
-				 u32 *size)
-{
-	*addr = sq->dma_fifo[i & sq->dma_fifo_mask].addr;
-	*size = sq->dma_fifo[i & sq->dma_fifo_mask].size;
-}
-
 u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
 		       void *accel_priv, select_queue_fallback_t fallback)
 {
@@ -118,8 +126,15 @@
 	 */
 #define MLX5E_MIN_INLINE ETH_HLEN
 
-	if (bf && (skb_headlen(skb) <= sq->max_inline))
-		return skb_headlen(skb);
+	if (bf) {
+		u16 ihs = skb_headlen(skb);
+
+		if (skb_vlan_tag_present(skb))
+			ihs += VLAN_HLEN;
+
+		if (ihs <= sq->max_inline)
+			return skb_headlen(skb);
+	}
 
 	return MLX5E_MIN_INLINE;
 }
@@ -218,7 +233,7 @@
 		dseg->lkey       = sq->mkey_be;
 		dseg->byte_count = cpu_to_be32(headlen);
 
-		mlx5e_dma_push(sq, dma_addr, headlen);
+		mlx5e_dma_push(sq, dma_addr, headlen, MLX5E_DMA_MAP_SINGLE);
 		MLX5E_TX_SKB_CB(skb)->num_dma++;
 
 		dseg++;
@@ -237,7 +252,7 @@
 		dseg->lkey       = sq->mkey_be;
 		dseg->byte_count = cpu_to_be32(fsz);
 
-		mlx5e_dma_push(sq, dma_addr, fsz);
+		mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE);
 		MLX5E_TX_SKB_CB(skb)->num_dma++;
 
 		dseg++;
@@ -353,13 +368,10 @@
 			}
 
 			for (j = 0; j < MLX5E_TX_SKB_CB(skb)->num_dma; j++) {
-				dma_addr_t addr;
-				u32 size;
+				struct mlx5e_sq_dma *dma =
+					mlx5e_dma_get(sq, dma_fifo_cc++);
 
-				mlx5e_dma_get(sq, dma_fifo_cc, &addr, &size);
-				dma_fifo_cc++;
-				dma_unmap_single(sq->pdev, addr, size,
-						 DMA_TO_DEVICE);
+				mlx5e_tx_dma_unmap(sq->pdev, dma);
 			}
 
 			npkts++;
diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c
index b159ef8..0576651 100644
--- a/drivers/net/ethernet/nxp/lpc_eth.c
+++ b/drivers/net/ethernet/nxp/lpc_eth.c
@@ -1326,7 +1326,7 @@
 	/* Get platform resources */
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	irq = platform_get_irq(pdev, 0);
-	if ((!res) || (irq < 0) || (irq >= NR_IRQS)) {
+	if (!res || irq < 0) {
 		dev_err(&pdev->dev, "error getting resources.\n");
 		ret = -ENXIO;
 		goto err_exit;
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index b4f2123..79ef799 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -7429,15 +7429,15 @@
 
 			rtl8169_rx_vlan_tag(desc, skb);
 
+			if (skb->pkt_type == PACKET_MULTICAST)
+				dev->stats.multicast++;
+
 			napi_gro_receive(&tp->napi, skb);
 
 			u64_stats_update_begin(&tp->rx_stats.syncp);
 			tp->rx_stats.packets++;
 			tp->rx_stats.bytes += pkt_size;
 			u64_stats_update_end(&tp->rx_stats.syncp);
-
-			if (skb->pkt_type == PACKET_MULTICAST)
-				dev->stats.multicast++;
 		}
 release_descriptor:
 		desc->opts2 = 0;
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index aa7b208..ed5da4d 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -408,8 +408,6 @@
 	/* Interrupt enable: */
 	/* Frame receive */
 	ravb_write(ndev, RIC0_FRE0 | RIC0_FRE1, RIC0);
-	/* Receive FIFO full warning */
-	ravb_write(ndev, RIC1_RFWE, RIC1);
 	/* Receive FIFO full error, descriptor empty */
 	ravb_write(ndev, RIC2_QFE0 | RIC2_QFE1 | RIC2_RFFE, RIC2);
 	/* Frame transmitted, timestamp FIFO updated */
@@ -733,8 +731,10 @@
 			    ((tis  & tic)  & BIT(q))) {
 				if (napi_schedule_prep(&priv->napi[q])) {
 					/* Mask RX and TX interrupts */
-					ravb_write(ndev, ric0 & ~BIT(q), RIC0);
-					ravb_write(ndev, tic  & ~BIT(q), TIC);
+					ric0 &= ~BIT(q);
+					tic &= ~BIT(q);
+					ravb_write(ndev, ric0, RIC0);
+					ravb_write(ndev, tic, TIC);
 					__napi_schedule(&priv->napi[q]);
 				} else {
 					netdev_warn(ndev,
@@ -1225,7 +1225,7 @@
 	/* Device init */
 	error = ravb_dmac_init(ndev);
 	if (error)
-		goto out_free_irq;
+		goto out_free_irq2;
 	ravb_emac_init(ndev);
 
 	/* Initialise PTP Clock driver */
@@ -1243,9 +1243,11 @@
 out_ptp_stop:
 	/* Stop PTP Clock driver */
 	ravb_ptp_stop(ndev);
+out_free_irq2:
+	if (priv->chip_id == RCAR_GEN3)
+		free_irq(priv->emac_irq, ndev);
 out_free_irq:
 	free_irq(ndev->irq, ndev);
-	free_irq(priv->emac_irq, ndev);
 out_napi_off:
 	napi_disable(&priv->napi[RAVB_NC]);
 	napi_disable(&priv->napi[RAVB_BE]);
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index d288f1c..a3c42a3 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -3422,7 +3422,7 @@
  * with our request for slot reset the mmio_enabled callback will never be
  * called, and the link_reset callback is not used by AER or EEH mechanisms.
  */
-static struct pci_error_handlers efx_err_handlers = {
+static const struct pci_error_handlers efx_err_handlers = {
 	.error_detected = efx_io_error_detected,
 	.slot_reset	= efx_io_slot_reset,
 	.resume		= efx_io_resume,
diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c
index c860c90..219a99b 100644
--- a/drivers/net/ethernet/smsc/smsc911x.c
+++ b/drivers/net/ethernet/smsc/smsc911x.c
@@ -809,22 +809,17 @@
 
 static int smsc911x_phy_reset(struct smsc911x_data *pdata)
 {
-	struct phy_device *phy_dev = pdata->phy_dev;
 	unsigned int temp;
 	unsigned int i = 100000;
 
-	BUG_ON(!phy_dev);
-	BUG_ON(!phy_dev->bus);
-
-	SMSC_TRACE(pdata, hw, "Performing PHY BCR Reset");
-	smsc911x_mii_write(phy_dev->bus, phy_dev->addr, MII_BMCR, BMCR_RESET);
+	temp = smsc911x_reg_read(pdata, PMT_CTRL);
+	smsc911x_reg_write(pdata, PMT_CTRL, temp | PMT_CTRL_PHY_RST_);
 	do {
 		msleep(1);
-		temp = smsc911x_mii_read(phy_dev->bus, phy_dev->addr,
-			MII_BMCR);
-	} while ((i--) && (temp & BMCR_RESET));
+		temp = smsc911x_reg_read(pdata, PMT_CTRL);
+	} while ((i--) && (temp & PMT_CTRL_PHY_RST_));
 
-	if (temp & BMCR_RESET) {
+	if (unlikely(temp & PMT_CTRL_PHY_RST_)) {
 		SMSC_WARN(pdata, hw, "PHY reset failed to complete");
 		return -EIO;
 	}
@@ -2296,7 +2291,7 @@
 	}
 
 	/* Reset the LAN911x */
-	if (smsc911x_soft_reset(pdata))
+	if (smsc911x_phy_reset(pdata) || smsc911x_soft_reset(pdata))
 		return -ENODEV;
 
 	dev->flags |= IFF_MULTICAST;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
index 9d89bdb..82de68b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
@@ -337,11 +337,11 @@
 			     QSGMII_PHY_RX_SIGNAL_DETECT_EN |
 			     QSGMII_PHY_TX_DRIVER_EN |
 			     QSGMII_PHY_QSGMII_EN |
-			     0x4 << QSGMII_PHY_PHASE_LOOP_GAIN_OFFSET |
-			     0x3 << QSGMII_PHY_RX_DC_BIAS_OFFSET |
-			     0x1 << QSGMII_PHY_RX_INPUT_EQU_OFFSET |
-			     0x2 << QSGMII_PHY_CDR_PI_SLEW_OFFSET |
-			     0xC << QSGMII_PHY_TX_DRV_AMP_OFFSET);
+			     0x4ul << QSGMII_PHY_PHASE_LOOP_GAIN_OFFSET |
+			     0x3ul << QSGMII_PHY_RX_DC_BIAS_OFFSET |
+			     0x1ul << QSGMII_PHY_RX_INPUT_EQU_OFFSET |
+			     0x2ul << QSGMII_PHY_CDR_PI_SLEW_OFFSET |
+			     0xCul << QSGMII_PHY_TX_DRV_AMP_OFFSET);
 	}
 
 	plat_dat->has_gmac = true;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
index 7f6f4a4..58c05ac 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
@@ -299,16 +299,17 @@
 	if (IS_PHY_IF_MODE_GBIT(dwmac->interface)) {
 		const char *rs;
 
+		dwmac->tx_retime_src = TX_RETIME_SRC_CLKGEN;
+
 		err = of_property_read_string(np, "st,tx-retime-src", &rs);
 		if (err < 0) {
 			dev_warn(dev, "Use internal clock source\n");
-			dwmac->tx_retime_src = TX_RETIME_SRC_CLKGEN;
-		} else if (!strcasecmp(rs, "clk_125")) {
-			dwmac->tx_retime_src = TX_RETIME_SRC_CLK_125;
-		} else if (!strcasecmp(rs, "txclk")) {
-			dwmac->tx_retime_src = TX_RETIME_SRC_TXCLK;
+		} else {
+			if (!strcasecmp(rs, "clk_125"))
+				dwmac->tx_retime_src = TX_RETIME_SRC_CLK_125;
+			else if (!strcasecmp(rs, "txclk"))
+				dwmac->tx_retime_src = TX_RETIME_SRC_TXCLK;
 		}
-
 		dwmac->speed = SPEED_1000;
 	}
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 64d8aa4..3c6549a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -185,7 +185,7 @@
 			priv->clk_csr = STMMAC_CSR_100_150M;
 		else if ((clk_rate >= CSR_F_150M) && (clk_rate < CSR_F_250M))
 			priv->clk_csr = STMMAC_CSR_150_250M;
-		else if ((clk_rate >= CSR_F_250M) && (clk_rate < CSR_F_300M))
+		else if ((clk_rate >= CSR_F_250M) && (clk_rate <= CSR_F_300M))
 			priv->clk_csr = STMMAC_CSR_250_300M;
 	}
 }
@@ -2232,6 +2232,12 @@
 
 			frame_len = priv->hw->desc->get_rx_frame_len(p, coe);
 
+			/*  check if frame_len fits the preallocated memory */
+			if (frame_len > priv->dma_buf_sz) {
+				priv->dev->stats.rx_length_errors++;
+				break;
+			}
+
 			/* ACS is set; GMAC core strips PAD/FCS for IEEE 802.3
 			 * Type frames (LLC/LLC-SNAP)
 			 */
@@ -3102,6 +3108,7 @@
 	init_dma_desc_rings(ndev, GFP_ATOMIC);
 	stmmac_hw_setup(ndev, false);
 	stmmac_init_tx_coalesce(priv);
+	stmmac_set_rx_mode(ndev);
 
 	napi_enable(&priv->napi);
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
index ebf6abc..bba670c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
@@ -138,7 +138,6 @@
 
 #ifdef CONFIG_OF
 	if (priv->device->of_node) {
-		int reset_gpio, active_low;
 
 		if (data->reset_gpio < 0) {
 			struct device_node *np = priv->device->of_node;
@@ -154,24 +153,23 @@
 						"snps,reset-active-low");
 			of_property_read_u32_array(np,
 				"snps,reset-delays-us", data->delays, 3);
+
+			if (gpio_request(data->reset_gpio, "mdio-reset"))
+				return 0;
 		}
 
-		reset_gpio = data->reset_gpio;
-		active_low = data->active_low;
+		gpio_direction_output(data->reset_gpio,
+				      data->active_low ? 1 : 0);
+		if (data->delays[0])
+			msleep(DIV_ROUND_UP(data->delays[0], 1000));
 
-		if (!gpio_request(reset_gpio, "mdio-reset")) {
-			gpio_direction_output(reset_gpio, active_low ? 1 : 0);
-			if (data->delays[0])
-				msleep(DIV_ROUND_UP(data->delays[0], 1000));
+		gpio_set_value(data->reset_gpio, data->active_low ? 0 : 1);
+		if (data->delays[1])
+			msleep(DIV_ROUND_UP(data->delays[1], 1000));
 
-			gpio_set_value(reset_gpio, active_low ? 0 : 1);
-			if (data->delays[1])
-				msleep(DIV_ROUND_UP(data->delays[1], 1000));
-
-			gpio_set_value(reset_gpio, active_low ? 1 : 0);
-			if (data->delays[2])
-				msleep(DIV_ROUND_UP(data->delays[2], 1000));
-		}
+		gpio_set_value(data->reset_gpio, data->active_low ? 1 : 0);
+		if (data->delays[2])
+			msleep(DIV_ROUND_UP(data->delays[2], 1000));
 	}
 #endif
 
diff --git a/drivers/net/ethernet/ti/cpsw-common.c b/drivers/net/ethernet/ti/cpsw-common.c
index c08be62..1562ab4 100644
--- a/drivers/net/ethernet/ti/cpsw-common.c
+++ b/drivers/net/ethernet/ti/cpsw-common.c
@@ -78,6 +78,9 @@
 
 int ti_cm_get_macid(struct device *dev, int slave, u8 *mac_addr)
 {
+	if (of_machine_is_compatible("ti,dm8148"))
+		return cpsw_am33xx_cm_get_macid(dev, 0x630, slave, mac_addr);
+
 	if (of_machine_is_compatible("ti,am33xx"))
 		return cpsw_am33xx_cm_get_macid(dev, 0x630, slave, mac_addr);
 
diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c
index ae68afd..f38696c 100644
--- a/drivers/net/ethernet/via/via-velocity.c
+++ b/drivers/net/ethernet/via/via-velocity.c
@@ -345,13 +345,6 @@
 */
 VELOCITY_PARAM(speed_duplex, "Setting the speed and duplex mode");
 
-#define VAL_PKT_LEN_DEF     0
-/* ValPktLen[] is used for setting the checksum offload ability of NIC.
-   0: Receive frame with invalid layer 2 length (Default)
-   1: Drop frame with invalid layer 2 length
-*/
-VELOCITY_PARAM(ValPktLen, "Receiving or Drop invalid 802.3 frame");
-
 #define WOL_OPT_DEF     0
 #define WOL_OPT_MIN     0
 #define WOL_OPT_MAX     7
@@ -494,7 +487,6 @@
 
 	velocity_set_int_opt(&opts->flow_cntl, flow_control[index], FLOW_CNTL_MIN, FLOW_CNTL_MAX, FLOW_CNTL_DEF, "flow_control", devname);
 	velocity_set_bool_opt(&opts->flags, IP_byte_align[index], IP_ALIG_DEF, VELOCITY_FLAGS_IP_ALIGN, "IP_byte_align", devname);
-	velocity_set_bool_opt(&opts->flags, ValPktLen[index], VAL_PKT_LEN_DEF, VELOCITY_FLAGS_VAL_PKT_LEN, "ValPktLen", devname);
 	velocity_set_int_opt((int *) &opts->spd_dpx, speed_duplex[index], MED_LNK_MIN, MED_LNK_MAX, MED_LNK_DEF, "Media link mode", devname);
 	velocity_set_int_opt(&opts->wol_opts, wol_opts[index], WOL_OPT_MIN, WOL_OPT_MAX, WOL_OPT_DEF, "Wake On Lan options", devname);
 	opts->numrx = (opts->numrx & ~3);
@@ -2055,8 +2047,9 @@
 	int pkt_len = le16_to_cpu(rd->rdesc0.len) & 0x3fff;
 	struct sk_buff *skb;
 
-	if (rd->rdesc0.RSR & (RSR_STP | RSR_EDP)) {
-		VELOCITY_PRT(MSG_LEVEL_VERBOSE, KERN_ERR " %s : the received frame spans multiple RDs.\n", vptr->netdev->name);
+	if (unlikely(rd->rdesc0.RSR & (RSR_STP | RSR_EDP | RSR_RL))) {
+		if (rd->rdesc0.RSR & (RSR_STP | RSR_EDP))
+			VELOCITY_PRT(MSG_LEVEL_VERBOSE, KERN_ERR " %s : the received frame spans multiple RDs.\n", vptr->netdev->name);
 		stats->rx_length_errors++;
 		return -EINVAL;
 	}
@@ -2069,17 +2062,6 @@
 	dma_sync_single_for_cpu(vptr->dev, rd_info->skb_dma,
 				    vptr->rx.buf_sz, DMA_FROM_DEVICE);
 
-	/*
-	 *	Drop frame not meeting IEEE 802.3
-	 */
-
-	if (vptr->flags & VELOCITY_FLAGS_VAL_PKT_LEN) {
-		if (rd->rdesc0.RSR & RSR_RL) {
-			stats->rx_length_errors++;
-			return -EINVAL;
-		}
-	}
-
 	velocity_rx_csum(rd, skb);
 
 	if (velocity_rx_copy(&skb, pkt_len, vptr) < 0) {
diff --git a/drivers/net/fjes/fjes_hw.c b/drivers/net/fjes/fjes_hw.c
index bb8b530..b103adb 100644
--- a/drivers/net/fjes/fjes_hw.c
+++ b/drivers/net/fjes/fjes_hw.c
@@ -599,7 +599,7 @@
 		FJES_CMD_REQ_RES_CODE_BUSY) &&
 	       (timeout > 0)) {
 		msleep(200 + hw->my_epid * 20);
-			timeout -= (200 + hw->my_epid * 20);
+		timeout -= (200 + hw->my_epid * 20);
 
 		res_buf->unshare_buffer.length = 0;
 		res_buf->unshare_buffer.code = 0;
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index d50887e..8c48bb2 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -254,7 +254,7 @@
 	}
 }
 
-static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff *skb,
+static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff **pskb,
 			    bool local)
 {
 	struct ipvl_dev *ipvlan = addr->master;
@@ -262,6 +262,7 @@
 	unsigned int len;
 	rx_handler_result_t ret = RX_HANDLER_CONSUMED;
 	bool success = false;
+	struct sk_buff *skb = *pskb;
 
 	len = skb->len + ETH_HLEN;
 	if (unlikely(!(dev->flags & IFF_UP))) {
@@ -273,6 +274,7 @@
 	if (!skb)
 		goto out;
 
+	*pskb = skb;
 	skb->dev = dev;
 	skb->pkt_type = PACKET_HOST;
 
@@ -486,7 +488,7 @@
 
 	addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true);
 	if (addr)
-		return ipvlan_rcv_frame(addr, skb, true);
+		return ipvlan_rcv_frame(addr, &skb, true);
 
 out:
 	skb->dev = ipvlan->phy_dev;
@@ -506,7 +508,7 @@
 		if (lyr3h) {
 			addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true);
 			if (addr)
-				return ipvlan_rcv_frame(addr, skb, true);
+				return ipvlan_rcv_frame(addr, &skb, true);
 		}
 		skb = skb_share_check(skb, GFP_ATOMIC);
 		if (!skb)
@@ -589,7 +591,7 @@
 
 	addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
 	if (addr)
-		ret = ipvlan_rcv_frame(addr, skb, false);
+		ret = ipvlan_rcv_frame(addr, pskb, false);
 
 out:
 	return ret;
@@ -626,7 +628,7 @@
 
 		addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
 		if (addr)
-			ret = ipvlan_rcv_frame(addr, skb, false);
+			ret = ipvlan_rcv_frame(addr, pskb, false);
 	}
 
 	return ret;
@@ -651,5 +653,5 @@
 	WARN_ONCE(true, "ipvlan_handle_frame() called for mode = [%hx]\n",
 			  port->mode);
 	kfree_skb(skb);
-	return NET_RX_DROP;
+	return RX_HANDLER_CONSUMED;
 }
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 86f6c62..06c8bfe 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -415,6 +415,7 @@
 		skb = ip_check_defrag(dev_net(skb->dev), skb, IP_DEFRAG_MACVLAN);
 		if (!skb)
 			return RX_HANDLER_CONSUMED;
+		*pskb = skb;
 		eth = eth_hdr(skb);
 		macvlan_forward_source(skb, port, eth->h_source);
 		src = macvlan_hash_lookup(port, eth->h_source);
@@ -456,6 +457,7 @@
 		goto out;
 	}
 
+	*pskb = skb;
 	skb->dev = dev;
 	skb->pkt_type = PACKET_HOST;
 
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 54036ae..0fc5219 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -498,7 +498,7 @@
 	wait_queue_head_t *wqueue;
 
 	if (!sock_writeable(sk) ||
-	    !test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags))
+	    !test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags))
 		return;
 
 	wqueue = sk_sleep(sk);
@@ -585,7 +585,7 @@
 		mask |= POLLIN | POLLRDNORM;
 
 	if (sock_writeable(&q->sk) ||
-	    (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &q->sock.flags) &&
+	    (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &q->sock.flags) &&
 	     sock_writeable(&q->sk)))
 		mask |= POLLOUT | POLLWRNORM;
 
diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index fabf11d..2d020a3 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@ -308,6 +308,8 @@
 	.flags			= PHY_HAS_INTERRUPT,
 	.config_aneg		= genphy_config_aneg,
 	.read_status		= genphy_read_status,
+	.ack_interrupt		= at803x_ack_interrupt,
+	.config_intr		= at803x_config_intr,
 	.driver			= {
 		.owner = THIS_MODULE,
 	},
@@ -327,6 +329,8 @@
 	.flags			= PHY_HAS_INTERRUPT,
 	.config_aneg		= genphy_config_aneg,
 	.read_status		= genphy_read_status,
+	.ack_interrupt		= at803x_ack_interrupt,
+	.config_intr		= at803x_config_intr,
 	.driver			= {
 		.owner = THIS_MODULE,
 	},
diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index 07a6119..3ce5d95 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -614,7 +614,7 @@
 	{ PHY_ID_BCM5461, 0xfffffff0 },
 	{ PHY_ID_BCM54616S, 0xfffffff0 },
 	{ PHY_ID_BCM5464, 0xfffffff0 },
-	{ PHY_ID_BCM5482, 0xfffffff0 },
+	{ PHY_ID_BCM5481, 0xfffffff0 },
 	{ PHY_ID_BCM5482, 0xfffffff0 },
 	{ PHY_ID_BCM50610, 0xfffffff0 },
 	{ PHY_ID_BCM50610M, 0xfffffff0 },
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 5de8d58..0240552 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -1154,6 +1154,21 @@
 		.driver = { .owner = THIS_MODULE },
 	},
 	{
+		.phy_id = MARVELL_PHY_ID_88E1540,
+		.phy_id_mask = MARVELL_PHY_ID_MASK,
+		.name = "Marvell 88E1540",
+		.features = PHY_GBIT_FEATURES,
+		.flags = PHY_HAS_INTERRUPT,
+		.config_aneg = &m88e1510_config_aneg,
+		.read_status = &marvell_read_status,
+		.ack_interrupt = &marvell_ack_interrupt,
+		.config_intr = &marvell_config_intr,
+		.did_interrupt = &m88e1121_did_interrupt,
+		.resume = &genphy_resume,
+		.suspend = &genphy_suspend,
+		.driver = { .owner = THIS_MODULE },
+	},
+	{
 		.phy_id = MARVELL_PHY_ID_88E3016,
 		.phy_id_mask = MARVELL_PHY_ID_MASK,
 		.name = "Marvell 88E3016",
@@ -1186,6 +1201,7 @@
 	{ MARVELL_PHY_ID_88E1318S, MARVELL_PHY_ID_MASK },
 	{ MARVELL_PHY_ID_88E1116R, MARVELL_PHY_ID_MASK },
 	{ MARVELL_PHY_ID_88E1510, MARVELL_PHY_ID_MASK },
+	{ MARVELL_PHY_ID_88E1540, MARVELL_PHY_ID_MASK },
 	{ MARVELL_PHY_ID_88E3016, MARVELL_PHY_ID_MASK },
 	{ }
 };
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index adb48ab..47cd306d 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -448,7 +448,8 @@
 		mdiobus_write(phydev->bus, mii_data->phy_id,
 			      mii_data->reg_num, val);
 
-		if (mii_data->reg_num == MII_BMCR &&
+		if (mii_data->phy_id == phydev->addr &&
+		    mii_data->reg_num == MII_BMCR &&
 		    val & BMCR_RESET)
 			return phy_init_hw(phydev);
 
@@ -863,6 +864,9 @@
 			needs_aneg = true;
 		break;
 	case PHY_NOLINK:
+		if (phy_interrupt_is_valid(phydev))
+			break;
+
 		err = phy_read_status(phydev);
 		if (err)
 			break;
diff --git a/drivers/net/phy/vitesse.c b/drivers/net/phy/vitesse.c
index 76cad71..dd295db 100644
--- a/drivers/net/phy/vitesse.c
+++ b/drivers/net/phy/vitesse.c
@@ -66,6 +66,7 @@
 #define PHY_ID_VSC8244			0x000fc6c0
 #define PHY_ID_VSC8514			0x00070670
 #define PHY_ID_VSC8574			0x000704a0
+#define PHY_ID_VSC8601			0x00070420
 #define PHY_ID_VSC8662			0x00070660
 #define PHY_ID_VSC8221			0x000fc550
 #define PHY_ID_VSC8211			0x000fc4b0
@@ -133,7 +134,8 @@
 			(phydev->drv->phy_id == PHY_ID_VSC8234 ||
 			 phydev->drv->phy_id == PHY_ID_VSC8244 ||
 			 phydev->drv->phy_id == PHY_ID_VSC8514 ||
-			 phydev->drv->phy_id == PHY_ID_VSC8574) ?
+			 phydev->drv->phy_id == PHY_ID_VSC8574 ||
+			 phydev->drv->phy_id == PHY_ID_VSC8601) ?
 				MII_VSC8244_IMASK_MASK :
 				MII_VSC8221_IMASK_MASK);
 	else {
@@ -272,6 +274,18 @@
 	.config_intr    = &vsc82xx_config_intr,
 	.driver         = { .owner = THIS_MODULE,},
 }, {
+	.phy_id         = PHY_ID_VSC8601,
+	.name           = "Vitesse VSC8601",
+	.phy_id_mask    = 0x000ffff0,
+	.features       = PHY_GBIT_FEATURES,
+	.flags          = PHY_HAS_INTERRUPT,
+	.config_init    = &genphy_config_init,
+	.config_aneg    = &genphy_config_aneg,
+	.read_status    = &genphy_read_status,
+	.ack_interrupt  = &vsc824x_ack_interrupt,
+	.config_intr    = &vsc82xx_config_intr,
+	.driver         = { .owner = THIS_MODULE,},
+}, {
 	.phy_id         = PHY_ID_VSC8662,
 	.name           = "Vitesse VSC8662",
 	.phy_id_mask    = 0x000ffff0,
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index b1878fa..f0db770 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1040,7 +1040,7 @@
 		mask |= POLLIN | POLLRDNORM;
 
 	if (sock_writeable(sk) ||
-	    (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags) &&
+	    (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) &&
 	     sock_writeable(sk)))
 		mask |= POLLOUT | POLLWRNORM;
 
@@ -1488,7 +1488,7 @@
 	if (!sock_writeable(sk))
 		return;
 
-	if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags))
+	if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags))
 		return;
 
 	wqueue = sk_sleep(sk);
diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c
index c78d3cb..3da70bf 100644
--- a/drivers/net/usb/cdc_ether.c
+++ b/drivers/net/usb/cdc_ether.c
@@ -696,6 +696,11 @@
 			USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE),
 	.driver_info = (kernel_ulong_t) &wwan_info,
 }, {
+	/* Dell DW5580 modules */
+	USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, 0x81ba, USB_CLASS_COMM,
+			USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE),
+	.driver_info = (kernel_ulong_t)&wwan_info,
+}, {
 	USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET,
 			USB_CDC_PROTO_NONE),
 	.driver_info = (unsigned long) &cdc_info,
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index a187f08..3b1ba82 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -691,7 +691,6 @@
 
 int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting, int drvflags)
 {
-	const struct usb_cdc_union_desc *union_desc = NULL;
 	struct cdc_ncm_ctx *ctx;
 	struct usb_driver *driver;
 	u8 *buf;
@@ -725,15 +724,16 @@
 	/* parse through descriptors associated with control interface */
 	cdc_parse_cdc_header(&hdr, intf, buf, len);
 
-	ctx->data = usb_ifnum_to_if(dev->udev,
-				    hdr.usb_cdc_union_desc->bSlaveInterface0);
+	if (hdr.usb_cdc_union_desc)
+		ctx->data = usb_ifnum_to_if(dev->udev,
+					    hdr.usb_cdc_union_desc->bSlaveInterface0);
 	ctx->ether_desc = hdr.usb_cdc_ether_desc;
 	ctx->func_desc = hdr.usb_cdc_ncm_desc;
 	ctx->mbim_desc = hdr.usb_cdc_mbim_desc;
 	ctx->mbim_extended_desc = hdr.usb_cdc_mbim_extended_desc;
 
 	/* some buggy devices have an IAD but no CDC Union */
-	if (!union_desc && intf->intf_assoc && intf->intf_assoc->bInterfaceCount == 2) {
+	if (!hdr.usb_cdc_union_desc && intf->intf_assoc && intf->intf_assoc->bInterfaceCount == 2) {
 		ctx->data = usb_ifnum_to_if(dev->udev, intf->cur_altsetting->desc.bInterfaceNumber + 1);
 		dev_dbg(&intf->dev, "CDC Union missing - got slave from IAD\n");
 	}
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 34799ea..9a5be8b 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -725,6 +725,7 @@
 	{QMI_FIXED_INTF(0x2357, 0x9000, 4)},	/* TP-LINK MA260 */
 	{QMI_FIXED_INTF(0x1bc7, 0x1200, 5)},	/* Telit LE920 */
 	{QMI_FIXED_INTF(0x1bc7, 0x1201, 2)},	/* Telit LE920 */
+	{QMI_FIXED_INTF(0x1c9e, 0x9b01, 3)},	/* XS Stick W100-2 from 4G Systems */
 	{QMI_FIXED_INTF(0x0b3c, 0xc000, 4)},	/* Olivetti Olicard 100 */
 	{QMI_FIXED_INTF(0x0b3c, 0xc001, 4)},	/* Olivetti Olicard 120 */
 	{QMI_FIXED_INTF(0x0b3c, 0xc002, 4)},	/* Olivetti Olicard 140 */
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 46f4cad..4179037 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -587,6 +587,12 @@
 						&adapter->pdev->dev,
 						rbi->skb->data, rbi->len,
 						PCI_DMA_FROMDEVICE);
+				if (dma_mapping_error(&adapter->pdev->dev,
+						      rbi->dma_addr)) {
+					dev_kfree_skb_any(rbi->skb);
+					rq->stats.rx_buf_alloc_failure++;
+					break;
+				}
 			} else {
 				/* rx buffer skipped by the device */
 			}
@@ -605,13 +611,18 @@
 						&adapter->pdev->dev,
 						rbi->page, 0, PAGE_SIZE,
 						PCI_DMA_FROMDEVICE);
+				if (dma_mapping_error(&adapter->pdev->dev,
+						      rbi->dma_addr)) {
+					put_page(rbi->page);
+					rq->stats.rx_buf_alloc_failure++;
+					break;
+				}
 			} else {
 				/* rx buffers skipped by the device */
 			}
 			val = VMXNET3_RXD_BTYPE_BODY << VMXNET3_RXD_BTYPE_SHIFT;
 		}
 
-		BUG_ON(rbi->dma_addr == 0);
 		gd->rxd.addr = cpu_to_le64(rbi->dma_addr);
 		gd->dword[2] = cpu_to_le32((!ring->gen << VMXNET3_RXD_GEN_SHIFT)
 					   | val | rbi->len);
@@ -655,7 +666,7 @@
 }
 
 
-static void
+static int
 vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx,
 		struct vmxnet3_tx_queue *tq, struct pci_dev *pdev,
 		struct vmxnet3_adapter *adapter)
@@ -715,6 +726,8 @@
 		tbi->dma_addr = dma_map_single(&adapter->pdev->dev,
 				skb->data + buf_offset, buf_size,
 				PCI_DMA_TODEVICE);
+		if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr))
+			return -EFAULT;
 
 		tbi->len = buf_size;
 
@@ -755,6 +768,8 @@
 			tbi->dma_addr = skb_frag_dma_map(&adapter->pdev->dev, frag,
 							 buf_offset, buf_size,
 							 DMA_TO_DEVICE);
+			if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr))
+				return -EFAULT;
 
 			tbi->len = buf_size;
 
@@ -782,6 +797,8 @@
 	/* set the last buf_info for the pkt */
 	tbi->skb = skb;
 	tbi->sop_idx = ctx->sop_txd - tq->tx_ring.base;
+
+	return 0;
 }
 
 
@@ -1020,7 +1037,8 @@
 	}
 
 	/* fill tx descs related to addr & len */
-	vmxnet3_map_pkt(skb, &ctx, tq, adapter->pdev, adapter);
+	if (vmxnet3_map_pkt(skb, &ctx, tq, adapter->pdev, adapter))
+		goto unlock_drop_pkt;
 
 	/* setup the EOP desc */
 	ctx.eop_txd->dword[3] = cpu_to_le32(VMXNET3_TXD_CQ | VMXNET3_TXD_EOP);
@@ -1231,6 +1249,7 @@
 		struct vmxnet3_rx_buf_info *rbi;
 		struct sk_buff *skb, *new_skb = NULL;
 		struct page *new_page = NULL;
+		dma_addr_t new_dma_addr;
 		int num_to_alloc;
 		struct Vmxnet3_RxDesc *rxd;
 		u32 idx, ring_idx;
@@ -1287,6 +1306,21 @@
 				skip_page_frags = true;
 				goto rcd_done;
 			}
+			new_dma_addr = dma_map_single(&adapter->pdev->dev,
+						      new_skb->data, rbi->len,
+						      PCI_DMA_FROMDEVICE);
+			if (dma_mapping_error(&adapter->pdev->dev,
+					      new_dma_addr)) {
+				dev_kfree_skb(new_skb);
+				/* Skb allocation failed, do not handover this
+				 * skb to stack. Reuse it. Drop the existing pkt
+				 */
+				rq->stats.rx_buf_alloc_failure++;
+				ctx->skb = NULL;
+				rq->stats.drop_total++;
+				skip_page_frags = true;
+				goto rcd_done;
+			}
 
 			dma_unmap_single(&adapter->pdev->dev, rbi->dma_addr,
 					 rbi->len,
@@ -1303,9 +1337,7 @@
 
 			/* Immediate refill */
 			rbi->skb = new_skb;
-			rbi->dma_addr = dma_map_single(&adapter->pdev->dev,
-						       rbi->skb->data, rbi->len,
-						       PCI_DMA_FROMDEVICE);
+			rbi->dma_addr = new_dma_addr;
 			rxd->addr = cpu_to_le64(rbi->dma_addr);
 			rxd->len = rbi->len;
 			if (adapter->version == 2 &&
@@ -1348,6 +1380,19 @@
 					skip_page_frags = true;
 					goto rcd_done;
 				}
+				new_dma_addr = dma_map_page(&adapter->pdev->dev
+							, rbi->page,
+							0, PAGE_SIZE,
+							PCI_DMA_FROMDEVICE);
+				if (dma_mapping_error(&adapter->pdev->dev,
+						      new_dma_addr)) {
+					put_page(new_page);
+					rq->stats.rx_buf_alloc_failure++;
+					dev_kfree_skb(ctx->skb);
+					ctx->skb = NULL;
+					skip_page_frags = true;
+					goto rcd_done;
+				}
 
 				dma_unmap_page(&adapter->pdev->dev,
 					       rbi->dma_addr, rbi->len,
@@ -1357,10 +1402,7 @@
 
 				/* Immediate refill */
 				rbi->page = new_page;
-				rbi->dma_addr = dma_map_page(&adapter->pdev->dev
-							, rbi->page,
-							0, PAGE_SIZE,
-							PCI_DMA_FROMDEVICE);
+				rbi->dma_addr = new_dma_addr;
 				rxd->addr = cpu_to_le64(rbi->dma_addr);
 				rxd->len = rbi->len;
 			}
@@ -2157,16 +2199,18 @@
 		if (!netdev_mc_empty(netdev)) {
 			new_table = vmxnet3_copy_mc(netdev);
 			if (new_table) {
-				rxConf->mfTableLen = cpu_to_le16(
-					netdev_mc_count(netdev) * ETH_ALEN);
+				size_t sz = netdev_mc_count(netdev) * ETH_ALEN;
+
+				rxConf->mfTableLen = cpu_to_le16(sz);
 				new_table_pa = dma_map_single(
 							&adapter->pdev->dev,
 							new_table,
-							rxConf->mfTableLen,
+							sz,
 							PCI_DMA_TODEVICE);
 			}
 
-			if (new_table_pa) {
+			if (!dma_mapping_error(&adapter->pdev->dev,
+					       new_table_pa)) {
 				new_mode |= VMXNET3_RXM_MCAST;
 				rxConf->mfTablePA = cpu_to_le64(new_table_pa);
 			} else {
@@ -3074,6 +3118,11 @@
 	adapter->adapter_pa = dma_map_single(&adapter->pdev->dev, adapter,
 					     sizeof(struct vmxnet3_adapter),
 					     PCI_DMA_TODEVICE);
+	if (dma_mapping_error(&adapter->pdev->dev, adapter->adapter_pa)) {
+		dev_err(&pdev->dev, "Failed to map dma\n");
+		err = -EFAULT;
+		goto err_dma_map;
+	}
 	adapter->shared = dma_alloc_coherent(
 				&adapter->pdev->dev,
 				sizeof(struct Vmxnet3_DriverShared),
@@ -3232,6 +3281,7 @@
 err_alloc_shared:
 	dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa,
 			 sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE);
+err_dma_map:
 	free_netdev(netdev);
 	return err;
 }
diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h
index 3f859a5..4c58c83 100644
--- a/drivers/net/vmxnet3/vmxnet3_int.h
+++ b/drivers/net/vmxnet3/vmxnet3_int.h
@@ -69,10 +69,10 @@
 /*
  * Version numbers
  */
-#define VMXNET3_DRIVER_VERSION_STRING   "1.4.3.0-k"
+#define VMXNET3_DRIVER_VERSION_STRING   "1.4.4.0-k"
 
 /* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */
-#define VMXNET3_DRIVER_VERSION_NUM      0x01040300
+#define VMXNET3_DRIVER_VERSION_NUM      0x01040400
 
 #if defined(CONFIG_PCI_MSI)
 	/* RSS only makes sense if MSI-X is supported. */
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 92fa3e1..4f97484 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -907,7 +907,6 @@
 		       struct nlattr *tb[], struct nlattr *data[])
 {
 	struct net_vrf *vrf = netdev_priv(dev);
-	int err;
 
 	if (!data || !data[IFLA_VRF_TABLE])
 		return -EINVAL;
@@ -916,15 +915,7 @@
 
 	dev->priv_flags |= IFF_L3MDEV_MASTER;
 
-	err = register_netdevice(dev);
-	if (err < 0)
-		goto out_fail;
-
-	return 0;
-
-out_fail:
-	free_netdev(dev);
-	return err;
+	return register_netdevice(dev);
 }
 
 static size_t vrf_nl_getsize(const struct net_device *dev)
diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index e92aaf6..89541cc 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -1075,11 +1075,10 @@
 
 	used = pvc_is_used(pvc);
 
-	if (type == ARPHRD_ETHER) {
+	if (type == ARPHRD_ETHER)
 		dev = alloc_netdev(0, "pvceth%d", NET_NAME_UNKNOWN,
 				   ether_setup);
-		dev->priv_flags &= ~IFF_TX_SKB_SHARING;
-	} else
+	else
 		dev = alloc_netdev(0, "pvc%d", NET_NAME_UNKNOWN, pvc_setup);
 
 	if (!dev) {
@@ -1088,9 +1087,10 @@
 		return -ENOBUFS;
 	}
 
-	if (type == ARPHRD_ETHER)
+	if (type == ARPHRD_ETHER) {
+		dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 		eth_hw_addr_random(dev);
-	else {
+	} else {
 		*(__be16*)dev->dev_addr = htons(dlci);
 		dlci_to_q922(dev->broadcast, dlci);
 	}
diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c
index 5c47b01..cd39025 100644
--- a/drivers/net/wan/x25_asy.c
+++ b/drivers/net/wan/x25_asy.c
@@ -549,16 +549,12 @@
 
 static int x25_asy_open_tty(struct tty_struct *tty)
 {
-	struct x25_asy *sl = tty->disc_data;
+	struct x25_asy *sl;
 	int err;
 
 	if (tty->ops->write == NULL)
 		return -EOPNOTSUPP;
 
-	/* First make sure we're not already connected. */
-	if (sl && sl->magic == X25_ASY_MAGIC)
-		return -EEXIST;
-
 	/* OK.  Find a free X.25 channel to use. */
 	sl = x25_asy_alloc();
 	if (sl == NULL)
diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c
index aa9bd92..0947cc2 100644
--- a/drivers/net/wireless/ath/ath10k/core.c
+++ b/drivers/net/wireless/ath/ath10k/core.c
@@ -51,6 +51,7 @@
 static const struct ath10k_hw_params ath10k_hw_params_list[] = {
 	{
 		.id = QCA988X_HW_2_0_VERSION,
+		.dev_id = QCA988X_2_0_DEVICE_ID,
 		.name = "qca988x hw2.0",
 		.patch_load_addr = QCA988X_HW_2_0_PATCH_LOAD_ADDR,
 		.uart_pin = 7,
@@ -69,6 +70,25 @@
 	},
 	{
 		.id = QCA6174_HW_2_1_VERSION,
+		.dev_id = QCA6164_2_1_DEVICE_ID,
+		.name = "qca6164 hw2.1",
+		.patch_load_addr = QCA6174_HW_2_1_PATCH_LOAD_ADDR,
+		.uart_pin = 6,
+		.otp_exe_param = 0,
+		.channel_counters_freq_hz = 88000,
+		.max_probe_resp_desc_thres = 0,
+		.fw = {
+			.dir = QCA6174_HW_2_1_FW_DIR,
+			.fw = QCA6174_HW_2_1_FW_FILE,
+			.otp = QCA6174_HW_2_1_OTP_FILE,
+			.board = QCA6174_HW_2_1_BOARD_DATA_FILE,
+			.board_size = QCA6174_BOARD_DATA_SZ,
+			.board_ext_size = QCA6174_BOARD_EXT_DATA_SZ,
+		},
+	},
+	{
+		.id = QCA6174_HW_2_1_VERSION,
+		.dev_id = QCA6174_2_1_DEVICE_ID,
 		.name = "qca6174 hw2.1",
 		.patch_load_addr = QCA6174_HW_2_1_PATCH_LOAD_ADDR,
 		.uart_pin = 6,
@@ -86,6 +106,7 @@
 	},
 	{
 		.id = QCA6174_HW_3_0_VERSION,
+		.dev_id = QCA6174_2_1_DEVICE_ID,
 		.name = "qca6174 hw3.0",
 		.patch_load_addr = QCA6174_HW_3_0_PATCH_LOAD_ADDR,
 		.uart_pin = 6,
@@ -103,6 +124,7 @@
 	},
 	{
 		.id = QCA6174_HW_3_2_VERSION,
+		.dev_id = QCA6174_2_1_DEVICE_ID,
 		.name = "qca6174 hw3.2",
 		.patch_load_addr = QCA6174_HW_3_0_PATCH_LOAD_ADDR,
 		.uart_pin = 6,
@@ -121,6 +143,7 @@
 	},
 	{
 		.id = QCA99X0_HW_2_0_DEV_VERSION,
+		.dev_id = QCA99X0_2_0_DEVICE_ID,
 		.name = "qca99x0 hw2.0",
 		.patch_load_addr = QCA99X0_HW_2_0_PATCH_LOAD_ADDR,
 		.uart_pin = 7,
@@ -139,10 +162,31 @@
 	},
 	{
 		.id = QCA9377_HW_1_0_DEV_VERSION,
+		.dev_id = QCA9377_1_0_DEVICE_ID,
 		.name = "qca9377 hw1.0",
 		.patch_load_addr = QCA9377_HW_1_0_PATCH_LOAD_ADDR,
-		.uart_pin = 7,
+		.uart_pin = 6,
 		.otp_exe_param = 0,
+		.channel_counters_freq_hz = 88000,
+		.max_probe_resp_desc_thres = 0,
+		.fw = {
+			.dir = QCA9377_HW_1_0_FW_DIR,
+			.fw = QCA9377_HW_1_0_FW_FILE,
+			.otp = QCA9377_HW_1_0_OTP_FILE,
+			.board = QCA9377_HW_1_0_BOARD_DATA_FILE,
+			.board_size = QCA9377_BOARD_DATA_SZ,
+			.board_ext_size = QCA9377_BOARD_EXT_DATA_SZ,
+		},
+	},
+	{
+		.id = QCA9377_HW_1_1_DEV_VERSION,
+		.dev_id = QCA9377_1_0_DEVICE_ID,
+		.name = "qca9377 hw1.1",
+		.patch_load_addr = QCA9377_HW_1_0_PATCH_LOAD_ADDR,
+		.uart_pin = 6,
+		.otp_exe_param = 0,
+		.channel_counters_freq_hz = 88000,
+		.max_probe_resp_desc_thres = 0,
 		.fw = {
 			.dir = QCA9377_HW_1_0_FW_DIR,
 			.fw = QCA9377_HW_1_0_FW_FILE,
@@ -1263,7 +1307,8 @@
 	for (i = 0; i < ARRAY_SIZE(ath10k_hw_params_list); i++) {
 		hw_params = &ath10k_hw_params_list[i];
 
-		if (hw_params->id == ar->target_version)
+		if (hw_params->id == ar->target_version &&
+		    hw_params->dev_id == ar->dev_id)
 			break;
 	}
 
diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h
index 018c64f..858d75f 100644
--- a/drivers/net/wireless/ath/ath10k/core.h
+++ b/drivers/net/wireless/ath/ath10k/core.h
@@ -636,6 +636,7 @@
 
 	struct ath10k_hw_params {
 		u32 id;
+		u16 dev_id;
 		const char *name;
 		u32 patch_load_addr;
 		int uart_pin;
diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h
index 39966a0..713c2bc 100644
--- a/drivers/net/wireless/ath/ath10k/hw.h
+++ b/drivers/net/wireless/ath/ath10k/hw.h
@@ -22,6 +22,12 @@
 
 #define ATH10K_FW_DIR			"ath10k"
 
+#define QCA988X_2_0_DEVICE_ID   (0x003c)
+#define QCA6164_2_1_DEVICE_ID   (0x0041)
+#define QCA6174_2_1_DEVICE_ID   (0x003e)
+#define QCA99X0_2_0_DEVICE_ID   (0x0040)
+#define QCA9377_1_0_DEVICE_ID   (0x0042)
+
 /* QCA988X 1.0 definitions (unsupported) */
 #define QCA988X_HW_1_0_CHIP_ID_REV	0x0
 
@@ -42,6 +48,10 @@
 #define QCA6174_HW_3_0_VERSION		0x05020000
 #define QCA6174_HW_3_2_VERSION		0x05030000
 
+/* QCA9377 target BMI version signatures */
+#define QCA9377_HW_1_0_DEV_VERSION	0x05020000
+#define QCA9377_HW_1_1_DEV_VERSION	0x05020001
+
 enum qca6174_pci_rev {
 	QCA6174_PCI_REV_1_1 = 0x11,
 	QCA6174_PCI_REV_1_3 = 0x13,
@@ -60,6 +70,11 @@
 	QCA6174_HW_3_2_CHIP_ID_REV = 10,
 };
 
+enum qca9377_chip_id_rev {
+	QCA9377_HW_1_0_CHIP_ID_REV = 0x0,
+	QCA9377_HW_1_1_CHIP_ID_REV = 0x1,
+};
+
 #define QCA6174_HW_2_1_FW_DIR		"ath10k/QCA6174/hw2.1"
 #define QCA6174_HW_2_1_FW_FILE		"firmware.bin"
 #define QCA6174_HW_2_1_OTP_FILE		"otp.bin"
@@ -85,8 +100,6 @@
 #define QCA99X0_HW_2_0_PATCH_LOAD_ADDR	0x1234
 
 /* QCA9377 1.0 definitions */
-#define QCA9377_HW_1_0_DEV_VERSION     0x05020001
-#define QCA9377_HW_1_0_CHIP_ID_REV     0x1
 #define QCA9377_HW_1_0_FW_DIR          ATH10K_FW_DIR "/QCA9377/hw1.0"
 #define QCA9377_HW_1_0_FW_FILE         "firmware.bin"
 #define QCA9377_HW_1_0_OTP_FILE        "otp.bin"
diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index a7411fe..95a55405 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -4225,7 +4225,7 @@
 
 static u32 get_nss_from_chainmask(u16 chain_mask)
 {
-	if ((chain_mask & 0x15) == 0x15)
+	if ((chain_mask & 0xf) == 0xf)
 		return 4;
 	else if ((chain_mask & 0x7) == 0x7)
 		return 3;
diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index 3fca200..930785a 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c
@@ -57,12 +57,6 @@
 #define ATH10K_PCI_TARGET_WAIT 3000
 #define ATH10K_PCI_NUM_WARM_RESET_ATTEMPTS 3
 
-#define QCA988X_2_0_DEVICE_ID	(0x003c)
-#define QCA6164_2_1_DEVICE_ID	(0x0041)
-#define QCA6174_2_1_DEVICE_ID	(0x003e)
-#define QCA99X0_2_0_DEVICE_ID	(0x0040)
-#define QCA9377_1_0_DEVICE_ID	(0x0042)
-
 static const struct pci_device_id ath10k_pci_id_table[] = {
 	{ PCI_VDEVICE(ATHEROS, QCA988X_2_0_DEVICE_ID) }, /* PCI-E QCA988X V2 */
 	{ PCI_VDEVICE(ATHEROS, QCA6164_2_1_DEVICE_ID) }, /* PCI-E QCA6164 V2.1 */
@@ -92,7 +86,9 @@
 	{ QCA6174_2_1_DEVICE_ID, QCA6174_HW_3_2_CHIP_ID_REV },
 
 	{ QCA99X0_2_0_DEVICE_ID, QCA99X0_HW_2_0_CHIP_ID_REV },
+
 	{ QCA9377_1_0_DEVICE_ID, QCA9377_HW_1_0_CHIP_ID_REV },
+	{ QCA9377_1_0_DEVICE_ID, QCA9377_HW_1_1_CHIP_ID_REV },
 };
 
 static void ath10k_pci_buffer_cleanup(struct ath10k *ar);
@@ -111,8 +107,9 @@
 static void ath10k_pci_htc_rx_cb(struct ath10k_ce_pipe *ce_state);
 static void ath10k_pci_htt_tx_cb(struct ath10k_ce_pipe *ce_state);
 static void ath10k_pci_htt_rx_cb(struct ath10k_ce_pipe *ce_state);
+static void ath10k_pci_htt_htc_rx_cb(struct ath10k_ce_pipe *ce_state);
 
-static const struct ce_attr host_ce_config_wlan[] = {
+static struct ce_attr host_ce_config_wlan[] = {
 	/* CE0: host->target HTC control and raw streams */
 	{
 		.flags = CE_ATTR_FLAGS,
@@ -128,7 +125,7 @@
 		.src_nentries = 0,
 		.src_sz_max = 2048,
 		.dest_nentries = 512,
-		.recv_cb = ath10k_pci_htc_rx_cb,
+		.recv_cb = ath10k_pci_htt_htc_rx_cb,
 	},
 
 	/* CE2: target->host WMI */
@@ -217,7 +214,7 @@
 };
 
 /* Target firmware's Copy Engine configuration. */
-static const struct ce_pipe_config target_ce_config_wlan[] = {
+static struct ce_pipe_config target_ce_config_wlan[] = {
 	/* CE0: host->target HTC control and raw streams */
 	{
 		.pipenum = __cpu_to_le32(0),
@@ -330,7 +327,7 @@
  * This table is derived from the CE_PCI TABLE, above.
  * It is passed to the Target at startup for use by firmware.
  */
-static const struct service_to_pipe target_service_to_ce_map_wlan[] = {
+static struct service_to_pipe target_service_to_ce_map_wlan[] = {
 	{
 		__cpu_to_le32(ATH10K_HTC_SVC_ID_WMI_DATA_VO),
 		__cpu_to_le32(PIPEDIR_OUT),	/* out = UL = host -> target */
@@ -1208,6 +1205,16 @@
 	ath10k_pci_process_rx_cb(ce_state, ath10k_htc_rx_completion_handler);
 }
 
+static void ath10k_pci_htt_htc_rx_cb(struct ath10k_ce_pipe *ce_state)
+{
+	/* CE4 polling needs to be done whenever CE pipe which transports
+	 * HTT Rx (target->host) is processed.
+	 */
+	ath10k_ce_per_engine_service(ce_state->ar, 4);
+
+	ath10k_pci_process_rx_cb(ce_state, ath10k_htc_rx_completion_handler);
+}
+
 /* Called by lower (CE) layer when a send to HTT Target completes. */
 static void ath10k_pci_htt_tx_cb(struct ath10k_ce_pipe *ce_state)
 {
@@ -2027,6 +2034,29 @@
 	return 0;
 }
 
+static void ath10k_pci_override_ce_config(struct ath10k *ar)
+{
+	struct ce_attr *attr;
+	struct ce_pipe_config *config;
+
+	/* For QCA6174 we're overriding the Copy Engine 5 configuration,
+	 * since it is currently used for other feature.
+	 */
+
+	/* Override Host's Copy Engine 5 configuration */
+	attr = &host_ce_config_wlan[5];
+	attr->src_sz_max = 0;
+	attr->dest_nentries = 0;
+
+	/* Override Target firmware's Copy Engine configuration */
+	config = &target_ce_config_wlan[5];
+	config->pipedir = __cpu_to_le32(PIPEDIR_OUT);
+	config->nbytes_max = __cpu_to_le32(2048);
+
+	/* Map from service/endpoint to Copy Engine */
+	target_service_to_ce_map_wlan[15].pipenum = __cpu_to_le32(1);
+}
+
 static int ath10k_pci_alloc_pipes(struct ath10k *ar)
 {
 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
@@ -3020,6 +3050,9 @@
 		goto err_core_destroy;
 	}
 
+	if (QCA_REV_6174(ar))
+		ath10k_pci_override_ce_config(ar);
+
 	ret = ath10k_pci_alloc_pipes(ar);
 	if (ret) {
 		ath10k_err(ar, "failed to allocate copy engine pipes: %d\n",
diff --git a/drivers/net/wireless/iwlwifi/iwl-7000.c b/drivers/net/wireless/iwlwifi/iwl-7000.c
index 1a73c7a..bf88ec3 100644
--- a/drivers/net/wireless/iwlwifi/iwl-7000.c
+++ b/drivers/net/wireless/iwlwifi/iwl-7000.c
@@ -69,7 +69,7 @@
 #include "iwl-agn-hw.h"
 
 /* Highest firmware API version supported */
-#define IWL7260_UCODE_API_MAX	17
+#define IWL7260_UCODE_API_MAX	19
 
 /* Oldest version we won't warn about */
 #define IWL7260_UCODE_API_OK	13
diff --git a/drivers/net/wireless/iwlwifi/iwl-8000.c b/drivers/net/wireless/iwlwifi/iwl-8000.c
index 0116e5a..9bcc0bf 100644
--- a/drivers/net/wireless/iwlwifi/iwl-8000.c
+++ b/drivers/net/wireless/iwlwifi/iwl-8000.c
@@ -69,7 +69,7 @@
 #include "iwl-agn-hw.h"
 
 /* Highest firmware API version supported */
-#define IWL8000_UCODE_API_MAX	17
+#define IWL8000_UCODE_API_MAX	19
 
 /* Oldest version we won't warn about */
 #define IWL8000_UCODE_API_OK	13
diff --git a/drivers/net/wireless/iwlwifi/mvm/d3.c b/drivers/net/wireless/iwlwifi/mvm/d3.c
index 85ae902..29ae58e 100644
--- a/drivers/net/wireless/iwlwifi/mvm/d3.c
+++ b/drivers/net/wireless/iwlwifi/mvm/d3.c
@@ -309,9 +309,9 @@
 	 * to transmit packets to the AP, i.e. the PTK.
 	 */
 	if (key->flags & IEEE80211_KEY_FLAG_PAIRWISE) {
-		key->hw_key_idx = 0;
 		mvm->ptk_ivlen = key->iv_len;
 		mvm->ptk_icvlen = key->icv_len;
+		ret = iwl_mvm_set_sta_key(mvm, vif, sta, key, 0);
 	} else {
 		/*
 		 * firmware only supports TSC/RSC for a single key,
@@ -319,12 +319,11 @@
 		 * with new ones -- this relies on mac80211 doing
 		 * list_add_tail().
 		 */
-		key->hw_key_idx = 1;
 		mvm->gtk_ivlen = key->iv_len;
 		mvm->gtk_icvlen = key->icv_len;
+		ret = iwl_mvm_set_sta_key(mvm, vif, sta, key, 1);
 	}
 
-	ret = iwl_mvm_set_sta_key(mvm, vif, sta, key, true);
 	data->error = ret != 0;
 out_unlock:
 	mutex_unlock(&mvm->mutex);
@@ -772,9 +771,6 @@
 	 */
 	set_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status);
 
-	/* We reprogram keys and shouldn't allocate new key indices */
-	memset(mvm->fw_key_table, 0, sizeof(mvm->fw_key_table));
-
 	mvm->ptk_ivlen = 0;
 	mvm->ptk_icvlen = 0;
 	mvm->ptk_ivlen = 0;
diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c
index 1fb6846..e88afac 100644
--- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c
@@ -2941,6 +2941,7 @@
 {
 	struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
 	int ret;
+	u8 key_offset;
 
 	if (iwlwifi_mod_params.sw_crypto) {
 		IWL_DEBUG_MAC80211(mvm, "leave - hwcrypto disabled\n");
@@ -3006,10 +3007,14 @@
 			break;
 		}
 
+		/* in HW restart reuse the index, otherwise request a new one */
+		if (test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status))
+			key_offset = key->hw_key_idx;
+		else
+			key_offset = STA_KEY_IDX_INVALID;
+
 		IWL_DEBUG_MAC80211(mvm, "set hwcrypto key\n");
-		ret = iwl_mvm_set_sta_key(mvm, vif, sta, key,
-					  test_bit(IWL_MVM_STATUS_IN_HW_RESTART,
-						   &mvm->status));
+		ret = iwl_mvm_set_sta_key(mvm, vif, sta, key, key_offset);
 		if (ret) {
 			IWL_WARN(mvm, "set key failed\n");
 			/*
diff --git a/drivers/net/wireless/iwlwifi/mvm/sta.c b/drivers/net/wireless/iwlwifi/mvm/sta.c
index 300a249..354acbd 100644
--- a/drivers/net/wireless/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/iwlwifi/mvm/sta.c
@@ -1201,7 +1201,8 @@
 	return max_offs;
 }
 
-static u8 iwl_mvm_get_key_sta_id(struct ieee80211_vif *vif,
+static u8 iwl_mvm_get_key_sta_id(struct iwl_mvm *mvm,
+				 struct ieee80211_vif *vif,
 				 struct ieee80211_sta *sta)
 {
 	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
@@ -1218,8 +1219,21 @@
 	 * station ID, then use AP's station ID.
 	 */
 	if (vif->type == NL80211_IFTYPE_STATION &&
-	    mvmvif->ap_sta_id != IWL_MVM_STATION_COUNT)
-		return mvmvif->ap_sta_id;
+	    mvmvif->ap_sta_id != IWL_MVM_STATION_COUNT) {
+		u8 sta_id = mvmvif->ap_sta_id;
+
+		sta = rcu_dereference_protected(mvm->fw_id_to_mac_id[sta_id],
+						lockdep_is_held(&mvm->mutex));
+		/*
+		 * It is possible that the 'sta' parameter is NULL,
+		 * for example when a GTK is removed - the sta_id will then
+		 * be the AP ID, and no station was passed by mac80211.
+		 */
+		if (IS_ERR_OR_NULL(sta))
+			return IWL_MVM_STATION_COUNT;
+
+		return sta_id;
+	}
 
 	return IWL_MVM_STATION_COUNT;
 }
@@ -1227,7 +1241,8 @@
 static int iwl_mvm_send_sta_key(struct iwl_mvm *mvm,
 				struct iwl_mvm_sta *mvm_sta,
 				struct ieee80211_key_conf *keyconf, bool mcast,
-				u32 tkip_iv32, u16 *tkip_p1k, u32 cmd_flags)
+				u32 tkip_iv32, u16 *tkip_p1k, u32 cmd_flags,
+				u8 key_offset)
 {
 	struct iwl_mvm_add_sta_key_cmd cmd = {};
 	__le16 key_flags;
@@ -1269,7 +1284,7 @@
 	if (mcast)
 		key_flags |= cpu_to_le16(STA_KEY_MULTICAST);
 
-	cmd.key_offset = keyconf->hw_key_idx;
+	cmd.key_offset = key_offset;
 	cmd.key_flags = key_flags;
 	cmd.sta_id = sta_id;
 
@@ -1360,6 +1375,7 @@
 				 struct ieee80211_vif *vif,
 				 struct ieee80211_sta *sta,
 				 struct ieee80211_key_conf *keyconf,
+				 u8 key_offset,
 				 bool mcast)
 {
 	struct iwl_mvm_sta *mvm_sta = iwl_mvm_sta_from_mac80211(sta);
@@ -1375,17 +1391,17 @@
 		ieee80211_get_key_rx_seq(keyconf, 0, &seq);
 		ieee80211_get_tkip_rx_p1k(keyconf, addr, seq.tkip.iv32, p1k);
 		ret = iwl_mvm_send_sta_key(mvm, mvm_sta, keyconf, mcast,
-					   seq.tkip.iv32, p1k, 0);
+					   seq.tkip.iv32, p1k, 0, key_offset);
 		break;
 	case WLAN_CIPHER_SUITE_CCMP:
 	case WLAN_CIPHER_SUITE_WEP40:
 	case WLAN_CIPHER_SUITE_WEP104:
 		ret = iwl_mvm_send_sta_key(mvm, mvm_sta, keyconf, mcast,
-					   0, NULL, 0);
+					   0, NULL, 0, key_offset);
 		break;
 	default:
 		ret = iwl_mvm_send_sta_key(mvm, mvm_sta, keyconf, mcast,
-					   0, NULL, 0);
+					   0, NULL, 0, key_offset);
 	}
 
 	return ret;
@@ -1433,7 +1449,7 @@
 			struct ieee80211_vif *vif,
 			struct ieee80211_sta *sta,
 			struct ieee80211_key_conf *keyconf,
-			bool have_key_offset)
+			u8 key_offset)
 {
 	bool mcast = !(keyconf->flags & IEEE80211_KEY_FLAG_PAIRWISE);
 	u8 sta_id;
@@ -1443,7 +1459,7 @@
 	lockdep_assert_held(&mvm->mutex);
 
 	/* Get the station id from the mvm local station table */
-	sta_id = iwl_mvm_get_key_sta_id(vif, sta);
+	sta_id = iwl_mvm_get_key_sta_id(mvm, vif, sta);
 	if (sta_id == IWL_MVM_STATION_COUNT) {
 		IWL_ERR(mvm, "Failed to find station id\n");
 		return -EINVAL;
@@ -1470,18 +1486,25 @@
 	if (WARN_ON_ONCE(iwl_mvm_sta_from_mac80211(sta)->vif != vif))
 		return -EINVAL;
 
-	if (!have_key_offset) {
-		/*
-		 * The D3 firmware hardcodes the PTK offset to 0, so we have to
-		 * configure it there. As a result, this workaround exists to
-		 * let the caller set the key offset (hw_key_idx), see d3.c.
-		 */
-		keyconf->hw_key_idx = iwl_mvm_set_fw_key_idx(mvm);
-		if (keyconf->hw_key_idx == STA_KEY_IDX_INVALID)
+	/* If the key_offset is not pre-assigned, we need to find a
+	 * new offset to use.  In normal cases, the offset is not
+	 * pre-assigned, but during HW_RESTART we want to reuse the
+	 * same indices, so we pass them when this function is called.
+	 *
+	 * In D3 entry, we need to hardcoded the indices (because the
+	 * firmware hardcodes the PTK offset to 0).  In this case, we
+	 * need to make sure we don't overwrite the hw_key_idx in the
+	 * keyconf structure, because otherwise we cannot configure
+	 * the original ones back when resuming.
+	 */
+	if (key_offset == STA_KEY_IDX_INVALID) {
+		key_offset  = iwl_mvm_set_fw_key_idx(mvm);
+		if (key_offset == STA_KEY_IDX_INVALID)
 			return -ENOSPC;
+		keyconf->hw_key_idx = key_offset;
 	}
 
-	ret = __iwl_mvm_set_sta_key(mvm, vif, sta, keyconf, mcast);
+	ret = __iwl_mvm_set_sta_key(mvm, vif, sta, keyconf, key_offset, mcast);
 	if (ret) {
 		__clear_bit(keyconf->hw_key_idx, mvm->fw_key_table);
 		goto end;
@@ -1495,7 +1518,8 @@
 	 */
 	if (keyconf->cipher == WLAN_CIPHER_SUITE_WEP40 ||
 	    keyconf->cipher == WLAN_CIPHER_SUITE_WEP104) {
-		ret = __iwl_mvm_set_sta_key(mvm, vif, sta, keyconf, !mcast);
+		ret = __iwl_mvm_set_sta_key(mvm, vif, sta, keyconf,
+					    key_offset, !mcast);
 		if (ret) {
 			__clear_bit(keyconf->hw_key_idx, mvm->fw_key_table);
 			__iwl_mvm_remove_sta_key(mvm, sta_id, keyconf, mcast);
@@ -1521,7 +1545,7 @@
 	lockdep_assert_held(&mvm->mutex);
 
 	/* Get the station id from the mvm local station table */
-	sta_id = iwl_mvm_get_key_sta_id(vif, sta);
+	sta_id = iwl_mvm_get_key_sta_id(mvm, vif, sta);
 
 	IWL_DEBUG_WEP(mvm, "mvm remove dynamic key: idx=%d sta=%d\n",
 		      keyconf->keyidx, sta_id);
@@ -1547,24 +1571,6 @@
 		return 0;
 	}
 
-	/*
-	 * It is possible that the 'sta' parameter is NULL, and thus
-	 * there is a need to retrieve the sta from the local station table,
-	 * for example when a GTK is removed (where the sta_id will then be
-	 * the AP ID, and no station was passed by mac80211.)
-	 */
-	if (!sta) {
-		sta = rcu_dereference_protected(mvm->fw_id_to_mac_id[sta_id],
-						lockdep_is_held(&mvm->mutex));
-		if (!sta) {
-			IWL_ERR(mvm, "Invalid station id\n");
-			return -EINVAL;
-		}
-	}
-
-	if (WARN_ON_ONCE(iwl_mvm_sta_from_mac80211(sta)->vif != vif))
-		return -EINVAL;
-
 	ret = __iwl_mvm_remove_sta_key(mvm, sta_id, keyconf, mcast);
 	if (ret)
 		return ret;
@@ -1584,7 +1590,7 @@
 			     u16 *phase1key)
 {
 	struct iwl_mvm_sta *mvm_sta;
-	u8 sta_id = iwl_mvm_get_key_sta_id(vif, sta);
+	u8 sta_id = iwl_mvm_get_key_sta_id(mvm, vif, sta);
 	bool mcast = !(keyconf->flags & IEEE80211_KEY_FLAG_PAIRWISE);
 
 	if (WARN_ON_ONCE(sta_id == IWL_MVM_STATION_COUNT))
@@ -1602,7 +1608,7 @@
 
 	mvm_sta = iwl_mvm_sta_from_mac80211(sta);
 	iwl_mvm_send_sta_key(mvm, mvm_sta, keyconf, mcast,
-			     iv32, phase1key, CMD_ASYNC);
+			     iv32, phase1key, CMD_ASYNC, keyconf->hw_key_idx);
 	rcu_read_unlock();
 }
 
diff --git a/drivers/net/wireless/iwlwifi/mvm/sta.h b/drivers/net/wireless/iwlwifi/mvm/sta.h
index eedb215..0631cc0 100644
--- a/drivers/net/wireless/iwlwifi/mvm/sta.h
+++ b/drivers/net/wireless/iwlwifi/mvm/sta.h
@@ -365,8 +365,8 @@
 int iwl_mvm_set_sta_key(struct iwl_mvm *mvm,
 			struct ieee80211_vif *vif,
 			struct ieee80211_sta *sta,
-			struct ieee80211_key_conf *key,
-			bool have_key_offset);
+			struct ieee80211_key_conf *keyconf,
+			u8 key_offset);
 int iwl_mvm_remove_sta_key(struct iwl_mvm *mvm,
 			   struct ieee80211_vif *vif,
 			   struct ieee80211_sta *sta,
diff --git a/drivers/net/wireless/iwlwifi/pcie/drv.c b/drivers/net/wireless/iwlwifi/pcie/drv.c
index 644b58b..639761f 100644
--- a/drivers/net/wireless/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/iwlwifi/pcie/drv.c
@@ -423,14 +423,21 @@
 /* 8000 Series */
 	{IWL_PCI_DEVICE(0x24F3, 0x0010, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F3, 0x1010, iwl8260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24F3, 0x0130, iwl8260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24F3, 0x1130, iwl8260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24F3, 0x0132, iwl8260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24F3, 0x1132, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F3, 0x0110, iwl8260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24F3, 0x01F0, iwl8260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24F3, 0x0012, iwl8260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24F3, 0x1012, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F3, 0x1110, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F3, 0x0050, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F3, 0x0250, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F3, 0x1050, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F3, 0x0150, iwl8260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24F3, 0x1150, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F4, 0x0030, iwl8260_2ac_cfg)},
-	{IWL_PCI_DEVICE(0x24F4, 0x1130, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F4, 0x1030, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F3, 0xC010, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F3, 0xC110, iwl8260_2ac_cfg)},
@@ -438,18 +445,28 @@
 	{IWL_PCI_DEVICE(0x24F3, 0xC050, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F3, 0xD050, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F3, 0x8010, iwl8260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24F3, 0x8110, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F3, 0x9010, iwl8260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24F3, 0x9110, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F4, 0x8030, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F4, 0x9030, iwl8260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24F3, 0x8130, iwl8260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24F3, 0x9130, iwl8260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24F3, 0x8132, iwl8260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24F3, 0x9132, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F3, 0x8050, iwl8260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24F3, 0x8150, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F3, 0x9050, iwl8260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24F3, 0x9150, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F3, 0x0004, iwl8260_2n_cfg)},
+	{IWL_PCI_DEVICE(0x24F3, 0x0044, iwl8260_2n_cfg)},
 	{IWL_PCI_DEVICE(0x24F5, 0x0010, iwl4165_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F6, 0x0030, iwl4165_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F3, 0x0810, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F3, 0x0910, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F3, 0x0850, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F3, 0x0950, iwl8260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24F3, 0x0930, iwl8260_2ac_cfg)},
 #endif /* CONFIG_IWLMVM */
 
 	{0}
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c
index 6e9418e..bbb789f 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c
@@ -2272,7 +2272,7 @@
 	struct rtl_priv *rtlpriv = rtl_priv(hw);
 	struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
 
-	if (!rtlpci->int_clear)
+	if (rtlpci->int_clear)
 		rtl8821ae_clear_interrupt(hw);/*clear it here first*/
 
 	rtl_write_dword(rtlpriv, REG_HIMR, rtlpci->irq_mask[0] & 0xFFFFFFFF);
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c
index 8ee141a..142bdff 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c
@@ -448,7 +448,7 @@
 MODULE_PARM_DESC(msi, "Set to 1 to use MSI interrupts mode (default 1)\n");
 MODULE_PARM_DESC(debug, "Set debug level (0-5) (default 0)");
 MODULE_PARM_DESC(disable_watchdog, "Set to 1 to disable the watchdog (default 0)\n");
-MODULE_PARM_DESC(int_clear, "Set to 1 to disable interrupt clear before set (default 0)\n");
+MODULE_PARM_DESC(int_clear, "Set to 0 to disable interrupt clear before set (default 1)\n");
 
 static SIMPLE_DEV_PM_OPS(rtlwifi_pm_ops, rtl_pci_suspend, rtl_pci_resume);
 
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index 219dc206..a5fe239 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -1,4 +1,5 @@
 
 obj-$(CONFIG_BLK_DEV_NVME)     += nvme.o
 
-nvme-y		+= pci.o scsi.o lightnvm.o
+lightnvm-$(CONFIG_NVM)	:= lightnvm.o
+nvme-y		+= pci.o scsi.o $(lightnvm-y)
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index e0b7b95..06c3364 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -22,8 +22,6 @@
 
 #include "nvme.h"
 
-#ifdef CONFIG_NVM
-
 #include <linux/nvme.h>
 #include <linux/bitops.h>
 #include <linux/lightnvm.h>
@@ -93,7 +91,7 @@
 	__le16			cdw14[6];
 };
 
-struct nvme_nvm_bbtbl {
+struct nvme_nvm_getbbtbl {
 	__u8			opcode;
 	__u8			flags;
 	__u16			command_id;
@@ -101,10 +99,23 @@
 	__u64			rsvd[2];
 	__le64			prp1;
 	__le64			prp2;
-	__le32			prp1_len;
-	__le32			prp2_len;
-	__le32			lbb;
-	__u32			rsvd11[3];
+	__le64			spba;
+	__u32			rsvd4[4];
+};
+
+struct nvme_nvm_setbbtbl {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			nsid;
+	__le64			rsvd[2];
+	__le64			prp1;
+	__le64			prp2;
+	__le64			spba;
+	__le16			nlb;
+	__u8			value;
+	__u8			rsvd3;
+	__u32			rsvd4[3];
 };
 
 struct nvme_nvm_erase_blk {
@@ -129,8 +140,8 @@
 		struct nvme_nvm_hb_rw hb_rw;
 		struct nvme_nvm_ph_rw ph_rw;
 		struct nvme_nvm_l2ptbl l2p;
-		struct nvme_nvm_bbtbl get_bb;
-		struct nvme_nvm_bbtbl set_bb;
+		struct nvme_nvm_getbbtbl get_bb;
+		struct nvme_nvm_setbbtbl set_bb;
 		struct nvme_nvm_erase_blk erase;
 	};
 };
@@ -142,11 +153,13 @@
 	__u8			num_ch;
 	__u8			num_lun;
 	__u8			num_pln;
+	__u8			rsvd1;
 	__le16			num_blk;
 	__le16			num_pg;
 	__le16			fpg_sz;
 	__le16			csecs;
 	__le16			sos;
+	__le16			rsvd2;
 	__le32			trdt;
 	__le32			trdm;
 	__le32			tprt;
@@ -154,8 +167,9 @@
 	__le32			tbet;
 	__le32			tbem;
 	__le32			mpos;
+	__le32			mccap;
 	__le16			cpar;
-	__u8			reserved[913];
+	__u8			reserved[906];
 } __packed;
 
 struct nvme_nvm_addr_format {
@@ -178,15 +192,28 @@
 	__u8			ver_id;
 	__u8			vmnt;
 	__u8			cgrps;
-	__u8			res[5];
+	__u8			res;
 	__le32			cap;
 	__le32			dom;
 	struct nvme_nvm_addr_format ppaf;
-	__u8			ppat;
-	__u8			resv[223];
+	__u8			resv[228];
 	struct nvme_nvm_id_group groups[4];
 } __packed;
 
+struct nvme_nvm_bb_tbl {
+	__u8	tblid[4];
+	__le16	verid;
+	__le16	revid;
+	__le32	rvsd1;
+	__le32	tblks;
+	__le32	tfact;
+	__le32	tgrown;
+	__le32	tdresv;
+	__le32	thresv;
+	__le32	rsvd2[8];
+	__u8	blk[0];
+};
+
 /*
  * Check we didn't inadvertently grow the command struct
  */
@@ -195,12 +222,14 @@
 	BUILD_BUG_ON(sizeof(struct nvme_nvm_identity) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_nvm_hb_rw) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_nvm_ph_rw) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_nvm_bbtbl) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_nvm_getbbtbl) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_nvm_setbbtbl) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_nvm_l2ptbl) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960);
 	BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 128);
 	BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != 4096);
+	BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 512);
 }
 
 static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id)
@@ -234,6 +263,7 @@
 		dst->tbet = le32_to_cpu(src->tbet);
 		dst->tbem = le32_to_cpu(src->tbem);
 		dst->mpos = le32_to_cpu(src->mpos);
+		dst->mccap = le32_to_cpu(src->mccap);
 
 		dst->cpar = le16_to_cpu(src->cpar);
 	}
@@ -244,6 +274,7 @@
 static int nvme_nvm_identity(struct request_queue *q, struct nvm_id *nvm_id)
 {
 	struct nvme_ns *ns = q->queuedata;
+	struct nvme_dev *dev = ns->dev;
 	struct nvme_nvm_id *nvme_nvm_id;
 	struct nvme_nvm_command c = {};
 	int ret;
@@ -256,8 +287,8 @@
 	if (!nvme_nvm_id)
 		return -ENOMEM;
 
-	ret = nvme_submit_sync_cmd(q, (struct nvme_command *)&c, nvme_nvm_id,
-						sizeof(struct nvme_nvm_id));
+	ret = nvme_submit_sync_cmd(dev->admin_q, (struct nvme_command *)&c,
+				nvme_nvm_id, sizeof(struct nvme_nvm_id));
 	if (ret) {
 		ret = -EIO;
 		goto out;
@@ -268,6 +299,8 @@
 	nvm_id->cgrps = nvme_nvm_id->cgrps;
 	nvm_id->cap = le32_to_cpu(nvme_nvm_id->cap);
 	nvm_id->dom = le32_to_cpu(nvme_nvm_id->dom);
+	memcpy(&nvm_id->ppaf, &nvme_nvm_id->ppaf,
+					sizeof(struct nvme_nvm_addr_format));
 
 	ret = init_grps(nvm_id, nvme_nvm_id);
 out:
@@ -281,7 +314,7 @@
 	struct nvme_ns *ns = q->queuedata;
 	struct nvme_dev *dev = ns->dev;
 	struct nvme_nvm_command c = {};
-	u32 len = queue_max_hw_sectors(q) << 9;
+	u32 len = queue_max_hw_sectors(dev->admin_q) << 9;
 	u32 nlb_pr_rq = len / sizeof(u64);
 	u64 cmd_slba = slba;
 	void *entries;
@@ -299,8 +332,8 @@
 		c.l2p.slba = cpu_to_le64(cmd_slba);
 		c.l2p.nlb = cpu_to_le32(cmd_nlb);
 
-		ret = nvme_submit_sync_cmd(q, (struct nvme_command *)&c,
-								entries, len);
+		ret = nvme_submit_sync_cmd(dev->admin_q,
+				(struct nvme_command *)&c, entries, len);
 		if (ret) {
 			dev_err(dev->dev, "L2P table transfer failed (%d)\n",
 									ret);
@@ -322,43 +355,84 @@
 	return ret;
 }
 
-static int nvme_nvm_get_bb_tbl(struct request_queue *q, int lunid,
-				unsigned int nr_blocks,
-				nvm_bb_update_fn *update_bbtbl, void *priv)
+static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa,
+				int nr_blocks, nvm_bb_update_fn *update_bbtbl,
+				void *priv)
 {
+	struct request_queue *q = nvmdev->q;
 	struct nvme_ns *ns = q->queuedata;
 	struct nvme_dev *dev = ns->dev;
 	struct nvme_nvm_command c = {};
-	void *bb_bitmap;
-	u16 bb_bitmap_size;
+	struct nvme_nvm_bb_tbl *bb_tbl;
+	int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_blocks;
 	int ret = 0;
 
 	c.get_bb.opcode = nvme_nvm_admin_get_bb_tbl;
 	c.get_bb.nsid = cpu_to_le32(ns->ns_id);
-	c.get_bb.lbb = cpu_to_le32(lunid);
-	bb_bitmap_size = ((nr_blocks >> 15) + 1) * PAGE_SIZE;
-	bb_bitmap = kmalloc(bb_bitmap_size, GFP_KERNEL);
-	if (!bb_bitmap)
+	c.get_bb.spba = cpu_to_le64(ppa.ppa);
+
+	bb_tbl = kzalloc(tblsz, GFP_KERNEL);
+	if (!bb_tbl)
 		return -ENOMEM;
 
-	bitmap_zero(bb_bitmap, nr_blocks);
-
-	ret = nvme_submit_sync_cmd(q, (struct nvme_command *)&c, bb_bitmap,
-								bb_bitmap_size);
+	ret = nvme_submit_sync_cmd(dev->admin_q, (struct nvme_command *)&c,
+								bb_tbl, tblsz);
 	if (ret) {
 		dev_err(dev->dev, "get bad block table failed (%d)\n", ret);
 		ret = -EIO;
 		goto out;
 	}
 
-	ret = update_bbtbl(lunid, bb_bitmap, nr_blocks, priv);
+	if (bb_tbl->tblid[0] != 'B' || bb_tbl->tblid[1] != 'B' ||
+		bb_tbl->tblid[2] != 'L' || bb_tbl->tblid[3] != 'T') {
+		dev_err(dev->dev, "bbt format mismatch\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (le16_to_cpu(bb_tbl->verid) != 1) {
+		ret = -EINVAL;
+		dev_err(dev->dev, "bbt version not supported\n");
+		goto out;
+	}
+
+	if (le32_to_cpu(bb_tbl->tblks) != nr_blocks) {
+		ret = -EINVAL;
+		dev_err(dev->dev, "bbt unsuspected blocks returned (%u!=%u)",
+					le32_to_cpu(bb_tbl->tblks), nr_blocks);
+		goto out;
+	}
+
+	ppa = dev_to_generic_addr(nvmdev, ppa);
+	ret = update_bbtbl(ppa, nr_blocks, bb_tbl->blk, priv);
 	if (ret) {
 		ret = -EINTR;
 		goto out;
 	}
 
 out:
-	kfree(bb_bitmap);
+	kfree(bb_tbl);
+	return ret;
+}
+
+static int nvme_nvm_set_bb_tbl(struct request_queue *q, struct nvm_rq *rqd,
+								int type)
+{
+	struct nvme_ns *ns = q->queuedata;
+	struct nvme_dev *dev = ns->dev;
+	struct nvme_nvm_command c = {};
+	int ret = 0;
+
+	c.set_bb.opcode = nvme_nvm_admin_set_bb_tbl;
+	c.set_bb.nsid = cpu_to_le32(ns->ns_id);
+	c.set_bb.spba = cpu_to_le64(rqd->ppa_addr.ppa);
+	c.set_bb.nlb = cpu_to_le16(rqd->nr_pages - 1);
+	c.set_bb.value = type;
+
+	ret = nvme_submit_sync_cmd(dev->admin_q, (struct nvme_command *)&c,
+								NULL, 0);
+	if (ret)
+		dev_err(dev->dev, "set bad block table failed (%d)\n", ret);
 	return ret;
 }
 
@@ -474,6 +548,7 @@
 	.get_l2p_tbl		= nvme_nvm_get_l2p_tbl,
 
 	.get_bb_tbl		= nvme_nvm_get_bb_tbl,
+	.set_bb_tbl		= nvme_nvm_set_bb_tbl,
 
 	.submit_io		= nvme_nvm_submit_io,
 	.erase_block		= nvme_nvm_erase_block,
@@ -496,31 +571,27 @@
 	nvm_unregister(disk_name);
 }
 
+/* move to shared place when used in multiple places. */
+#define PCI_VENDOR_ID_CNEX 0x1d1d
+#define PCI_DEVICE_ID_CNEX_WL 0x2807
+#define PCI_DEVICE_ID_CNEX_QEMU 0x1f1f
+
 int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id)
 {
 	struct nvme_dev *dev = ns->dev;
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 
 	/* QEMU NVMe simulator - PCI ID + Vendor specific bit */
-	if (pdev->vendor == PCI_VENDOR_ID_INTEL && pdev->device == 0x5845 &&
+	if (pdev->vendor == PCI_VENDOR_ID_CNEX &&
+				pdev->device == PCI_DEVICE_ID_CNEX_QEMU &&
 							id->vs[0] == 0x1)
 		return 1;
 
 	/* CNEX Labs - PCI ID + Vendor specific bit */
-	if (pdev->vendor == 0x1d1d && pdev->device == 0x2807 &&
+	if (pdev->vendor == PCI_VENDOR_ID_CNEX &&
+				pdev->device == PCI_DEVICE_ID_CNEX_WL &&
 							id->vs[0] == 0x1)
 		return 1;
 
 	return 0;
 }
-#else
-int nvme_nvm_register(struct request_queue *q, char *disk_name)
-{
-	return 0;
-}
-void nvme_nvm_unregister(struct request_queue *q, char *disk_name) {};
-int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id)
-{
-	return 0;
-}
-#endif /* CONFIG_NVM */
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index fdb4e5b..044253d 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -136,8 +136,22 @@
 int nvme_sg_io32(struct nvme_ns *ns, unsigned long arg);
 int nvme_sg_get_version_num(int __user *ip);
 
+#ifdef CONFIG_NVM
 int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id);
 int nvme_nvm_register(struct request_queue *q, char *disk_name);
 void nvme_nvm_unregister(struct request_queue *q, char *disk_name);
+#else
+static inline int nvme_nvm_register(struct request_queue *q, char *disk_name)
+{
+	return 0;
+}
+
+static inline void nvme_nvm_unregister(struct request_queue *q, char *disk_name) {};
+
+static inline int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id)
+{
+	return 0;
+}
+#endif /* CONFIG_NVM */
 
 #endif /* _NVME_H */
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 8187df2..9e294ff 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -896,19 +896,28 @@
 			goto retry_cmd;
 		}
 		if (blk_integrity_rq(req)) {
-			if (blk_rq_count_integrity_sg(req->q, req->bio) != 1)
+			if (blk_rq_count_integrity_sg(req->q, req->bio) != 1) {
+				dma_unmap_sg(dev->dev, iod->sg, iod->nents,
+						dma_dir);
 				goto error_cmd;
+			}
 
 			sg_init_table(iod->meta_sg, 1);
 			if (blk_rq_map_integrity_sg(
-					req->q, req->bio, iod->meta_sg) != 1)
+					req->q, req->bio, iod->meta_sg) != 1) {
+				dma_unmap_sg(dev->dev, iod->sg, iod->nents,
+						dma_dir);
 				goto error_cmd;
+			}
 
 			if (rq_data_dir(req))
 				nvme_dif_remap(req, nvme_dif_prep);
 
-			if (!dma_map_sg(nvmeq->q_dmadev, iod->meta_sg, 1, dma_dir))
+			if (!dma_map_sg(nvmeq->q_dmadev, iod->meta_sg, 1, dma_dir)) {
+				dma_unmap_sg(dev->dev, iod->sg, iod->nents,
+						dma_dir);
 				goto error_cmd;
+			}
 		}
 	}
 
@@ -968,7 +977,8 @@
 	if (head == nvmeq->cq_head && phase == nvmeq->cq_phase)
 		return;
 
-	writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
+	if (likely(nvmeq->cq_vector >= 0))
+		writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
 	nvmeq->cq_head = head;
 	nvmeq->cq_phase = phase;
 
@@ -1727,9 +1737,13 @@
 	u32 aqa;
 	u64 cap = lo_hi_readq(&dev->bar->cap);
 	struct nvme_queue *nvmeq;
-	unsigned page_shift = PAGE_SHIFT;
+	/*
+	 * default to a 4K page size, with the intention to update this
+	 * path in the future to accomodate architectures with differing
+	 * kernel and IO page sizes.
+	 */
+	unsigned page_shift = 12;
 	unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12;
-	unsigned dev_page_max = NVME_CAP_MPSMAX(cap) + 12;
 
 	if (page_shift < dev_page_min) {
 		dev_err(dev->dev,
@@ -1738,13 +1752,6 @@
 				1 << page_shift);
 		return -ENODEV;
 	}
-	if (page_shift > dev_page_max) {
-		dev_info(dev->dev,
-				"Device maximum page size (%u) smaller than "
-				"host (%u); enabling work-around\n",
-				1 << dev_page_max, 1 << page_shift);
-		page_shift = dev_page_max;
-	}
 
 	dev->subsystem = readl(&dev->bar->vs) >= NVME_VS(1, 1) ?
 						NVME_CAP_NSSRC(cap) : 0;
@@ -2268,7 +2275,7 @@
 	if (dev->max_hw_sectors) {
 		blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors);
 		blk_queue_max_segments(ns->queue,
-			((dev->max_hw_sectors << 9) / dev->page_size) + 1);
+			(dev->max_hw_sectors / (dev->page_size >> 9)) + 1);
 	}
 	if (dev->stripe_size)
 		blk_queue_chunk_sectors(ns->queue, dev->stripe_size >> 9);
@@ -2701,6 +2708,18 @@
 	dev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, NVME_Q_DEPTH);
 	dev->db_stride = 1 << NVME_CAP_STRIDE(cap);
 	dev->dbs = ((void __iomem *)dev->bar) + 4096;
+
+	/*
+	 * Temporary fix for the Apple controller found in the MacBook8,1 and
+	 * some MacBook7,1 to avoid controller resets and data loss.
+	 */
+	if (pdev->vendor == PCI_VENDOR_ID_APPLE && pdev->device == 0x2001) {
+		dev->q_depth = 2;
+		dev_warn(dev->dev, "detected Apple NVMe controller, set "
+			"queue depth=%u to work around controller resets\n",
+			dev->q_depth);
+	}
+
 	if (readl(&dev->bar->vs) >= NVME_VS(1, 2))
 		dev->cmb = nvme_map_cmb(dev);
 
@@ -2787,6 +2806,10 @@
 {
 	struct nvme_delq_ctx *dq = nvmeq->cmdinfo.ctx;
 	nvme_put_dq(dq);
+
+	spin_lock_irq(&nvmeq->q_lock);
+	nvme_process_cq(nvmeq);
+	spin_unlock_irq(&nvmeq->q_lock);
 }
 
 static int adapter_async_del_queue(struct nvme_queue *nvmeq, u8 opcode,
diff --git a/drivers/pci/host/pcie-designware.c b/drivers/pci/host/pcie-designware.c
index 540f077..02a7452 100644
--- a/drivers/pci/host/pcie-designware.c
+++ b/drivers/pci/host/pcie-designware.c
@@ -440,7 +440,6 @@
 					 ret, pp->io);
 				continue;
 			}
-			pp->io_base = pp->io->start;
 			break;
 		case IORESOURCE_MEM:
 			pp->mem = win->res;
diff --git a/drivers/pci/host/pcie-hisi.c b/drivers/pci/host/pcie-hisi.c
index 35457ec..163671a 100644
--- a/drivers/pci/host/pcie-hisi.c
+++ b/drivers/pci/host/pcie-hisi.c
@@ -111,7 +111,7 @@
 	.link_up = hisi_pcie_link_up,
 };
 
-static int __init hisi_add_pcie_port(struct pcie_port *pp,
+static int hisi_add_pcie_port(struct pcie_port *pp,
 				     struct platform_device *pdev)
 {
 	int ret;
@@ -139,7 +139,7 @@
 	return 0;
 }
 
-static int __init hisi_pcie_probe(struct platform_device *pdev)
+static int hisi_pcie_probe(struct platform_device *pdev)
 {
 	struct hisi_pcie *hisi_pcie;
 	struct pcie_port *pp;
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 4446fcb..d7ffd66 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -1146,9 +1146,21 @@
 	pci_dev->state_saved = false;
 	pci_dev->no_d3cold = false;
 	error = pm->runtime_suspend(dev);
-	suspend_report_result(pm->runtime_suspend, error);
-	if (error)
+	if (error) {
+		/*
+		 * -EBUSY and -EAGAIN is used to request the runtime PM core
+		 * to schedule a new suspend, so log the event only with debug
+		 * log level.
+		 */
+		if (error == -EBUSY || error == -EAGAIN)
+			dev_dbg(dev, "can't suspend now (%pf returned %d)\n",
+				pm->runtime_suspend, error);
+		else
+			dev_err(dev, "can't suspend (%pf returned %d)\n",
+				pm->runtime_suspend, error);
+
 		return error;
+	}
 	if (!pci_dev->d3cold_allowed)
 		pci_dev->no_d3cold = true;
 
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 9261868..eead54c 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -216,7 +216,10 @@
 	if (ret)
 		return ret;
 
-	if (node >= MAX_NUMNODES || !node_online(node))
+	if ((node < 0 && node != NUMA_NO_NODE) || node >= MAX_NUMNODES)
+		return -EINVAL;
+
+	if (node != NUMA_NO_NODE && !node_online(node))
 		return -EINVAL;
 
 	add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index fd2f03f..d390fc1 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -337,6 +337,4 @@
 }
 #endif
 
-struct pci_host_bridge *pci_find_host_bridge(struct pci_bus *bus);
-
 #endif /* DRIVERS_PCI_H */
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index e735c72..edb1984 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1685,8 +1685,8 @@
 {
 	struct device *bridge = pci_get_host_bridge_device(dev);
 
-	if (IS_ENABLED(CONFIG_OF) && dev->dev.of_node) {
-		if (bridge->parent)
+	if (IS_ENABLED(CONFIG_OF) &&
+		bridge->parent && bridge->parent->of_node) {
 			of_dma_configure(&dev->dev, bridge->parent->of_node);
 	} else if (has_acpi_companion(bridge)) {
 		struct acpi_device *adev = to_acpi_device_node(bridge->fwnode);
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 7e32730..c2dd52e 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3405,7 +3405,9 @@
 	return 0;
 }
 
-#include "../gpu/drm/i915/i915_reg.h"
+#define SOUTH_CHICKEN2		0xc2004
+#define PCH_PP_STATUS		0xc7200
+#define PCH_PP_CONTROL		0xc7204
 #define MSG_CTL			0x45010
 #define NSDE_PWR_STATE		0xd0100
 #define IGD_OPERATION_TIMEOUT	10000     /* set timeout 10 seconds */
diff --git a/drivers/pinctrl/Kconfig b/drivers/pinctrl/Kconfig
index b422e4e..312c78b 100644
--- a/drivers/pinctrl/Kconfig
+++ b/drivers/pinctrl/Kconfig
@@ -5,8 +5,6 @@
 config PINCTRL
 	bool
 
-if PINCTRL
-
 menu "Pin controllers"
 	depends on PINCTRL
 
@@ -274,5 +272,3 @@
 	select GPIOLIB
 
 endmenu
-
-endif
diff --git a/drivers/pinctrl/freescale/pinctrl-imx1-core.c b/drivers/pinctrl/freescale/pinctrl-imx1-core.c
index 88a7fac..acaf84c 100644
--- a/drivers/pinctrl/freescale/pinctrl-imx1-core.c
+++ b/drivers/pinctrl/freescale/pinctrl-imx1-core.c
@@ -538,8 +538,10 @@
 		func->groups[i] = child->name;
 		grp = &info->groups[grp_index++];
 		ret = imx1_pinctrl_parse_groups(child, grp, info, i++);
-		if (ret == -ENOMEM)
+		if (ret == -ENOMEM) {
+			of_node_put(child);
 			return ret;
+		}
 	}
 
 	return 0;
@@ -582,8 +584,10 @@
 
 	for_each_child_of_node(np, child) {
 		ret = imx1_pinctrl_parse_functions(child, info, ifunc++);
-		if (ret == -ENOMEM)
+		if (ret == -ENOMEM) {
+			of_node_put(child);
 			return -ENOMEM;
+		}
 	}
 
 	return 0;
diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
index f307f1d..5c71727 100644
--- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
+++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
@@ -747,7 +747,7 @@
 	reg_addr =  mtk_get_port(pctl, offset) + pctl->devdata->dir_offset;
 	bit = BIT(offset & 0xf);
 	regmap_read(pctl->regmap1, reg_addr, &read_val);
-	return !!(read_val & bit);
+	return !(read_val & bit);
 }
 
 static int mtk_gpio_get(struct gpio_chip *chip, unsigned offset)
@@ -757,12 +757,8 @@
 	unsigned int read_val = 0;
 	struct mtk_pinctrl *pctl = dev_get_drvdata(chip->dev);
 
-	if (mtk_gpio_get_direction(chip, offset))
-		reg_addr = mtk_get_port(pctl, offset) +
-			pctl->devdata->dout_offset;
-	else
-		reg_addr = mtk_get_port(pctl, offset) +
-			pctl->devdata->din_offset;
+	reg_addr = mtk_get_port(pctl, offset) +
+		pctl->devdata->din_offset;
 
 	bit = BIT(offset & 0xf);
 	regmap_read(pctl->regmap1, reg_addr, &read_val);
@@ -997,6 +993,7 @@
 	.owner			= THIS_MODULE,
 	.request		= gpiochip_generic_request,
 	.free			= gpiochip_generic_free,
+	.get_direction		= mtk_gpio_get_direction,
 	.direction_input	= mtk_gpio_direction_input,
 	.direction_output	= mtk_gpio_direction_output,
 	.get			= mtk_gpio_get,
diff --git a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c
index d809c9e..19a3c3b 100644
--- a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c
+++ b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c
@@ -672,7 +672,7 @@
 		return -ENOMEM;
 
 	pctrl->dev = &pdev->dev;
-	pctrl->npins = (unsigned)of_device_get_match_data(&pdev->dev);
+	pctrl->npins = (unsigned long)of_device_get_match_data(&pdev->dev);
 
 	pctrl->regmap = dev_get_regmap(pdev->dev.parent, NULL);
 	if (!pctrl->regmap) {
diff --git a/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c b/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c
index 8982027..b868ef1 100644
--- a/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c
+++ b/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c
@@ -763,7 +763,7 @@
 		return -ENOMEM;
 
 	pctrl->dev = &pdev->dev;
-	pctrl->npins = (unsigned)of_device_get_match_data(&pdev->dev);
+	pctrl->npins = (unsigned long)of_device_get_match_data(&pdev->dev);
 
 	pctrl->regmap = dev_get_regmap(pdev->dev.parent, NULL);
 	if (!pctrl->regmap) {
diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7734.c b/drivers/pinctrl/sh-pfc/pfc-sh7734.c
index e7deb51..9842bb1 100644
--- a/drivers/pinctrl/sh-pfc/pfc-sh7734.c
+++ b/drivers/pinctrl/sh-pfc/pfc-sh7734.c
@@ -31,11 +31,11 @@
 	PORT_GP_12(5, fn, sfx)
 
 #undef _GP_DATA
-#define _GP_DATA(bank, pin, name, sfx)					\
+#define _GP_DATA(bank, pin, name, sfx, cfg)				\
 	PINMUX_DATA(name##_DATA, name##_FN, name##_IN, name##_OUT)
 
-#define _GP_INOUTSEL(bank, pin, name, sfx)	name##_IN, name##_OUT
-#define _GP_INDT(bank, pin, name, sfx)		name##_DATA
+#define _GP_INOUTSEL(bank, pin, name, sfx, cfg)	name##_IN, name##_OUT
+#define _GP_INDT(bank, pin, name, sfx, cfg)	name##_DATA
 #define GP_INOUTSEL(bank)	PORT_GP_32_REV(bank, _GP_INOUTSEL, unused)
 #define GP_INDT(bank)		PORT_GP_32_REV(bank, _GP_INDT, unused)
 
diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index 8b3130f..9e03d15 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -1478,6 +1478,8 @@
 
 static void __exit remoteproc_exit(void)
 {
+	ida_destroy(&rproc_dev_index);
+
 	rproc_exit_debugfs();
 }
 module_exit(remoteproc_exit);
diff --git a/drivers/remoteproc/remoteproc_debugfs.c b/drivers/remoteproc/remoteproc_debugfs.c
index 9d30809..916af50 100644
--- a/drivers/remoteproc/remoteproc_debugfs.c
+++ b/drivers/remoteproc/remoteproc_debugfs.c
@@ -156,7 +156,7 @@
 	char buf[10];
 	int ret;
 
-	if (count > sizeof(buf))
+	if (count < 1 || count > sizeof(buf))
 		return count;
 
 	ret = copy_from_user(buf, user_buf, count);
diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index 188006c..aa705bb 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -15,9 +15,6 @@
 #include <linux/i2c.h>
 #include <linux/init.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
-#include <linux/of_irq.h>
-#include <linux/pm_wakeirq.h>
 #include <linux/rtc/ds1307.h>
 #include <linux/rtc.h>
 #include <linux/slab.h>
@@ -117,7 +114,6 @@
 #define HAS_ALARM	1		/* bit 1 == irq claimed */
 	struct i2c_client	*client;
 	struct rtc_device	*rtc;
-	int			wakeirq;
 	s32 (*read_block_data)(const struct i2c_client *client, u8 command,
 			       u8 length, u8 *values);
 	s32 (*write_block_data)(const struct i2c_client *client, u8 command,
@@ -1138,7 +1134,10 @@
 				bin2bcd(tmp));
 	}
 
-	device_set_wakeup_capable(&client->dev, want_irq);
+	if (want_irq) {
+		device_set_wakeup_capable(&client->dev, true);
+		set_bit(HAS_ALARM, &ds1307->flags);
+	}
 	ds1307->rtc = devm_rtc_device_register(&client->dev, client->name,
 				rtc_ops, THIS_MODULE);
 	if (IS_ERR(ds1307->rtc)) {
@@ -1146,43 +1145,19 @@
 	}
 
 	if (want_irq) {
-		struct device_node *node = client->dev.of_node;
-
 		err = devm_request_threaded_irq(&client->dev,
 						client->irq, NULL, irq_handler,
 						IRQF_SHARED | IRQF_ONESHOT,
 						ds1307->rtc->name, client);
 		if (err) {
 			client->irq = 0;
+			device_set_wakeup_capable(&client->dev, false);
+			clear_bit(HAS_ALARM, &ds1307->flags);
 			dev_err(&client->dev, "unable to request IRQ!\n");
-			goto no_irq;
-		}
-
-		set_bit(HAS_ALARM, &ds1307->flags);
-		dev_dbg(&client->dev, "got IRQ %d\n", client->irq);
-
-		/* Currently supported by OF code only! */
-		if (!node)
-			goto no_irq;
-
-		err = of_irq_get(node, 1);
-		if (err <= 0) {
-			if (err == -EPROBE_DEFER)
-				goto exit;
-			goto no_irq;
-		}
-		ds1307->wakeirq = err;
-
-		err = dev_pm_set_dedicated_wake_irq(&client->dev,
-						    ds1307->wakeirq);
-		if (err) {
-			dev_err(&client->dev, "unable to setup wakeIRQ %d!\n",
-				err);
-			goto exit;
-		}
+		} else
+			dev_dbg(&client->dev, "got IRQ %d\n", client->irq);
 	}
 
-no_irq:
 	if (chip->nvram_size) {
 
 		ds1307->nvram = devm_kzalloc(&client->dev,
@@ -1226,9 +1201,6 @@
 {
 	struct ds1307 *ds1307 = i2c_get_clientdata(client);
 
-	if (ds1307->wakeirq)
-		dev_pm_clear_wake_irq(&client->dev);
-
 	if (test_and_clear_bit(HAS_NVRAM, &ds1307->flags))
 		sysfs_remove_bin_file(&client->dev.kobj, ds1307->nvram);
 
diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c
index 548a189..a831d18 100644
--- a/drivers/s390/cio/chsc.c
+++ b/drivers/s390/cio/chsc.c
@@ -1080,28 +1080,10 @@
 	free_page((unsigned long)sei_page);
 }
 
-int chsc_enable_facility(int operation_code)
+int __chsc_enable_facility(struct chsc_sda_area *sda_area, int operation_code)
 {
-	unsigned long flags;
 	int ret;
-	struct {
-		struct chsc_header request;
-		u8 reserved1:4;
-		u8 format:4;
-		u8 reserved2;
-		u16 operation_code;
-		u32 reserved3;
-		u32 reserved4;
-		u32 operation_data_area[252];
-		struct chsc_header response;
-		u32 reserved5:4;
-		u32 format2:4;
-		u32 reserved6:24;
-	} __attribute__ ((packed)) *sda_area;
 
-	spin_lock_irqsave(&chsc_page_lock, flags);
-	memset(chsc_page, 0, PAGE_SIZE);
-	sda_area = chsc_page;
 	sda_area->request.length = 0x0400;
 	sda_area->request.code = 0x0031;
 	sda_area->operation_code = operation_code;
@@ -1119,10 +1101,25 @@
 	default:
 		ret = chsc_error_from_response(sda_area->response.code);
 	}
+out:
+	return ret;
+}
+
+int chsc_enable_facility(int operation_code)
+{
+	struct chsc_sda_area *sda_area;
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&chsc_page_lock, flags);
+	memset(chsc_page, 0, PAGE_SIZE);
+	sda_area = chsc_page;
+
+	ret = __chsc_enable_facility(sda_area, operation_code);
 	if (ret != 0)
 		CIO_CRW_EVENT(2, "chsc: sda (oc=%x) failed (rc=%04x)\n",
 			      operation_code, sda_area->response.code);
-out:
+
 	spin_unlock_irqrestore(&chsc_page_lock, flags);
 	return ret;
 }
diff --git a/drivers/s390/cio/chsc.h b/drivers/s390/cio/chsc.h
index 76c9b50..0de134c 100644
--- a/drivers/s390/cio/chsc.h
+++ b/drivers/s390/cio/chsc.h
@@ -115,6 +115,20 @@
 	u8 data[PAGE_SIZE - 20];
 } __attribute__ ((packed));
 
+struct chsc_sda_area {
+	struct chsc_header request;
+	u8 :4;
+	u8 format:4;
+	u8 :8;
+	u16 operation_code;
+	u32 :32;
+	u32 :32;
+	u32 operation_data_area[252];
+	struct chsc_header response;
+	u32 :4;
+	u32 format2:4;
+	u32 :24;
+} __packed __aligned(PAGE_SIZE);
 
 extern int chsc_get_ssd_info(struct subchannel_id schid,
 			     struct chsc_ssd_info *ssd);
@@ -122,6 +136,7 @@
 extern int chsc_init(void);
 extern void chsc_init_cleanup(void);
 
+int __chsc_enable_facility(struct chsc_sda_area *sda_area, int operation_code);
 extern int chsc_enable_facility(int);
 struct channel_subsystem;
 extern int chsc_secm(struct channel_subsystem *, int);
diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index b5620e8..690b854 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -925,18 +925,32 @@
 
 int __init cio_get_iplinfo(struct cio_iplinfo *iplinfo)
 {
+	static struct chsc_sda_area sda_area __initdata;
 	struct subchannel_id schid;
 	struct schib schib;
 
 	schid = *(struct subchannel_id *)&S390_lowcore.subchannel_id;
 	if (!schid.one)
 		return -ENODEV;
+
+	if (schid.ssid) {
+		/*
+		 * Firmware should have already enabled MSS but whoever started
+		 * the kernel might have initiated a channel subsystem reset.
+		 * Ensure that MSS is enabled.
+		 */
+		memset(&sda_area, 0, sizeof(sda_area));
+		if (__chsc_enable_facility(&sda_area, CHSC_SDA_OC_MSS))
+			return -ENODEV;
+	}
 	if (stsch_err(schid, &schib))
 		return -ENODEV;
 	if (schib.pmcw.st != SUBCHANNEL_TYPE_IO)
 		return -ENODEV;
 	if (!schib.pmcw.dnv)
 		return -ENODEV;
+
+	iplinfo->ssid = schid.ssid;
 	iplinfo->devno = schib.pmcw.dev;
 	iplinfo->is_qdio = schib.pmcw.qf;
 	return 0;
diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c
index 2ee3053..489e703 100644
--- a/drivers/s390/cio/css.c
+++ b/drivers/s390/cio/css.c
@@ -702,17 +702,12 @@
 		css->global_pgid.pgid_high.ext_cssid.version = 0x80;
 		css->global_pgid.pgid_high.ext_cssid.cssid = css->cssid;
 	} else {
-#ifdef CONFIG_SMP
 		css->global_pgid.pgid_high.cpu_addr = stap();
-#else
-		css->global_pgid.pgid_high.cpu_addr = 0;
-#endif
 	}
 	get_cpu_id(&cpu_id);
 	css->global_pgid.cpu_id = cpu_id.ident;
 	css->global_pgid.cpu_model = cpu_id.machine;
 	css->global_pgid.tod_high = tod_high;
-
 }
 
 static void
diff --git a/drivers/s390/crypto/Makefile b/drivers/s390/crypto/Makefile
index 57f710b..b8ab186 100644
--- a/drivers/s390/crypto/Makefile
+++ b/drivers/s390/crypto/Makefile
@@ -3,6 +3,9 @@
 #
 
 ap-objs := ap_bus.o
-obj-$(CONFIG_ZCRYPT) += ap.o zcrypt_api.o zcrypt_pcixcc.o
-obj-$(CONFIG_ZCRYPT) += zcrypt_cex2a.o zcrypt_cex4.o
+# zcrypt_api depends on ap
+obj-$(CONFIG_ZCRYPT) += ap.o zcrypt_api.o
+# msgtype* depend on zcrypt_api
 obj-$(CONFIG_ZCRYPT) += zcrypt_msgtype6.o zcrypt_msgtype50.o
+# adapter drivers depend on ap, zcrypt_api and msgtype*
+obj-$(CONFIG_ZCRYPT) += zcrypt_pcixcc.o zcrypt_cex2a.o zcrypt_cex4.o
diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index 9cb3dfb..61f7685 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@ -74,6 +74,7 @@
 static struct ap_config_info *ap_configuration;
 static DEFINE_SPINLOCK(ap_device_list_lock);
 static LIST_HEAD(ap_device_list);
+static bool initialised;
 
 /*
  * Workqueue timer for bus rescan.
@@ -1384,6 +1385,9 @@
 {
 	struct device_driver *drv = &ap_drv->driver;
 
+	if (!initialised)
+		return -ENODEV;
+
 	drv->bus = &ap_bus_type;
 	drv->probe = ap_device_probe;
 	drv->remove = ap_device_remove;
@@ -1808,6 +1812,7 @@
 		goto out_pm;
 
 	queue_work(system_long_wq, &ap_scan_work);
+	initialised = true;
 
 	return 0;
 
@@ -1837,6 +1842,7 @@
 {
 	int i;
 
+	initialised = false;
 	ap_reset_domain();
 	ap_poll_thread_stop();
 	del_timer_sync(&ap_config_timer);
diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c
index a9603eb..9f8fa42 100644
--- a/drivers/s390/crypto/zcrypt_api.c
+++ b/drivers/s390/crypto/zcrypt_api.c
@@ -317,11 +317,9 @@
 
 void zcrypt_msgtype_register(struct zcrypt_ops *zops)
 {
-	if (zops->owner) {
-		spin_lock_bh(&zcrypt_ops_list_lock);
-		list_add_tail(&zops->list, &zcrypt_ops_list);
-		spin_unlock_bh(&zcrypt_ops_list_lock);
-	}
+	spin_lock_bh(&zcrypt_ops_list_lock);
+	list_add_tail(&zops->list, &zcrypt_ops_list);
+	spin_unlock_bh(&zcrypt_ops_list_lock);
 }
 EXPORT_SYMBOL(zcrypt_msgtype_register);
 
@@ -342,7 +340,7 @@
 	spin_lock_bh(&zcrypt_ops_list_lock);
 	list_for_each_entry(zops, &zcrypt_ops_list, list) {
 		if ((zops->variant == variant) &&
-		    (!strncmp(zops->owner->name, name, MODULE_NAME_LEN))) {
+		    (!strncmp(zops->name, name, sizeof(zops->name)))) {
 			found = 1;
 			break;
 		}
diff --git a/drivers/s390/crypto/zcrypt_api.h b/drivers/s390/crypto/zcrypt_api.h
index 7508768..38618f0 100644
--- a/drivers/s390/crypto/zcrypt_api.h
+++ b/drivers/s390/crypto/zcrypt_api.h
@@ -96,6 +96,7 @@
 	struct list_head list;		/* zcrypt ops list. */
 	struct module *owner;
 	int variant;
+	char name[128];
 };
 
 struct zcrypt_device {
diff --git a/drivers/s390/crypto/zcrypt_msgtype50.c b/drivers/s390/crypto/zcrypt_msgtype50.c
index 71ceee9..74edf29 100644
--- a/drivers/s390/crypto/zcrypt_msgtype50.c
+++ b/drivers/s390/crypto/zcrypt_msgtype50.c
@@ -513,6 +513,7 @@
 	.rsa_modexpo = zcrypt_cex2a_modexpo,
 	.rsa_modexpo_crt = zcrypt_cex2a_modexpo_crt,
 	.owner = THIS_MODULE,
+	.name = MSGTYPE50_NAME,
 	.variant = MSGTYPE50_VARIANT_DEFAULT,
 };
 
diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c
index 7476221..9a2dd47 100644
--- a/drivers/s390/crypto/zcrypt_msgtype6.c
+++ b/drivers/s390/crypto/zcrypt_msgtype6.c
@@ -1119,6 +1119,7 @@
  */
 static struct zcrypt_ops zcrypt_msgtype6_norng_ops = {
 	.owner = THIS_MODULE,
+	.name = MSGTYPE06_NAME,
 	.variant = MSGTYPE06_VARIANT_NORNG,
 	.rsa_modexpo = zcrypt_msgtype6_modexpo,
 	.rsa_modexpo_crt = zcrypt_msgtype6_modexpo_crt,
@@ -1127,6 +1128,7 @@
 
 static struct zcrypt_ops zcrypt_msgtype6_ops = {
 	.owner = THIS_MODULE,
+	.name = MSGTYPE06_NAME,
 	.variant = MSGTYPE06_VARIANT_DEFAULT,
 	.rsa_modexpo = zcrypt_msgtype6_modexpo,
 	.rsa_modexpo_crt = zcrypt_msgtype6_modexpo_crt,
@@ -1136,6 +1138,7 @@
 
 static struct zcrypt_ops zcrypt_msgtype6_ep11_ops = {
 	.owner = THIS_MODULE,
+	.name = MSGTYPE06_NAME,
 	.variant = MSGTYPE06_VARIANT_EP11,
 	.rsa_modexpo = NULL,
 	.rsa_modexpo_crt = NULL,
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 5f692ae..64eed87 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -364,6 +364,7 @@
 	tristate "HP Smart Array SCSI driver"
 	depends on PCI && SCSI
 	select CHECK_SIGNATURE
+	select SCSI_SAS_ATTRS
 	help
 	  This driver supports HP Smart Array Controllers (circa 2009).
 	  It is a SCSI alternative to the cciss driver, which is a block
@@ -499,6 +500,7 @@
 	tristate "AdvanSys SCSI support"
 	depends on SCSI
 	depends on ISA || EISA || PCI
+	depends on ISA_DMA_API || !ISA
 	help
 	  This is a driver for all SCSI host adapters manufactured by
 	  AdvanSys. It is documented in the kernel source in
diff --git a/drivers/scsi/advansys.c b/drivers/scsi/advansys.c
index 519f9a4..febbd83 100644
--- a/drivers/scsi/advansys.c
+++ b/drivers/scsi/advansys.c
@@ -7803,7 +7803,7 @@
 		return ASC_BUSY;
 	}
 	scsiqp->sense_addr = cpu_to_le32(sense_addr);
-	scsiqp->sense_len = cpu_to_le32(SCSI_SENSE_BUFFERSIZE);
+	scsiqp->sense_len = SCSI_SENSE_BUFFERSIZE;
 
 	/* Build ADV_SCSI_REQ_Q */
 
diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index 323982f..82ac1cd 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -333,6 +333,17 @@
 		kfree(queuedata);
 	}
 
+	if (shost->shost_state == SHOST_CREATED) {
+		/*
+		 * Free the shost_dev device name here if scsi_host_alloc()
+		 * and scsi_host_put() have been called but neither
+		 * scsi_host_add() nor scsi_host_remove() has been called.
+		 * This avoids that the memory allocated for the shost_dev
+		 * name is leaked.
+		 */
+		kfree(dev_name(&shost->shost_dev));
+	}
+
 	scsi_destroy_command_freelist(shost);
 	if (shost_use_blk_mq(shost)) {
 		if (shost->tag_set.tags)
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 6a8f958..a386036 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -8671,7 +8671,7 @@
 	if ((rc != 0)  || (c->err_info->CommandStatus != 0))
 		goto errout;
 
-	if (*options && HPSA_DIAG_OPTS_DISABLE_RLD_CACHING)
+	if (*options & HPSA_DIAG_OPTS_DISABLE_RLD_CACHING)
 		goto out;
 
 errout:
diff --git a/drivers/scsi/mpt3sas/Kconfig b/drivers/scsi/mpt3sas/Kconfig
index 2906146..b736dbc 100644
--- a/drivers/scsi/mpt3sas/Kconfig
+++ b/drivers/scsi/mpt3sas/Kconfig
@@ -71,3 +71,12 @@
 	MAX_PHYS_SEGMENTS in most kernels.  However in SuSE kernels this
 	can be 256. However, it may decreased down to 16.  Decreasing this
 	parameter will reduce memory requirements on a per controller instance.
+
+config SCSI_MPT2SAS
+	tristate "Legacy MPT2SAS config option"
+	default n
+	select SCSI_MPT3SAS
+	depends on PCI && SCSI
+	---help---
+	Dummy config option for backwards compatiblity: configure the MPT3SAS
+	driver instead.
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index d95206b..9ab77b0 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -3905,8 +3905,7 @@
 	 * We do not expose raid functionality to upper layer for warpdrive.
 	 */
 	if (!ioc->is_warpdrive && !scsih_is_raid(&scmd->device->sdev_gendev)
-	    && (sas_device_priv_data->flags & MPT_DEVICE_TLR_ON) &&
-	    scmd->cmd_len != 32)
+	    && sas_is_tlr_enabled(scmd->device) && scmd->cmd_len != 32)
 		mpi_control |= MPI2_SCSIIO_CONTROL_TLR_ON;
 
 	smid = mpt3sas_base_get_smid_scsiio(ioc, ioc->scsi_io_cb_idx, scmd);
diff --git a/drivers/scsi/mvsas/mv_init.c b/drivers/scsi/mvsas/mv_init.c
index 90fdf0e..675e7fa 100644
--- a/drivers/scsi/mvsas/mv_init.c
+++ b/drivers/scsi/mvsas/mv_init.c
@@ -758,7 +758,7 @@
 			struct device_attribute *attr,
 			const char *buffer, size_t size)
 {
-	int val = 0;
+	unsigned int val = 0;
 	struct mvs_info *mvi = NULL;
 	struct Scsi_Host *shost = class_to_shost(cdev);
 	struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost);
@@ -766,7 +766,7 @@
 	if (buffer == NULL)
 		return size;
 
-	if (sscanf(buffer, "%d", &val) != 1)
+	if (sscanf(buffer, "%u", &val) != 1)
 		return -EINVAL;
 
 	if (val >= 0x10000) {
diff --git a/drivers/scsi/qla2xxx/qla_nx.c b/drivers/scsi/qla2xxx/qla_nx.c
index eb0cc54..b6b4cfd 100644
--- a/drivers/scsi/qla2xxx/qla_nx.c
+++ b/drivers/scsi/qla2xxx/qla_nx.c
@@ -433,7 +433,7 @@
 	if (off_in < QLA82XX_PCI_CRBSPACE)
 		return -1;
 
-	*off_out = (void __iomem *)(off_in - QLA82XX_PCI_CRBSPACE);
+	off_in -= QLA82XX_PCI_CRBSPACE;
 
 	/* Try direct map */
 	m = &crb_128M_2M_map[CRB_BLK(off_in)].sub_block[CRB_SUBBLK(off_in)];
@@ -443,6 +443,7 @@
 		return 0;
 	}
 	/* Not in direct map, use crb window */
+	*off_out = (void __iomem *)off_in;
 	return 1;
 }
 
diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
index 3ba2e95..81af294 100644
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
@@ -902,7 +902,7 @@
 	return sprintf(page, "%d\n", tpg->tpg_attrib.fabric_prot_type);
 }
 
-CONFIGFS_ATTR_WO(tcm_qla2xxx_tpg_, enable);
+CONFIGFS_ATTR(tcm_qla2xxx_tpg_, enable);
 CONFIGFS_ATTR_RO(tcm_qla2xxx_tpg_, dynamic_sessions);
 CONFIGFS_ATTR(tcm_qla2xxx_tpg_, fabric_prot_type);
 
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index dfcc45b..d09d602 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -465,8 +465,9 @@
 	     0} },
 	{0, 0, 0, F_INV_OP | FF_RESPOND, NULL, NULL, /* MAINT OUT */
 	    {0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} },
-	{0, 0, 0, F_INV_OP | FF_RESPOND, NULL, NULL, /* VERIFY */
-	    {0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} },
+	{0, 0x2f, 0, F_D_OUT_MAYBE | FF_DIRECT_IO, NULL, NULL, /* VERIFY(10) */
+	    {10,  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc7,
+	     0, 0, 0, 0, 0, 0} },
 	{1, 0x7f, 0x9, F_SA_HIGH | F_D_IN | FF_DIRECT_IO, resp_read_dt0,
 	    vl_iarr, {32,  0xc7, 0, 0, 0, 0, 0x1f, 0x18, 0x0, 0x9, 0xfe, 0,
 		      0xff, 0xff, 0xff, 0xff} },/* VARIABLE LENGTH, READ(32) */
@@ -477,8 +478,8 @@
 	    {10,  0x13, 0xff, 0xff, 0, 0, 0, 0xff, 0xff, 0xc7, 0, 0, 0, 0, 0,
 	     0} },
 /* 20 */
-	{0, 0, 0, F_INV_OP | FF_RESPOND, NULL, NULL, /* ALLOW REMOVAL */
-	    {0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} },
+	{0, 0x1e, 0, 0, NULL, NULL, /* ALLOW REMOVAL */
+	    {6,  0, 0, 0, 0x3, 0xc7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} },
 	{0, 0x1, 0, 0, resp_start_stop, NULL, /* REWIND ?? */
 	    {6,  0x1, 0, 0, 0, 0xc7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} },
 	{0, 0, 0, F_INV_OP | FF_RESPOND, NULL, NULL, /* ATA_PT */
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 8324539..054923e 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -701,9 +701,12 @@
 	 * strings.
 	 */
 	if (sdev->inquiry_len < 36) {
-		sdev_printk(KERN_INFO, sdev,
-			    "scsi scan: INQUIRY result too short (%d),"
-			    " using 36\n", sdev->inquiry_len);
+		if (!sdev->host->short_inquiry) {
+			shost_printk(KERN_INFO, sdev->host,
+				    "scsi scan: INQUIRY result too short (%d),"
+				    " using 36\n", sdev->inquiry_len);
+			sdev->host->short_inquiry = 1;
+		}
 		sdev->inquiry_len = 36;
 	}
 
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 8d23122..21930c9 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -1102,6 +1102,14 @@
 {
 	struct device *dev = &sdev->sdev_gendev;
 
+	/*
+	 * This cleanup path is not reentrant and while it is impossible
+	 * to get a new reference with scsi_device_get() someone can still
+	 * hold a previously acquired one.
+	 */
+	if (sdev->sdev_state == SDEV_DEL)
+		return;
+
 	if (sdev->is_visible) {
 		if (scsi_device_set_state(sdev, SDEV_CANCEL) != 0)
 			return;
@@ -1110,7 +1118,9 @@
 		device_unregister(&sdev->sdev_dev);
 		transport_remove_device(dev);
 		scsi_dh_remove_device(sdev);
-	}
+		device_del(dev);
+	} else
+		put_device(&sdev->sdev_dev);
 
 	/*
 	 * Stop accepting new requests and wait until all queuecommand() and
@@ -1121,16 +1131,6 @@
 	blk_cleanup_queue(sdev->request_queue);
 	cancel_work_sync(&sdev->requeue_work);
 
-	/*
-	 * Remove the device after blk_cleanup_queue() has been called such
-	 * a possible bdi_register() call with the same name occurs after
-	 * blk_cleanup_queue() has called bdi_destroy().
-	 */
-	if (sdev->is_visible)
-		device_del(dev);
-	else
-		put_device(&sdev->sdev_dev);
-
 	if (sdev->host->hostt->slave_destroy)
 		sdev->host->hostt->slave_destroy(sdev);
 	transport_destroy_device(dev);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 5451980..3d22fc3 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -638,11 +638,24 @@
 	unsigned int max_blocks = 0;
 
 	q->limits.discard_zeroes_data = 0;
-	q->limits.discard_alignment = sdkp->unmap_alignment *
-		logical_block_size;
-	q->limits.discard_granularity =
-		max(sdkp->physical_block_size,
-		    sdkp->unmap_granularity * logical_block_size);
+
+	/*
+	 * When LBPRZ is reported, discard alignment and granularity
+	 * must be fixed to the logical block size. Otherwise the block
+	 * layer will drop misaligned portions of the request which can
+	 * lead to data corruption. If LBPRZ is not set, we honor the
+	 * device preference.
+	 */
+	if (sdkp->lbprz) {
+		q->limits.discard_alignment = 0;
+		q->limits.discard_granularity = 1;
+	} else {
+		q->limits.discard_alignment = sdkp->unmap_alignment *
+			logical_block_size;
+		q->limits.discard_granularity =
+			max(sdkp->physical_block_size,
+			    sdkp->unmap_granularity * logical_block_size);
+	}
 
 	sdkp->provisioning_mode = mode;
 
@@ -2321,11 +2334,8 @@
 		}
 	}
 
-	if (sdkp->capacity > 0xffffffff) {
+	if (sdkp->capacity > 0xffffffff)
 		sdp->use_16_for_rw = 1;
-		sdkp->max_xfer_blocks = SD_MAX_XFER_BLOCKS;
-	} else
-		sdkp->max_xfer_blocks = SD_DEF_XFER_BLOCKS;
 
 	/* Rescale capacity to 512-byte units */
 	if (sector_size == 4096)
@@ -2642,7 +2652,6 @@
 {
 	unsigned int sector_sz = sdkp->device->sector_size;
 	const int vpd_len = 64;
-	u32 max_xfer_length;
 	unsigned char *buffer = kmalloc(vpd_len, GFP_KERNEL);
 
 	if (!buffer ||
@@ -2650,14 +2659,11 @@
 	    scsi_get_vpd_page(sdkp->device, 0xb0, buffer, vpd_len))
 		goto out;
 
-	max_xfer_length = get_unaligned_be32(&buffer[8]);
-	if (max_xfer_length)
-		sdkp->max_xfer_blocks = max_xfer_length;
-
 	blk_queue_io_min(sdkp->disk->queue,
 			 get_unaligned_be16(&buffer[6]) * sector_sz);
-	blk_queue_io_opt(sdkp->disk->queue,
-			 get_unaligned_be32(&buffer[12]) * sector_sz);
+
+	sdkp->max_xfer_blocks = get_unaligned_be32(&buffer[8]);
+	sdkp->opt_xfer_blocks = get_unaligned_be32(&buffer[12]);
 
 	if (buffer[3] == 0x3c) {
 		unsigned int lba_count, desc_count;
@@ -2806,6 +2812,11 @@
 	return 0;
 }
 
+static inline u32 logical_to_sectors(struct scsi_device *sdev, u32 blocks)
+{
+	return blocks << (ilog2(sdev->sector_size) - 9);
+}
+
 /**
  *	sd_revalidate_disk - called the first time a new disk is seen,
  *	performs disk spin up, read_capacity, etc.
@@ -2815,8 +2826,9 @@
 {
 	struct scsi_disk *sdkp = scsi_disk(disk);
 	struct scsi_device *sdp = sdkp->device;
+	struct request_queue *q = sdkp->disk->queue;
 	unsigned char *buffer;
-	unsigned int max_xfer;
+	unsigned int dev_max, rw_max;
 
 	SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp,
 				      "sd_revalidate_disk\n"));
@@ -2864,11 +2876,26 @@
 	 */
 	sd_set_flush_flag(sdkp);
 
-	max_xfer = sdkp->max_xfer_blocks;
-	max_xfer <<= ilog2(sdp->sector_size) - 9;
+	/* Initial block count limit based on CDB TRANSFER LENGTH field size. */
+	dev_max = sdp->use_16_for_rw ? SD_MAX_XFER_BLOCKS : SD_DEF_XFER_BLOCKS;
 
-	sdkp->disk->queue->limits.max_sectors =
-		min_not_zero(queue_max_hw_sectors(sdkp->disk->queue), max_xfer);
+	/* Some devices report a maximum block count for READ/WRITE requests. */
+	dev_max = min_not_zero(dev_max, sdkp->max_xfer_blocks);
+	q->limits.max_dev_sectors = logical_to_sectors(sdp, dev_max);
+
+	/*
+	 * Use the device's preferred I/O size for reads and writes
+	 * unless the reported value is unreasonably large (or garbage).
+	 */
+	if (sdkp->opt_xfer_blocks && sdkp->opt_xfer_blocks <= dev_max &&
+	    sdkp->opt_xfer_blocks <= SD_DEF_XFER_BLOCKS)
+		rw_max = q->limits.io_opt =
+			logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
+	else
+		rw_max = BLK_DEF_MAX_SECTORS;
+
+	/* Combine with controller limits */
+	q->limits.max_sectors = min(rw_max, queue_max_hw_sectors(q));
 
 	set_capacity(disk, sdkp->capacity);
 	sd_config_write_same(sdkp);
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 63ba5ca..5f2a84a 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -67,6 +67,7 @@
 	atomic_t	openers;
 	sector_t	capacity;	/* size in 512-byte sectors */
 	u32		max_xfer_blocks;
+	u32		opt_xfer_blocks;
 	u32		max_ws_blocks;
 	u32		max_unmap_blocks;
 	u32		unmap_granularity;
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index e0a1e52..2e52295 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -4083,6 +4083,7 @@
 	}
 	cdev->owner = THIS_MODULE;
 	cdev->ops = &st_fops;
+	STm->cdevs[rew] = cdev;
 
 	error = cdev_add(cdev, cdev_devno, 1);
 	if (error) {
@@ -4091,7 +4092,6 @@
 		pr_err("st%d: Device not attached.\n", dev_num);
 		goto out_free;
 	}
-	STm->cdevs[rew] = cdev;
 
 	i = mode << (4 - ST_NBR_MODE_BITS);
 	snprintf(name, 10, "%s%s%s", rew ? "n" : "",
@@ -4110,8 +4110,9 @@
 	return 0;
 out_free:
 	cdev_del(STm->cdevs[rew]);
-	STm->cdevs[rew] = NULL;
 out:
+	STm->cdevs[rew] = NULL;
+	STm->devs[rew] = NULL;
 	return error;
 }
 
diff --git a/drivers/sh/pm_runtime.c b/drivers/sh/pm_runtime.c
index 25abd4e..91a00301 100644
--- a/drivers/sh/pm_runtime.c
+++ b/drivers/sh/pm_runtime.c
@@ -34,7 +34,7 @@
 
 static int __init sh_pm_runtime_init(void)
 {
-	if (IS_ENABLED(CONFIG_ARCH_SHMOBILE_MULTI)) {
+	if (IS_ENABLED(CONFIG_ARCH_SHMOBILE)) {
 		if (!of_find_compatible_node(NULL, NULL,
 					     "renesas,cpg-mstp-clocks"))
 			return 0;
diff --git a/drivers/soc/mediatek/Kconfig b/drivers/soc/mediatek/Kconfig
index 9d50682..0a4ea80 100644
--- a/drivers/soc/mediatek/Kconfig
+++ b/drivers/soc/mediatek/Kconfig
@@ -23,6 +23,7 @@
 config MTK_SCPSYS
 	bool "MediaTek SCPSYS Support"
 	depends on ARCH_MEDIATEK || COMPILE_TEST
+	default ARM64 && ARCH_MEDIATEK
 	select REGMAP
 	select MTK_INFRACFG
 	select PM_GENERIC_DOMAINS if PM
diff --git a/drivers/soc/ti/knav_qmss_queue.c b/drivers/soc/ti/knav_qmss_queue.c
index f3a0b6a..8c03a80 100644
--- a/drivers/soc/ti/knav_qmss_queue.c
+++ b/drivers/soc/ti/knav_qmss_queue.c
@@ -1179,7 +1179,7 @@
 
 		block++;
 		if (!block->size)
-			return 0;
+			continue;
 
 		dev_dbg(kdev->dev, "linkram1: phys:%x, virt:%p, size:%x\n",
 			block->phys, block->virt, block->size);
@@ -1519,9 +1519,9 @@
 
 	for (i = 0; i < ARRAY_SIZE(knav_acc_firmwares); i++) {
 		if (knav_acc_firmwares[i]) {
-			ret = request_firmware(&fw,
-					       knav_acc_firmwares[i],
-					       kdev->dev);
+			ret = request_firmware_direct(&fw,
+						      knav_acc_firmwares[i],
+						      kdev->dev);
 			if (!ret) {
 				found = true;
 				break;
diff --git a/drivers/spi/spi-bcm63xx.c b/drivers/spi/spi-bcm63xx.c
index 06858e0..bf9a610 100644
--- a/drivers/spi/spi-bcm63xx.c
+++ b/drivers/spi/spi-bcm63xx.c
@@ -562,8 +562,8 @@
 		goto out_clk_disable;
 	}
 
-	dev_info(dev, "at 0x%08x (irq %d, FIFOs size %d)\n",
-		 r->start, irq, bs->fifo_size);
+	dev_info(dev, "at %pr (irq %d, FIFOs size %d)\n",
+		 r, irq, bs->fifo_size);
 
 	return 0;
 
diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c
index 563954a..7840067 100644
--- a/drivers/spi/spi-mt65xx.c
+++ b/drivers/spi/spi-mt65xx.c
@@ -410,7 +410,7 @@
 	if (!spi->controller_data)
 		spi->controller_data = (void *)&mtk_default_chip_info;
 
-	if (mdata->dev_comp->need_pad_sel)
+	if (mdata->dev_comp->need_pad_sel && gpio_is_valid(spi->cs_gpio))
 		gpio_direction_output(spi->cs_gpio, !(spi->mode & SPI_CS_HIGH));
 
 	return 0;
@@ -632,13 +632,23 @@
 			goto err_put_master;
 		}
 
-		for (i = 0; i < master->num_chipselect; i++) {
-			ret = devm_gpio_request(&pdev->dev, master->cs_gpios[i],
-						dev_name(&pdev->dev));
-			if (ret) {
-				dev_err(&pdev->dev,
-					"can't get CS GPIO %i\n", i);
-				goto err_put_master;
+		if (!master->cs_gpios && master->num_chipselect > 1) {
+			dev_err(&pdev->dev,
+				"cs_gpios not specified and num_chipselect > 1\n");
+			ret = -EINVAL;
+			goto err_put_master;
+		}
+
+		if (master->cs_gpios) {
+			for (i = 0; i < master->num_chipselect; i++) {
+				ret = devm_gpio_request(&pdev->dev,
+							master->cs_gpios[i],
+							dev_name(&pdev->dev));
+				if (ret) {
+					dev_err(&pdev->dev,
+						"can't get CS GPIO %i\n", i);
+					goto err_put_master;
+				}
 			}
 		}
 	}
diff --git a/drivers/spi/spi-pl022.c b/drivers/spi/spi-pl022.c
index 94af806..5e5fd77 100644
--- a/drivers/spi/spi-pl022.c
+++ b/drivers/spi/spi-pl022.c
@@ -1171,19 +1171,31 @@
 static int pl022_dma_autoprobe(struct pl022 *pl022)
 {
 	struct device *dev = &pl022->adev->dev;
+	struct dma_chan *chan;
+	int err;
 
 	/* automatically configure DMA channels from platform, normally using DT */
-	pl022->dma_rx_channel = dma_request_slave_channel(dev, "rx");
-	if (!pl022->dma_rx_channel)
+	chan = dma_request_slave_channel_reason(dev, "rx");
+	if (IS_ERR(chan)) {
+		err = PTR_ERR(chan);
 		goto err_no_rxchan;
+	}
 
-	pl022->dma_tx_channel = dma_request_slave_channel(dev, "tx");
-	if (!pl022->dma_tx_channel)
+	pl022->dma_rx_channel = chan;
+
+	chan = dma_request_slave_channel_reason(dev, "tx");
+	if (IS_ERR(chan)) {
+		err = PTR_ERR(chan);
 		goto err_no_txchan;
+	}
+
+	pl022->dma_tx_channel = chan;
 
 	pl022->dummypage = kmalloc(PAGE_SIZE, GFP_KERNEL);
-	if (!pl022->dummypage)
+	if (!pl022->dummypage) {
+		err = -ENOMEM;
 		goto err_no_dummypage;
+	}
 
 	return 0;
 
@@ -1194,7 +1206,7 @@
 	dma_release_channel(pl022->dma_rx_channel);
 	pl022->dma_rx_channel = NULL;
 err_no_rxchan:
-	return -ENODEV;
+	return err;
 }
 		
 static void terminate_dma(struct pl022 *pl022)
@@ -2236,6 +2248,10 @@
 
 	/* Get DMA channels, try autoconfiguration first */
 	status = pl022_dma_autoprobe(pl022);
+	if (status == -EPROBE_DEFER) {
+		dev_dbg(dev, "deferring probe to get DMA channel\n");
+		goto err_no_irq;
+	}
 
 	/* If that failed, use channels from platform_info */
 	if (status == 0)
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index e2415be..2b0a8ec 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -376,6 +376,7 @@
 
 /**
  * __spi_register_driver - register a SPI driver
+ * @owner: owner module of the driver to register
  * @sdrv: the driver to register
  * Context: can sleep
  *
@@ -2130,6 +2131,7 @@
 	 * Set transfer tx_nbits and rx_nbits as single transfer default
 	 * (SPI_NBITS_SINGLE) if it is not set for this transfer.
 	 */
+	message->frame_length = 0;
 	list_for_each_entry(xfer, &message->transfers, transfer_list) {
 		message->frame_length += xfer->len;
 		if (!xfer->bits_per_word)
diff --git a/drivers/staging/iio/Kconfig b/drivers/staging/iio/Kconfig
index 6d5b38d..9d7f000 100644
--- a/drivers/staging/iio/Kconfig
+++ b/drivers/staging/iio/Kconfig
@@ -18,7 +18,8 @@
 source "drivers/staging/iio/trigger/Kconfig"
 
 config IIO_DUMMY_EVGEN
-       tristate
+	tristate
+	select IRQ_WORK
 
 config IIO_SIMPLE_DUMMY
        tristate "An example driver with no hardware requirements"
diff --git a/drivers/staging/iio/adc/lpc32xx_adc.c b/drivers/staging/iio/adc/lpc32xx_adc.c
index d11c54b..b51f237 100644
--- a/drivers/staging/iio/adc/lpc32xx_adc.c
+++ b/drivers/staging/iio/adc/lpc32xx_adc.c
@@ -76,7 +76,7 @@
 
 	if (mask == IIO_CHAN_INFO_RAW) {
 		mutex_lock(&indio_dev->mlock);
-		clk_enable(info->clk);
+		clk_prepare_enable(info->clk);
 		/* Measurement setup */
 		__raw_writel(AD_INTERNAL | (chan->address) | AD_REFp | AD_REFm,
 			     LPC32XX_ADC_SELECT(info->adc_base));
@@ -84,7 +84,7 @@
 		__raw_writel(AD_PDN_CTRL | AD_STROBE,
 			     LPC32XX_ADC_CTRL(info->adc_base));
 		wait_for_completion(&info->completion); /* set by ISR */
-		clk_disable(info->clk);
+		clk_disable_unprepare(info->clk);
 		*val = info->value;
 		mutex_unlock(&indio_dev->mlock);
 
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_ioctl.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_ioctl.h
index f5d741f..485ab26 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_ioctl.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_ioctl.h
@@ -110,7 +110,6 @@
 #define IOC_LIBCFS_CLEAR_DEBUG	     _IOWR('e', 31, long)
 #define IOC_LIBCFS_MARK_DEBUG	      _IOWR('e', 32, long)
 #define IOC_LIBCFS_MEMHOG		  _IOWR('e', 36, long)
-#define IOC_LIBCFS_PING_TEST	       _IOWR('e', 37, long)
 /* lnet ioctls */
 #define IOC_LIBCFS_GET_NI		  _IOWR('e', 50, long)
 #define IOC_LIBCFS_FAIL_NID		_IOWR('e', 51, long)
diff --git a/drivers/staging/lustre/lustre/libcfs/module.c b/drivers/staging/lustre/lustre/libcfs/module.c
index 07a6859..e7c2b26 100644
--- a/drivers/staging/lustre/lustre/libcfs/module.c
+++ b/drivers/staging/lustre/lustre/libcfs/module.c
@@ -274,23 +274,6 @@
 		}
 		break;
 
-	case IOC_LIBCFS_PING_TEST: {
-		extern void (kping_client)(struct libcfs_ioctl_data *);
-		void (*ping)(struct libcfs_ioctl_data *);
-
-		CDEBUG(D_IOCTL, "doing %d pings to nid %s (%s)\n",
-		       data->ioc_count, libcfs_nid2str(data->ioc_nid),
-		       libcfs_nid2str(data->ioc_nid));
-		ping = symbol_get(kping_client);
-		if (!ping)
-			CERROR("symbol_get failed\n");
-		else {
-			ping(data);
-			symbol_put(kping_client);
-		}
-		return 0;
-	}
-
 	default: {
 		struct libcfs_ioctl_handler *hand;
 
diff --git a/drivers/staging/wilc1000/coreconfigurator.c b/drivers/staging/wilc1000/coreconfigurator.c
index e10c6ff..9568bdb 100644
--- a/drivers/staging/wilc1000/coreconfigurator.c
+++ b/drivers/staging/wilc1000/coreconfigurator.c
@@ -13,12 +13,8 @@
 #include "wilc_wlan.h"
 #include <linux/errno.h>
 #include <linux/slab.h>
-#include <linux/etherdevice.h>
 #define TAG_PARAM_OFFSET	(MAC_HDR_LEN + TIME_STAMP_LEN + \
 							BEACON_INTERVAL_LEN + CAP_INFO_LEN)
-#define ADDR1 4
-#define ADDR2 10
-#define ADDR3 16
 
 /* Basic Frame Type Codes (2-bit) */
 enum basic_frame_type {
@@ -175,32 +171,38 @@
 	return ((header[1] & 0x02) >> 1);
 }
 
+/* This function extracts the MAC Address in 'address1' field of the MAC     */
+/* header and updates the MAC Address in the allocated 'addr' variable.      */
+static inline void get_address1(u8 *pu8msa, u8 *addr)
+{
+	memcpy(addr, pu8msa + 4, 6);
+}
+
+/* This function extracts the MAC Address in 'address2' field of the MAC     */
+/* header and updates the MAC Address in the allocated 'addr' variable.      */
+static inline void get_address2(u8 *pu8msa, u8 *addr)
+{
+	memcpy(addr, pu8msa + 10, 6);
+}
+
+/* This function extracts the MAC Address in 'address3' field of the MAC     */
+/* header and updates the MAC Address in the allocated 'addr' variable.      */
+static inline void get_address3(u8 *pu8msa, u8 *addr)
+{
+	memcpy(addr, pu8msa + 16, 6);
+}
+
 /* This function extracts the BSSID from the incoming WLAN packet based on   */
-/* the 'from ds' bit, and updates the MAC Address in the allocated 'data'    */
+/* the 'from ds' bit, and updates the MAC Address in the allocated 'addr'    */
 /* variable.                                                                 */
 static inline void get_BSSID(u8 *data, u8 *bssid)
 {
 	if (get_from_ds(data) == 1)
-		/*
-		 * Extract the MAC Address in 'address2' field of the MAC
-		 * header and update the MAC Address in the allocated 'data'
-		 *  variable.
-		 */
-		ether_addr_copy(data, bssid + ADDR2);
+		get_address2(data, bssid);
 	else if (get_to_ds(data) == 1)
-		/*
-		 * Extract the MAC Address in 'address1' field of the MAC
-		 * header and update the MAC Address in the allocated 'data'
-		 * variable.
-		 */
-		ether_addr_copy(data, bssid + ADDR1);
+		get_address1(data, bssid);
 	else
-		/*
-		 * Extract the MAC Address in 'address3' field of the MAC
-		 * header and update the MAC Address in the allocated 'data'
-		 * variable.
-		 */
-		ether_addr_copy(data, bssid + ADDR3);
+		get_address3(data, bssid);
 }
 
 /* This function extracts the SSID from a beacon/probe response frame        */
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 342a07c..72204fb 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -4074,6 +4074,17 @@
 	return iscsit_add_reject(conn, ISCSI_REASON_BOOKMARK_NO_RESOURCES, buf);
 }
 
+static bool iscsi_target_check_conn_state(struct iscsi_conn *conn)
+{
+	bool ret;
+
+	spin_lock_bh(&conn->state_lock);
+	ret = (conn->conn_state != TARG_CONN_STATE_LOGGED_IN);
+	spin_unlock_bh(&conn->state_lock);
+
+	return ret;
+}
+
 int iscsi_target_rx_thread(void *arg)
 {
 	int ret, rc;
@@ -4091,7 +4102,7 @@
 	 * incoming iscsi/tcp socket I/O, and/or failing the connection.
 	 */
 	rc = wait_for_completion_interruptible(&conn->rx_login_comp);
-	if (rc < 0)
+	if (rc < 0 || iscsi_target_check_conn_state(conn))
 		return 0;
 
 	if (conn->conn_transport->transport_type == ISCSI_INFINIBAND) {
diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c
index 5c964c0..9fc9117 100644
--- a/drivers/target/iscsi/iscsi_target_nego.c
+++ b/drivers/target/iscsi/iscsi_target_nego.c
@@ -388,6 +388,7 @@
 	if (login->login_complete) {
 		if (conn->rx_thread && conn->rx_thread_active) {
 			send_sig(SIGINT, conn->rx_thread, 1);
+			complete(&conn->rx_login_comp);
 			kthread_stop(conn->rx_thread);
 		}
 		if (conn->tx_thread && conn->tx_thread_active) {
diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c
index 51d1734..2cbea2a 100644
--- a/drivers/target/iscsi/iscsi_target_parameters.c
+++ b/drivers/target/iscsi/iscsi_target_parameters.c
@@ -208,7 +208,7 @@
 	if (!pl) {
 		pr_err("Unable to allocate memory for"
 				" struct iscsi_param_list.\n");
-		return -1 ;
+		return -ENOMEM;
 	}
 	INIT_LIST_HEAD(&pl->param_list);
 	INIT_LIST_HEAD(&pl->extra_response_list);
@@ -578,7 +578,7 @@
 	param_list = kzalloc(sizeof(struct iscsi_param_list), GFP_KERNEL);
 	if (!param_list) {
 		pr_err("Unable to allocate memory for struct iscsi_param_list.\n");
-		return -1;
+		return -ENOMEM;
 	}
 	INIT_LIST_HEAD(&param_list->param_list);
 	INIT_LIST_HEAD(&param_list->extra_response_list);
@@ -629,7 +629,7 @@
 
 err_out:
 	iscsi_release_param_list(param_list);
-	return -1;
+	return -ENOMEM;
 }
 
 static void iscsi_release_extra_responses(struct iscsi_param_list *param_list)
@@ -729,7 +729,7 @@
 	if (!extra_response) {
 		pr_err("Unable to allocate memory for"
 			" struct iscsi_extra_response.\n");
-		return -1;
+		return -ENOMEM;
 	}
 	INIT_LIST_HEAD(&extra_response->er_list);
 
@@ -1370,7 +1370,7 @@
 	tmpbuf = kzalloc(length + 1, GFP_KERNEL);
 	if (!tmpbuf) {
 		pr_err("Unable to allocate %u + 1 bytes for tmpbuf.\n", length);
-		return -1;
+		return -ENOMEM;
 	}
 
 	memcpy(tmpbuf, textbuf, length);
diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
index 0b4b2a6..98698d8 100644
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c
@@ -371,7 +371,8 @@
 	return 0;
 }
 
-static sense_reason_t xdreadwrite_callback(struct se_cmd *cmd, bool success)
+static sense_reason_t xdreadwrite_callback(struct se_cmd *cmd, bool success,
+					   int *post_ret)
 {
 	unsigned char *buf, *addr;
 	struct scatterlist *sg;
@@ -437,7 +438,8 @@
 			       cmd->data_direction);
 }
 
-static sense_reason_t compare_and_write_post(struct se_cmd *cmd, bool success)
+static sense_reason_t compare_and_write_post(struct se_cmd *cmd, bool success,
+					     int *post_ret)
 {
 	struct se_device *dev = cmd->se_dev;
 
@@ -447,8 +449,10 @@
 	 * sent to the backend driver.
 	 */
 	spin_lock_irq(&cmd->t_state_lock);
-	if ((cmd->transport_state & CMD_T_SENT) && !cmd->scsi_status)
+	if ((cmd->transport_state & CMD_T_SENT) && !cmd->scsi_status) {
 		cmd->se_cmd_flags |= SCF_COMPARE_AND_WRITE_POST;
+		*post_ret = 1;
+	}
 	spin_unlock_irq(&cmd->t_state_lock);
 
 	/*
@@ -460,7 +464,8 @@
 	return TCM_NO_SENSE;
 }
 
-static sense_reason_t compare_and_write_callback(struct se_cmd *cmd, bool success)
+static sense_reason_t compare_and_write_callback(struct se_cmd *cmd, bool success,
+						 int *post_ret)
 {
 	struct se_device *dev = cmd->se_dev;
 	struct scatterlist *write_sg = NULL, *sg;
@@ -556,11 +561,11 @@
 
 		if (block_size < PAGE_SIZE) {
 			sg_set_page(&write_sg[i], m.page, block_size,
-				    block_size);
+				    m.piter.sg->offset + block_size);
 		} else {
 			sg_miter_next(&m);
 			sg_set_page(&write_sg[i], m.page, block_size,
-				    0);
+				    m.piter.sg->offset);
 		}
 		len -= block_size;
 		i++;
diff --git a/drivers/target/target_core_stat.c b/drivers/target/target_core_stat.c
index 273c72b..81a6b3e 100644
--- a/drivers/target/target_core_stat.c
+++ b/drivers/target/target_core_stat.c
@@ -246,7 +246,7 @@
 	char str[sizeof(dev->t10_wwn.model)+1];
 
 	/* scsiLuProductId */
-	for (i = 0; i < sizeof(dev->t10_wwn.vendor); i++)
+	for (i = 0; i < sizeof(dev->t10_wwn.model); i++)
 		str[i] = ISPRINT(dev->t10_wwn.model[i]) ?
 			dev->t10_wwn.model[i] : ' ';
 	str[i] = '\0';
diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
index 5b28203..28fb301 100644
--- a/drivers/target/target_core_tmr.c
+++ b/drivers/target/target_core_tmr.c
@@ -130,6 +130,9 @@
 		if (tmr->ref_task_tag != ref_tag)
 			continue;
 
+		if (!kref_get_unless_zero(&se_cmd->cmd_kref))
+			continue;
+
 		printk("ABORT_TASK: Found referenced %s task_tag: %llu\n",
 			se_cmd->se_tfo->get_fabric_name(), ref_tag);
 
@@ -139,13 +142,15 @@
 			       " skipping\n", ref_tag);
 			spin_unlock(&se_cmd->t_state_lock);
 			spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
+
+			target_put_sess_cmd(se_cmd);
+
 			goto out;
 		}
 		se_cmd->transport_state |= CMD_T_ABORTED;
 		spin_unlock(&se_cmd->t_state_lock);
 
 		list_del_init(&se_cmd->se_cmd_list);
-		kref_get(&se_cmd->cmd_kref);
 		spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
 
 		cancel_work_sync(&se_cmd->work);
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 5bacc7b..4fdcee2 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -1658,7 +1658,7 @@
 void transport_generic_request_failure(struct se_cmd *cmd,
 		sense_reason_t sense_reason)
 {
-	int ret = 0;
+	int ret = 0, post_ret = 0;
 
 	pr_debug("-----[ Storage Engine Exception for cmd: %p ITT: 0x%08llx"
 		" CDB: 0x%02x\n", cmd, cmd->tag, cmd->t_task_cdb[0]);
@@ -1680,7 +1680,7 @@
 	 */
 	if ((cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE) &&
 	     cmd->transport_complete_callback)
-		cmd->transport_complete_callback(cmd, false);
+		cmd->transport_complete_callback(cmd, false, &post_ret);
 
 	switch (sense_reason) {
 	case TCM_NON_EXISTENT_LUN:
@@ -2068,11 +2068,13 @@
 	 */
 	if (cmd->transport_complete_callback) {
 		sense_reason_t rc;
+		bool caw = (cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE);
+		bool zero_dl = !(cmd->data_length);
+		int post_ret = 0;
 
-		rc = cmd->transport_complete_callback(cmd, true);
-		if (!rc && !(cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE_POST)) {
-			if ((cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE) &&
-			    !cmd->data_length)
+		rc = cmd->transport_complete_callback(cmd, true, &post_ret);
+		if (!rc && !post_ret) {
+			if (caw && zero_dl)
 				goto queue_rsp;
 
 			return;
@@ -2507,23 +2509,24 @@
 EXPORT_SYMBOL(target_get_sess_cmd);
 
 static void target_release_cmd_kref(struct kref *kref)
-		__releases(&se_cmd->se_sess->sess_cmd_lock)
 {
 	struct se_cmd *se_cmd = container_of(kref, struct se_cmd, cmd_kref);
 	struct se_session *se_sess = se_cmd->se_sess;
+	unsigned long flags;
 
+	spin_lock_irqsave(&se_sess->sess_cmd_lock, flags);
 	if (list_empty(&se_cmd->se_cmd_list)) {
-		spin_unlock(&se_sess->sess_cmd_lock);
+		spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
 		se_cmd->se_tfo->release_cmd(se_cmd);
 		return;
 	}
 	if (se_sess->sess_tearing_down && se_cmd->cmd_wait_set) {
-		spin_unlock(&se_sess->sess_cmd_lock);
+		spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
 		complete(&se_cmd->cmd_wait_comp);
 		return;
 	}
 	list_del(&se_cmd->se_cmd_list);
-	spin_unlock(&se_sess->sess_cmd_lock);
+	spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
 
 	se_cmd->se_tfo->release_cmd(se_cmd);
 }
@@ -2539,8 +2542,7 @@
 		se_cmd->se_tfo->release_cmd(se_cmd);
 		return 1;
 	}
-	return kref_put_spinlock_irqsave(&se_cmd->cmd_kref, target_release_cmd_kref,
-			&se_sess->sess_cmd_lock);
+	return kref_put(&se_cmd->cmd_kref, target_release_cmd_kref);
 }
 EXPORT_SYMBOL(target_put_sess_cmd);
 
diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index 937cebf..5e6d6cb 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -638,7 +638,7 @@
 	if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags))
 		return 0;
 
-	if (!time_after(cmd->deadline, jiffies))
+	if (!time_after(jiffies, cmd->deadline))
 		return 0;
 
 	set_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags);
@@ -1101,8 +1101,6 @@
 
 static const struct target_backend_ops tcmu_ops = {
 	.name			= "user",
-	.inquiry_prod		= "USER",
-	.inquiry_rev		= TCMU_VERSION,
 	.owner			= THIS_MODULE,
 	.transport_flags	= TRANSPORT_FLAG_PASSTHROUGH,
 	.attach_hba		= tcmu_attach_hba,
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index c463c89..8cc4ac6 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -382,7 +382,7 @@
 
 config QCOM_SPMI_TEMP_ALARM
 	tristate "Qualcomm SPMI PMIC Temperature Alarm"
-	depends on OF && (SPMI || COMPILE_TEST) && IIO
+	depends on OF && SPMI && IIO
 	select REGMAP_SPMI
 	help
 	  This enables a thermal sysfs driver for Qualcomm plug-and-play (QPNP)
diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c
index c8fe3ca..c5547bd 100644
--- a/drivers/thermal/imx_thermal.c
+++ b/drivers/thermal/imx_thermal.c
@@ -55,6 +55,7 @@
 #define TEMPSENSE2_PANIC_VALUE_SHIFT	16
 #define TEMPSENSE2_PANIC_VALUE_MASK	0xfff0000
 
+#define OCOTP_MEM0			0x0480
 #define OCOTP_ANA1			0x04e0
 
 /* The driver supports 1 passive trip point and 1 critical trip point */
@@ -64,12 +65,6 @@
 	IMX_TRIP_NUM,
 };
 
-/*
- * It defines the temperature in millicelsius for passive trip point
- * that will trigger cooling action when crossed.
- */
-#define IMX_TEMP_PASSIVE		85000
-
 #define IMX_POLLING_DELAY		2000 /* millisecond */
 #define IMX_PASSIVE_DELAY		1000
 
@@ -100,12 +95,14 @@
 	u32 c1, c2; /* See formula in imx_get_sensor_data() */
 	int temp_passive;
 	int temp_critical;
+	int temp_max;
 	int alarm_temp;
 	int last_temp;
 	bool irq_enabled;
 	int irq;
 	struct clk *thermal_clk;
 	const struct thermal_soc_data *socdata;
+	const char *temp_grade;
 };
 
 static void imx_set_panic_temp(struct imx_thermal_data *data,
@@ -285,10 +282,12 @@
 {
 	struct imx_thermal_data *data = tz->devdata;
 
+	/* do not allow changing critical threshold */
 	if (trip == IMX_TRIP_CRITICAL)
 		return -EPERM;
 
-	if (temp < 0 || temp > IMX_TEMP_PASSIVE)
+	/* do not allow passive to be set higher than critical */
+	if (temp < 0 || temp > data->temp_critical)
 		return -EINVAL;
 
 	data->temp_passive = temp;
@@ -404,17 +403,39 @@
 	data->c1 = temp64;
 	data->c2 = n1 * data->c1 + 1000 * t1;
 
-	/*
-	 * Set the default passive cooling trip point,
-	 * can be changed from userspace.
-	 */
-	data->temp_passive = IMX_TEMP_PASSIVE;
+	/* use OTP for thermal grade */
+	ret = regmap_read(map, OCOTP_MEM0, &val);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to read temp grade: %d\n", ret);
+		return ret;
+	}
+
+	/* The maximum die temp is specified by the Temperature Grade */
+	switch ((val >> 6) & 0x3) {
+	case 0: /* Commercial (0 to 95C) */
+		data->temp_grade = "Commercial";
+		data->temp_max = 95000;
+		break;
+	case 1: /* Extended Commercial (-20 to 105C) */
+		data->temp_grade = "Extended Commercial";
+		data->temp_max = 105000;
+		break;
+	case 2: /* Industrial (-40 to 105C) */
+		data->temp_grade = "Industrial";
+		data->temp_max = 105000;
+		break;
+	case 3: /* Automotive (-40 to 125C) */
+		data->temp_grade = "Automotive";
+		data->temp_max = 125000;
+		break;
+	}
 
 	/*
-	 * The maximum die temperature set to 20 C higher than
-	 * IMX_TEMP_PASSIVE.
+	 * Set the critical trip point at 5C under max
+	 * Set the passive trip point at 10C under max (can change via sysfs)
 	 */
-	data->temp_critical = 1000 * 20 + data->temp_passive;
+	data->temp_critical = data->temp_max - (1000 * 5);
+	data->temp_passive = data->temp_max - (1000 * 10);
 
 	return 0;
 }
@@ -551,6 +572,11 @@
 		return ret;
 	}
 
+	dev_info(&pdev->dev, "%s CPU temperature grade - max:%dC"
+		 " critical:%dC passive:%dC\n", data->temp_grade,
+		 data->temp_max / 1000, data->temp_critical / 1000,
+		 data->temp_passive / 1000);
+
 	/* Enable measurements at ~ 10 Hz */
 	regmap_write(map, TEMPSENSE1 + REG_CLR, TEMPSENSE1_MEASURE_FREQ);
 	measure_freq = DIV_ROUND_UP(32768, 10); /* 10 Hz */
diff --git a/drivers/thermal/of-thermal.c b/drivers/thermal/of-thermal.c
index 42b7d42..be4eedc 100644
--- a/drivers/thermal/of-thermal.c
+++ b/drivers/thermal/of-thermal.c
@@ -964,7 +964,7 @@
 
 	np = of_find_node_by_name(NULL, "thermal-zones");
 	if (!np) {
-		pr_err("unable to find thermal zones\n");
+		pr_debug("unable to find thermal zones\n");
 		return;
 	}
 
diff --git a/drivers/thermal/power_allocator.c b/drivers/thermal/power_allocator.c
index f0fbea3..1246aa6 100644
--- a/drivers/thermal/power_allocator.c
+++ b/drivers/thermal/power_allocator.c
@@ -174,7 +174,6 @@
 /**
  * pid_controller() - PID controller
  * @tz:	thermal zone we are operating in
- * @current_temp:	the current temperature in millicelsius
  * @control_temp:	the target temperature in millicelsius
  * @max_allocatable_power:	maximum allocatable power for this thermal zone
  *
@@ -191,7 +190,6 @@
  * Return: The power budget for the next period.
  */
 static u32 pid_controller(struct thermal_zone_device *tz,
-			  int current_temp,
 			  int control_temp,
 			  u32 max_allocatable_power)
 {
@@ -211,7 +209,7 @@
 				       true);
 	}
 
-	err = control_temp - current_temp;
+	err = control_temp - tz->temperature;
 	err = int_to_frac(err);
 
 	/* Calculate the proportional term */
@@ -332,7 +330,6 @@
 }
 
 static int allocate_power(struct thermal_zone_device *tz,
-			  int current_temp,
 			  int control_temp)
 {
 	struct thermal_instance *instance;
@@ -418,8 +415,7 @@
 		i++;
 	}
 
-	power_range = pid_controller(tz, current_temp, control_temp,
-				     max_allocatable_power);
+	power_range = pid_controller(tz, control_temp, max_allocatable_power);
 
 	divvy_up_power(weighted_req_power, max_power, num_actors,
 		       total_weighted_req_power, power_range, granted_power,
@@ -444,8 +440,8 @@
 	trace_thermal_power_allocator(tz, req_power, total_req_power,
 				      granted_power, total_granted_power,
 				      num_actors, power_range,
-				      max_allocatable_power, current_temp,
-				      control_temp - current_temp);
+				      max_allocatable_power, tz->temperature,
+				      control_temp - tz->temperature);
 
 	kfree(req_power);
 unlock:
@@ -612,7 +608,7 @@
 static int power_allocator_throttle(struct thermal_zone_device *tz, int trip)
 {
 	int ret;
-	int switch_on_temp, control_temp, current_temp;
+	int switch_on_temp, control_temp;
 	struct power_allocator_params *params = tz->governor_data;
 
 	/*
@@ -622,15 +618,9 @@
 	if (trip != params->trip_max_desired_temperature)
 		return 0;
 
-	ret = thermal_zone_get_temp(tz, &current_temp);
-	if (ret) {
-		dev_warn(&tz->device, "Failed to get temperature: %d\n", ret);
-		return ret;
-	}
-
 	ret = tz->ops->get_trip_temp(tz, params->trip_switch_on,
 				     &switch_on_temp);
-	if (!ret && (current_temp < switch_on_temp)) {
+	if (!ret && (tz->temperature < switch_on_temp)) {
 		tz->passive = 0;
 		reset_pid_controller(params);
 		allow_maximum_power(tz);
@@ -648,7 +638,7 @@
 		return ret;
 	}
 
-	return allocate_power(tz, current_temp, control_temp);
+	return allocate_power(tz, control_temp);
 }
 
 static struct thermal_governor thermal_gov_power_allocator = {
diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c
index 5d4ae7d..13d01ed 100644
--- a/drivers/thermal/rcar_thermal.c
+++ b/drivers/thermal/rcar_thermal.c
@@ -361,6 +361,24 @@
 /*
  *		platform functions
  */
+static int rcar_thermal_remove(struct platform_device *pdev)
+{
+	struct rcar_thermal_common *common = platform_get_drvdata(pdev);
+	struct device *dev = &pdev->dev;
+	struct rcar_thermal_priv *priv;
+
+	rcar_thermal_for_each_priv(priv, common) {
+		if (rcar_has_irq_support(priv))
+			rcar_thermal_irq_disable(priv);
+		thermal_zone_device_unregister(priv->zone);
+	}
+
+	pm_runtime_put(dev);
+	pm_runtime_disable(dev);
+
+	return 0;
+}
+
 static int rcar_thermal_probe(struct platform_device *pdev)
 {
 	struct rcar_thermal_common *common;
@@ -377,6 +395,8 @@
 	if (!common)
 		return -ENOMEM;
 
+	platform_set_drvdata(pdev, common);
+
 	INIT_LIST_HEAD(&common->head);
 	spin_lock_init(&common->lock);
 	common->dev = dev;
@@ -454,43 +474,16 @@
 		rcar_thermal_common_write(common, ENR, enr_bits);
 	}
 
-	platform_set_drvdata(pdev, common);
-
 	dev_info(dev, "%d sensor probed\n", i);
 
 	return 0;
 
 error_unregister:
-	rcar_thermal_for_each_priv(priv, common) {
-		if (rcar_has_irq_support(priv))
-			rcar_thermal_irq_disable(priv);
-		thermal_zone_device_unregister(priv->zone);
-	}
-
-	pm_runtime_put(dev);
-	pm_runtime_disable(dev);
+	rcar_thermal_remove(pdev);
 
 	return ret;
 }
 
-static int rcar_thermal_remove(struct platform_device *pdev)
-{
-	struct rcar_thermal_common *common = platform_get_drvdata(pdev);
-	struct device *dev = &pdev->dev;
-	struct rcar_thermal_priv *priv;
-
-	rcar_thermal_for_each_priv(priv, common) {
-		if (rcar_has_irq_support(priv))
-			rcar_thermal_irq_disable(priv);
-		thermal_zone_device_unregister(priv->zone);
-	}
-
-	pm_runtime_put(dev);
-	pm_runtime_disable(dev);
-
-	return 0;
-}
-
 static const struct of_device_id rcar_thermal_dt_ids[] = {
 	{ .compatible = "renesas,rcar-thermal", },
 	{},
diff --git a/drivers/thermal/rockchip_thermal.c b/drivers/thermal/rockchip_thermal.c
index 9787e8a..e845841 100644
--- a/drivers/thermal/rockchip_thermal.c
+++ b/drivers/thermal/rockchip_thermal.c
@@ -1,6 +1,9 @@
 /*
  * Copyright (c) 2014, Fuzhou Rockchip Electronics Co., Ltd
  *
+ * Copyright (c) 2015, Fuzhou Rockchip Electronics Co., Ltd
+ * Caesar Wang <wxt@rock-chips.com>
+ *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
  * version 2, as published by the Free Software Foundation.
@@ -45,17 +48,50 @@
 };
 
 /**
- * The system has three Temperature Sensors.  channel 0 is reserved,
- * channel 1 is for CPU, and channel 2 is for GPU.
+ * The system has two Temperature Sensors.
+ * sensor0 is for CPU, and sensor1 is for GPU.
  */
 enum sensor_id {
-	SENSOR_CPU = 1,
+	SENSOR_CPU = 0,
 	SENSOR_GPU,
 };
 
+/**
+* The conversion table has the adc value and temperature.
+* ADC_DECREMENT is the adc value decremnet.(e.g. v2_code_table)
+* ADC_INCREMNET is the adc value incremnet.(e.g. v3_code_table)
+*/
+enum adc_sort_mode {
+	ADC_DECREMENT = 0,
+	ADC_INCREMENT,
+};
+
+/**
+ * The max sensors is two in rockchip SoCs.
+ * Two sensors: CPU and GPU sensor.
+ */
+#define SOC_MAX_SENSORS	2
+
+struct chip_tsadc_table {
+	const struct tsadc_table *id;
+
+	/* the array table size*/
+	unsigned int length;
+
+	/* that analogic mask data */
+	u32 data_mask;
+
+	/* the sort mode is adc value that increment or decrement in table */
+	enum adc_sort_mode mode;
+};
+
 struct rockchip_tsadc_chip {
+	/* The sensor id of chip correspond to the ADC channel */
+	int chn_id[SOC_MAX_SENSORS];
+	int chn_num;
+
 	/* The hardware-controlled tshut property */
-	long tshut_temp;
+	int tshut_temp;
 	enum tshut_mode tshut_mode;
 	enum tshut_polarity tshut_polarity;
 
@@ -65,37 +101,40 @@
 	void (*control)(void __iomem *reg, bool on);
 
 	/* Per-sensor methods */
-	int (*get_temp)(int chn, void __iomem *reg, int *temp);
-	void (*set_tshut_temp)(int chn, void __iomem *reg, long temp);
+	int (*get_temp)(struct chip_tsadc_table table,
+			int chn, void __iomem *reg, int *temp);
+	void (*set_tshut_temp)(struct chip_tsadc_table table,
+			       int chn, void __iomem *reg, int temp);
 	void (*set_tshut_mode)(int chn, void __iomem *reg, enum tshut_mode m);
+
+	/* Per-table methods */
+	struct chip_tsadc_table table;
 };
 
 struct rockchip_thermal_sensor {
 	struct rockchip_thermal_data *thermal;
 	struct thermal_zone_device *tzd;
-	enum sensor_id id;
+	int id;
 };
 
-#define NUM_SENSORS	2 /* Ignore unused sensor 0 */
-
 struct rockchip_thermal_data {
 	const struct rockchip_tsadc_chip *chip;
 	struct platform_device *pdev;
 	struct reset_control *reset;
 
-	struct rockchip_thermal_sensor sensors[NUM_SENSORS];
+	struct rockchip_thermal_sensor sensors[SOC_MAX_SENSORS];
 
 	struct clk *clk;
 	struct clk *pclk;
 
 	void __iomem *regs;
 
-	long tshut_temp;
+	int tshut_temp;
 	enum tshut_mode tshut_mode;
 	enum tshut_polarity tshut_polarity;
 };
 
-/* TSADC V2 Sensor info define: */
+/* TSADC Sensor info define: */
 #define TSADCV2_AUTO_CON			0x04
 #define TSADCV2_INT_EN				0x08
 #define TSADCV2_INT_PD				0x0c
@@ -117,6 +156,8 @@
 #define TSADCV2_INT_PD_CLEAR_MASK		~BIT(8)
 
 #define TSADCV2_DATA_MASK			0xfff
+#define TSADCV3_DATA_MASK			0x3ff
+
 #define TSADCV2_HIGHT_INT_DEBOUNCE_COUNT	4
 #define TSADCV2_HIGHT_TSHUT_DEBOUNCE_COUNT	4
 #define TSADCV2_AUTO_PERIOD_TIME		250 /* msec */
@@ -124,7 +165,7 @@
 
 struct tsadc_table {
 	u32 code;
-	long temp;
+	int temp;
 };
 
 static const struct tsadc_table v2_code_table[] = {
@@ -165,21 +206,61 @@
 	{3421, 125000},
 };
 
-static u32 rk_tsadcv2_temp_to_code(long temp)
+static const struct tsadc_table v3_code_table[] = {
+	{0, -40000},
+	{106, -40000},
+	{108, -35000},
+	{110, -30000},
+	{112, -25000},
+	{114, -20000},
+	{116, -15000},
+	{118, -10000},
+	{120, -5000},
+	{122, 0},
+	{124, 5000},
+	{126, 10000},
+	{128, 15000},
+	{130, 20000},
+	{132, 25000},
+	{134, 30000},
+	{136, 35000},
+	{138, 40000},
+	{140, 45000},
+	{142, 50000},
+	{144, 55000},
+	{146, 60000},
+	{148, 65000},
+	{150, 70000},
+	{152, 75000},
+	{154, 80000},
+	{156, 85000},
+	{158, 90000},
+	{160, 95000},
+	{162, 100000},
+	{163, 105000},
+	{165, 110000},
+	{167, 115000},
+	{169, 120000},
+	{171, 125000},
+	{TSADCV3_DATA_MASK, 125000},
+};
+
+static u32 rk_tsadcv2_temp_to_code(struct chip_tsadc_table table,
+				   int temp)
 {
 	int high, low, mid;
 
 	low = 0;
-	high = ARRAY_SIZE(v2_code_table) - 1;
+	high = table.length - 1;
 	mid = (high + low) / 2;
 
-	if (temp < v2_code_table[low].temp || temp > v2_code_table[high].temp)
+	if (temp < table.id[low].temp || temp > table.id[high].temp)
 		return 0;
 
 	while (low <= high) {
-		if (temp == v2_code_table[mid].temp)
-			return v2_code_table[mid].code;
-		else if (temp < v2_code_table[mid].temp)
+		if (temp == table.id[mid].temp)
+			return table.id[mid].code;
+		else if (temp < table.id[mid].temp)
 			high = mid - 1;
 		else
 			low = mid + 1;
@@ -189,29 +270,54 @@
 	return 0;
 }
 
-static int rk_tsadcv2_code_to_temp(u32 code, int *temp)
+static int rk_tsadcv2_code_to_temp(struct chip_tsadc_table table, u32 code,
+				   int *temp)
 {
 	unsigned int low = 1;
-	unsigned int high = ARRAY_SIZE(v2_code_table) - 1;
+	unsigned int high = table.length - 1;
 	unsigned int mid = (low + high) / 2;
 	unsigned int num;
 	unsigned long denom;
 
-	BUILD_BUG_ON(ARRAY_SIZE(v2_code_table) < 2);
+	WARN_ON(table.length < 2);
 
-	code &= TSADCV2_DATA_MASK;
-	if (code < v2_code_table[high].code)
-		return -EAGAIN;		/* Incorrect reading */
+	switch (table.mode) {
+	case ADC_DECREMENT:
+		code &= table.data_mask;
+		if (code < table.id[high].code)
+			return -EAGAIN;		/* Incorrect reading */
 
-	while (low <= high) {
-		if (code >= v2_code_table[mid].code &&
-		    code < v2_code_table[mid - 1].code)
-			break;
-		else if (code < v2_code_table[mid].code)
-			low = mid + 1;
-		else
-			high = mid - 1;
-		mid = (low + high) / 2;
+		while (low <= high) {
+			if (code >= table.id[mid].code &&
+			    code < table.id[mid - 1].code)
+				break;
+			else if (code < table.id[mid].code)
+				low = mid + 1;
+			else
+				high = mid - 1;
+
+			mid = (low + high) / 2;
+		}
+		break;
+	case ADC_INCREMENT:
+		code &= table.data_mask;
+		if (code < table.id[low].code)
+			return -EAGAIN;		/* Incorrect reading */
+
+		while (low <= high) {
+			if (code >= table.id[mid - 1].code &&
+			    code < table.id[mid].code)
+				break;
+			else if (code > table.id[mid].code)
+				low = mid + 1;
+			else
+				high = mid - 1;
+
+			mid = (low + high) / 2;
+		}
+		break;
+	default:
+		pr_err("Invalid the conversion table\n");
 	}
 
 	/*
@@ -220,24 +326,28 @@
 	 * temperature between 2 table entries is linear and interpolate
 	 * to produce less granular result.
 	 */
-	num = v2_code_table[mid].temp - v2_code_table[mid - 1].temp;
-	num *= v2_code_table[mid - 1].code - code;
-	denom = v2_code_table[mid - 1].code - v2_code_table[mid].code;
-	*temp = v2_code_table[mid - 1].temp + (num / denom);
+	num = table.id[mid].temp - v2_code_table[mid - 1].temp;
+	num *= abs(table.id[mid - 1].code - code);
+	denom = abs(table.id[mid - 1].code - table.id[mid].code);
+	*temp = table.id[mid - 1].temp + (num / denom);
 
 	return 0;
 }
 
 /**
- * rk_tsadcv2_initialize - initialize TASDC Controller
- * (1) Set TSADCV2_AUTO_PERIOD, configure the interleave between
- * every two accessing of TSADC in normal operation.
- * (2) Set TSADCV2_AUTO_PERIOD_HT, configure the interleave between
- * every two accessing of TSADC after the temperature is higher
- * than COM_SHUT or COM_INT.
- * (3) Set TSADCV2_HIGH_INT_DEBOUNCE and TSADC_HIGHT_TSHUT_DEBOUNCE,
- * if the temperature is higher than COMP_INT or COMP_SHUT for
- * "debounce" times, TSADC controller will generate interrupt or TSHUT.
+ * rk_tsadcv2_initialize - initialize TASDC Controller.
+ *
+ * (1) Set TSADC_V2_AUTO_PERIOD:
+ *     Configure the interleave between every two accessing of
+ *     TSADC in normal operation.
+ *
+ * (2) Set TSADCV2_AUTO_PERIOD_HT:
+ *     Configure the interleave between every two accessing of
+ *     TSADC after the temperature is higher than COM_SHUT or COM_INT.
+ *
+ * (3) Set TSADCV2_HIGH_INT_DEBOUNCE and TSADC_HIGHT_TSHUT_DEBOUNCE:
+ *     If the temperature is higher than COMP_INT or COMP_SHUT for
+ *     "debounce" times, TSADC controller will generate interrupt or TSHUT.
  */
 static void rk_tsadcv2_initialize(void __iomem *regs,
 				  enum tshut_polarity tshut_polarity)
@@ -279,20 +389,22 @@
 	writel_relaxed(val, regs + TSADCV2_AUTO_CON);
 }
 
-static int rk_tsadcv2_get_temp(int chn, void __iomem *regs, int *temp)
+static int rk_tsadcv2_get_temp(struct chip_tsadc_table table,
+			       int chn, void __iomem *regs, int *temp)
 {
 	u32 val;
 
 	val = readl_relaxed(regs + TSADCV2_DATA(chn));
 
-	return rk_tsadcv2_code_to_temp(val, temp);
+	return rk_tsadcv2_code_to_temp(table, val, temp);
 }
 
-static void rk_tsadcv2_tshut_temp(int chn, void __iomem *regs, long temp)
+static void rk_tsadcv2_tshut_temp(struct chip_tsadc_table table,
+				  int chn, void __iomem *regs, int temp)
 {
 	u32 tshut_value, val;
 
-	tshut_value = rk_tsadcv2_temp_to_code(temp);
+	tshut_value = rk_tsadcv2_temp_to_code(table, temp);
 	writel_relaxed(tshut_value, regs + TSADCV2_COMP_SHUT(chn));
 
 	/* TSHUT will be valid */
@@ -318,6 +430,10 @@
 }
 
 static const struct rockchip_tsadc_chip rk3288_tsadc_data = {
+	.chn_id[SENSOR_CPU] = 1, /* cpu sensor is channel 1 */
+	.chn_id[SENSOR_GPU] = 2, /* gpu sensor is channel 2 */
+	.chn_num = 2, /* two channels for tsadc */
+
 	.tshut_mode = TSHUT_MODE_GPIO, /* default TSHUT via GPIO give PMIC */
 	.tshut_polarity = TSHUT_LOW_ACTIVE, /* default TSHUT LOW ACTIVE */
 	.tshut_temp = 95000,
@@ -328,6 +444,37 @@
 	.get_temp = rk_tsadcv2_get_temp,
 	.set_tshut_temp = rk_tsadcv2_tshut_temp,
 	.set_tshut_mode = rk_tsadcv2_tshut_mode,
+
+	.table = {
+		.id = v2_code_table,
+		.length = ARRAY_SIZE(v2_code_table),
+		.data_mask = TSADCV2_DATA_MASK,
+		.mode = ADC_DECREMENT,
+	},
+};
+
+static const struct rockchip_tsadc_chip rk3368_tsadc_data = {
+	.chn_id[SENSOR_CPU] = 0, /* cpu sensor is channel 0 */
+	.chn_id[SENSOR_GPU] = 1, /* gpu sensor is channel 1 */
+	.chn_num = 2, /* two channels for tsadc */
+
+	.tshut_mode = TSHUT_MODE_GPIO, /* default TSHUT via GPIO give PMIC */
+	.tshut_polarity = TSHUT_LOW_ACTIVE, /* default TSHUT LOW ACTIVE */
+	.tshut_temp = 95000,
+
+	.initialize = rk_tsadcv2_initialize,
+	.irq_ack = rk_tsadcv2_irq_ack,
+	.control = rk_tsadcv2_control,
+	.get_temp = rk_tsadcv2_get_temp,
+	.set_tshut_temp = rk_tsadcv2_tshut_temp,
+	.set_tshut_mode = rk_tsadcv2_tshut_mode,
+
+	.table = {
+		.id = v3_code_table,
+		.length = ARRAY_SIZE(v3_code_table),
+		.data_mask = TSADCV3_DATA_MASK,
+		.mode = ADC_INCREMENT,
+	},
 };
 
 static const struct of_device_id of_rockchip_thermal_match[] = {
@@ -335,6 +482,10 @@
 		.compatible = "rockchip,rk3288-tsadc",
 		.data = (void *)&rk3288_tsadc_data,
 	},
+	{
+		.compatible = "rockchip,rk3368-tsadc",
+		.data = (void *)&rk3368_tsadc_data,
+	},
 	{ /* end */ },
 };
 MODULE_DEVICE_TABLE(of, of_rockchip_thermal_match);
@@ -357,7 +508,7 @@
 
 	thermal->chip->irq_ack(thermal->regs);
 
-	for (i = 0; i < ARRAY_SIZE(thermal->sensors); i++)
+	for (i = 0; i < thermal->chip->chn_num; i++)
 		thermal_zone_device_update(thermal->sensors[i].tzd);
 
 	return IRQ_HANDLED;
@@ -370,7 +521,8 @@
 	const struct rockchip_tsadc_chip *tsadc = sensor->thermal->chip;
 	int retval;
 
-	retval = tsadc->get_temp(sensor->id, thermal->regs, out_temp);
+	retval = tsadc->get_temp(tsadc->table,
+				 sensor->id, thermal->regs, out_temp);
 	dev_dbg(&thermal->pdev->dev, "sensor %d - temp: %d, retval: %d\n",
 		sensor->id, *out_temp, retval);
 
@@ -389,7 +541,7 @@
 
 	if (of_property_read_u32(np, "rockchip,hw-tshut-temp", &shut_temp)) {
 		dev_warn(dev,
-			 "Missing tshut temp property, using default %ld\n",
+			 "Missing tshut temp property, using default %d\n",
 			 thermal->chip->tshut_temp);
 		thermal->tshut_temp = thermal->chip->tshut_temp;
 	} else {
@@ -397,7 +549,7 @@
 	}
 
 	if (thermal->tshut_temp > INT_MAX) {
-		dev_err(dev, "Invalid tshut temperature specified: %ld\n",
+		dev_err(dev, "Invalid tshut temperature specified: %d\n",
 			thermal->tshut_temp);
 		return -ERANGE;
 	}
@@ -442,13 +594,14 @@
 rockchip_thermal_register_sensor(struct platform_device *pdev,
 				 struct rockchip_thermal_data *thermal,
 				 struct rockchip_thermal_sensor *sensor,
-				 enum sensor_id id)
+				 int id)
 {
 	const struct rockchip_tsadc_chip *tsadc = thermal->chip;
 	int error;
 
 	tsadc->set_tshut_mode(id, thermal->regs, thermal->tshut_mode);
-	tsadc->set_tshut_temp(id, thermal->regs, thermal->tshut_temp);
+	tsadc->set_tshut_temp(tsadc->table, id, thermal->regs,
+			      thermal->tshut_temp);
 
 	sensor->thermal = thermal;
 	sensor->id = id;
@@ -481,7 +634,7 @@
 	const struct of_device_id *match;
 	struct resource *res;
 	int irq;
-	int i;
+	int i, j;
 	int error;
 
 	match = of_match_node(of_rockchip_thermal_match, np);
@@ -556,22 +709,19 @@
 
 	thermal->chip->initialize(thermal->regs, thermal->tshut_polarity);
 
-	error = rockchip_thermal_register_sensor(pdev, thermal,
-						 &thermal->sensors[0],
-						 SENSOR_CPU);
-	if (error) {
-		dev_err(&pdev->dev,
-			"failed to register CPU thermal sensor: %d\n", error);
-		goto err_disable_pclk;
-	}
-
-	error = rockchip_thermal_register_sensor(pdev, thermal,
-						 &thermal->sensors[1],
-						 SENSOR_GPU);
-	if (error) {
-		dev_err(&pdev->dev,
-			"failed to register GPU thermal sensor: %d\n", error);
-		goto err_unregister_cpu_sensor;
+	for (i = 0; i < thermal->chip->chn_num; i++) {
+		error = rockchip_thermal_register_sensor(pdev, thermal,
+						&thermal->sensors[i],
+						thermal->chip->chn_id[i]);
+		if (error) {
+			dev_err(&pdev->dev,
+				"failed to register sensor[%d] : error = %d\n",
+				i, error);
+			for (j = 0; j < i; j++)
+				thermal_zone_of_sensor_unregister(&pdev->dev,
+						thermal->sensors[j].tzd);
+			goto err_disable_pclk;
+		}
 	}
 
 	error = devm_request_threaded_irq(&pdev->dev, irq, NULL,
@@ -581,22 +731,23 @@
 	if (error) {
 		dev_err(&pdev->dev,
 			"failed to request tsadc irq: %d\n", error);
-		goto err_unregister_gpu_sensor;
+		goto err_unregister_sensor;
 	}
 
 	thermal->chip->control(thermal->regs, true);
 
-	for (i = 0; i < ARRAY_SIZE(thermal->sensors); i++)
+	for (i = 0; i < thermal->chip->chn_num; i++)
 		rockchip_thermal_toggle_sensor(&thermal->sensors[i], true);
 
 	platform_set_drvdata(pdev, thermal);
 
 	return 0;
 
-err_unregister_gpu_sensor:
-	thermal_zone_of_sensor_unregister(&pdev->dev, thermal->sensors[1].tzd);
-err_unregister_cpu_sensor:
-	thermal_zone_of_sensor_unregister(&pdev->dev, thermal->sensors[0].tzd);
+err_unregister_sensor:
+	while (i--)
+		thermal_zone_of_sensor_unregister(&pdev->dev,
+						  thermal->sensors[i].tzd);
+
 err_disable_pclk:
 	clk_disable_unprepare(thermal->pclk);
 err_disable_clk:
@@ -610,7 +761,7 @@
 	struct rockchip_thermal_data *thermal = platform_get_drvdata(pdev);
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(thermal->sensors); i++) {
+	for (i = 0; i < thermal->chip->chn_num; i++) {
 		struct rockchip_thermal_sensor *sensor = &thermal->sensors[i];
 
 		rockchip_thermal_toggle_sensor(sensor, false);
@@ -631,7 +782,7 @@
 	struct rockchip_thermal_data *thermal = platform_get_drvdata(pdev);
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(thermal->sensors); i++)
+	for (i = 0; i < thermal->chip->chn_num; i++)
 		rockchip_thermal_toggle_sensor(&thermal->sensors[i], false);
 
 	thermal->chip->control(thermal->regs, false);
@@ -663,18 +814,19 @@
 
 	thermal->chip->initialize(thermal->regs, thermal->tshut_polarity);
 
-	for (i = 0; i < ARRAY_SIZE(thermal->sensors); i++) {
-		enum sensor_id id = thermal->sensors[i].id;
+	for (i = 0; i < thermal->chip->chn_num; i++) {
+		int id = thermal->sensors[i].id;
 
 		thermal->chip->set_tshut_mode(id, thermal->regs,
 					      thermal->tshut_mode);
-		thermal->chip->set_tshut_temp(id, thermal->regs,
+		thermal->chip->set_tshut_temp(thermal->chip->table,
+					      id, thermal->regs,
 					      thermal->tshut_temp);
 	}
 
 	thermal->chip->control(thermal->regs, true);
 
-	for (i = 0; i < ARRAY_SIZE(thermal->sensors); i++)
+	for (i = 0; i < thermal->chip->chn_num; i++)
 		rockchip_thermal_toggle_sensor(&thermal->sensors[i], true);
 
 	pinctrl_pm_select_default_state(dev);
diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
index 1384426..ed77614 100644
--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -169,7 +169,7 @@
 {
 	struct n_tty_data *ldata = tty->disc_data;
 
-	tty_audit_add_data(tty, to, n, ldata->icanon);
+	tty_audit_add_data(tty, from, n, ldata->icanon);
 	return copy_to_user(to, from, n);
 }
 
diff --git a/drivers/tty/serial/8250/8250_fsl.c b/drivers/tty/serial/8250/8250_fsl.c
index c0533a5..910bfee 100644
--- a/drivers/tty/serial/8250/8250_fsl.c
+++ b/drivers/tty/serial/8250/8250_fsl.c
@@ -60,3 +60,4 @@
 	spin_unlock_irqrestore(&up->port.lock, flags);
 	return 1;
 }
+EXPORT_SYMBOL_GPL(fsl8250_handle_irq);
diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig
index e6f5e12..6412f14 100644
--- a/drivers/tty/serial/8250/Kconfig
+++ b/drivers/tty/serial/8250/Kconfig
@@ -373,6 +373,7 @@
 	depends on SERIAL_8250 && PCI
 	select HSU_DMA if SERIAL_8250_DMA
 	select HSU_DMA_PCI if X86_INTEL_MID
+	select RATIONAL
 	help
 	  Selecting this option will enable handling of the extra features
 	  present on the UART found on Intel Medfield SOC and various other
diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index 1aec440..f38beb2 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -1539,7 +1539,6 @@
 	tristate "Freescale lpuart serial port support"
 	depends on HAS_DMA
 	select SERIAL_CORE
-	select SERIAL_EARLYCON
 	help
 	  Support for the on-chip lpuart on some Freescale SOCs.
 
@@ -1547,6 +1546,7 @@
 	bool "Console on Freescale lpuart serial port"
 	depends on SERIAL_FSL_LPUART=y
 	select SERIAL_CORE_CONSOLE
+	select SERIAL_EARLYCON
 	help
 	  If you have enabled the lpuart serial port on the Freescale SoCs,
 	  you can make it the console by answering Y to this option.
diff --git a/drivers/tty/serial/bcm63xx_uart.c b/drivers/tty/serial/bcm63xx_uart.c
index 681e0f3..a1c0a89 100644
--- a/drivers/tty/serial/bcm63xx_uart.c
+++ b/drivers/tty/serial/bcm63xx_uart.c
@@ -474,7 +474,7 @@
 
 	/* register irq and enable rx interrupts */
 	ret = request_irq(port->irq, bcm_uart_interrupt, 0,
-			  bcm_uart_type(port), port);
+			  dev_name(port->dev), port);
 	if (ret)
 		return ret;
 	bcm_uart_writel(port, UART_RX_INT_MASK, UART_IR_REG);
diff --git a/drivers/tty/serial/etraxfs-uart.c b/drivers/tty/serial/etraxfs-uart.c
index 6813e31..2f80bc7 100644
--- a/drivers/tty/serial/etraxfs-uart.c
+++ b/drivers/tty/serial/etraxfs-uart.c
@@ -894,7 +894,7 @@
 	up->regi_ser = of_iomap(np, 0);
 	up->port.dev = &pdev->dev;
 
-	up->gpios = mctrl_gpio_init(&pdev->dev, 0);
+	up->gpios = mctrl_gpio_init_noauto(&pdev->dev, 0);
 	if (IS_ERR(up->gpios))
 		return PTR_ERR(up->gpios);
 
diff --git a/drivers/tty/tty_audit.c b/drivers/tty/tty_audit.c
index 90ca082..3d245cd 100644
--- a/drivers/tty/tty_audit.c
+++ b/drivers/tty/tty_audit.c
@@ -265,7 +265,7 @@
  *
  *	Audit @data of @size from @tty, if necessary.
  */
-void tty_audit_add_data(struct tty_struct *tty, unsigned char *data,
+void tty_audit_add_data(struct tty_struct *tty, const void *data,
 			size_t size, unsigned icanon)
 {
 	struct tty_audit_buf *buf;
diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index 0c41dbc..bcc8e1e 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -1282,18 +1282,22 @@
 	int	was_stopped = tty->stopped;
 
 	if (tty->ops->send_xchar) {
+		down_read(&tty->termios_rwsem);
 		tty->ops->send_xchar(tty, ch);
+		up_read(&tty->termios_rwsem);
 		return 0;
 	}
 
 	if (tty_write_lock(tty, 0) < 0)
 		return -ERESTARTSYS;
 
+	down_read(&tty->termios_rwsem);
 	if (was_stopped)
 		start_tty(tty);
 	tty->ops->write(tty, &ch, 1);
 	if (was_stopped)
 		stop_tty(tty);
+	up_read(&tty->termios_rwsem);
 	tty_write_unlock(tty);
 	return 0;
 }
diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c
index 9c5aebf..1445dd3 100644
--- a/drivers/tty/tty_ioctl.c
+++ b/drivers/tty/tty_ioctl.c
@@ -1147,16 +1147,12 @@
 			spin_unlock_irq(&tty->flow_lock);
 			break;
 		case TCIOFF:
-			down_read(&tty->termios_rwsem);
 			if (STOP_CHAR(tty) != __DISABLED_CHAR)
 				retval = tty_send_xchar(tty, STOP_CHAR(tty));
-			up_read(&tty->termios_rwsem);
 			break;
 		case TCION:
-			down_read(&tty->termios_rwsem);
 			if (START_CHAR(tty) != __DISABLED_CHAR)
 				retval = tty_send_xchar(tty, START_CHAR(tty));
-			up_read(&tty->termios_rwsem);
 			break;
 		default:
 			return -EINVAL;
diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c
index 5af8f18..629e3c8 100644
--- a/drivers/tty/tty_ldisc.c
+++ b/drivers/tty/tty_ldisc.c
@@ -592,7 +592,7 @@
 
 	/* Restart the work queue in case no characters kick it off. Safe if
 	   already running */
-	schedule_work(&tty->port->buf.work);
+	tty_buffer_restart_work(tty->port);
 
 	tty_unlock(tty);
 	return retval;
diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c
index 6ccbf60..5a048b7 100644
--- a/drivers/usb/chipidea/ci_hdrc_imx.c
+++ b/drivers/usb/chipidea/ci_hdrc_imx.c
@@ -84,6 +84,12 @@
 	struct imx_usbmisc_data *usbmisc_data;
 	bool supports_runtime_pm;
 	bool in_lpm;
+	/* SoC before i.mx6 (except imx23/imx28) needs three clks */
+	bool need_three_clks;
+	struct clk *clk_ipg;
+	struct clk *clk_ahb;
+	struct clk *clk_per;
+	/* --------------------------------- */
 };
 
 /* Common functions shared by usbmisc drivers */
@@ -135,6 +141,102 @@
 }
 
 /* End of common functions shared by usbmisc drivers*/
+static int imx_get_clks(struct device *dev)
+{
+	struct ci_hdrc_imx_data *data = dev_get_drvdata(dev);
+	int ret = 0;
+
+	data->clk_ipg = devm_clk_get(dev, "ipg");
+	if (IS_ERR(data->clk_ipg)) {
+		/* If the platform only needs one clocks */
+		data->clk = devm_clk_get(dev, NULL);
+		if (IS_ERR(data->clk)) {
+			ret = PTR_ERR(data->clk);
+			dev_err(dev,
+				"Failed to get clks, err=%ld,%ld\n",
+				PTR_ERR(data->clk), PTR_ERR(data->clk_ipg));
+			return ret;
+		}
+		return ret;
+	}
+
+	data->clk_ahb = devm_clk_get(dev, "ahb");
+	if (IS_ERR(data->clk_ahb)) {
+		ret = PTR_ERR(data->clk_ahb);
+		dev_err(dev,
+			"Failed to get ahb clock, err=%d\n", ret);
+		return ret;
+	}
+
+	data->clk_per = devm_clk_get(dev, "per");
+	if (IS_ERR(data->clk_per)) {
+		ret = PTR_ERR(data->clk_per);
+		dev_err(dev,
+			"Failed to get per clock, err=%d\n", ret);
+		return ret;
+	}
+
+	data->need_three_clks = true;
+	return ret;
+}
+
+static int imx_prepare_enable_clks(struct device *dev)
+{
+	struct ci_hdrc_imx_data *data = dev_get_drvdata(dev);
+	int ret = 0;
+
+	if (data->need_three_clks) {
+		ret = clk_prepare_enable(data->clk_ipg);
+		if (ret) {
+			dev_err(dev,
+				"Failed to prepare/enable ipg clk, err=%d\n",
+				ret);
+			return ret;
+		}
+
+		ret = clk_prepare_enable(data->clk_ahb);
+		if (ret) {
+			dev_err(dev,
+				"Failed to prepare/enable ahb clk, err=%d\n",
+				ret);
+			clk_disable_unprepare(data->clk_ipg);
+			return ret;
+		}
+
+		ret = clk_prepare_enable(data->clk_per);
+		if (ret) {
+			dev_err(dev,
+				"Failed to prepare/enable per clk, err=%d\n",
+				ret);
+			clk_disable_unprepare(data->clk_ahb);
+			clk_disable_unprepare(data->clk_ipg);
+			return ret;
+		}
+	} else {
+		ret = clk_prepare_enable(data->clk);
+		if (ret) {
+			dev_err(dev,
+				"Failed to prepare/enable clk, err=%d\n",
+				ret);
+			return ret;
+		}
+	}
+
+	return ret;
+}
+
+static void imx_disable_unprepare_clks(struct device *dev)
+{
+	struct ci_hdrc_imx_data *data = dev_get_drvdata(dev);
+
+	if (data->need_three_clks) {
+		clk_disable_unprepare(data->clk_per);
+		clk_disable_unprepare(data->clk_ahb);
+		clk_disable_unprepare(data->clk_ipg);
+	} else {
+		clk_disable_unprepare(data->clk);
+	}
+}
 
 static int ci_hdrc_imx_probe(struct platform_device *pdev)
 {
@@ -145,31 +247,31 @@
 		.flags		= CI_HDRC_SET_NON_ZERO_TTHA,
 	};
 	int ret;
-	const struct of_device_id *of_id =
-			of_match_device(ci_hdrc_imx_dt_ids, &pdev->dev);
-	const struct ci_hdrc_imx_platform_flag *imx_platform_flag = of_id->data;
+	const struct of_device_id *of_id;
+	const struct ci_hdrc_imx_platform_flag *imx_platform_flag;
+
+	of_id = of_match_device(ci_hdrc_imx_dt_ids, &pdev->dev);
+	if (!of_id)
+		return -ENODEV;
+
+	imx_platform_flag = of_id->data;
 
 	data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
 	if (!data)
 		return -ENOMEM;
 
+	platform_set_drvdata(pdev, data);
 	data->usbmisc_data = usbmisc_get_init_data(&pdev->dev);
 	if (IS_ERR(data->usbmisc_data))
 		return PTR_ERR(data->usbmisc_data);
 
-	data->clk = devm_clk_get(&pdev->dev, NULL);
-	if (IS_ERR(data->clk)) {
-		dev_err(&pdev->dev,
-			"Failed to get clock, err=%ld\n", PTR_ERR(data->clk));
-		return PTR_ERR(data->clk);
-	}
-
-	ret = clk_prepare_enable(data->clk);
-	if (ret) {
-		dev_err(&pdev->dev,
-			"Failed to prepare or enable clock, err=%d\n", ret);
+	ret = imx_get_clks(&pdev->dev);
+	if (ret)
 		return ret;
-	}
+
+	ret = imx_prepare_enable_clks(&pdev->dev);
+	if (ret)
+		return ret;
 
 	data->phy = devm_usb_get_phy_by_phandle(&pdev->dev, "fsl,usbphy", 0);
 	if (IS_ERR(data->phy)) {
@@ -212,8 +314,6 @@
 		goto disable_device;
 	}
 
-	platform_set_drvdata(pdev, data);
-
 	if (data->supports_runtime_pm) {
 		pm_runtime_set_active(&pdev->dev);
 		pm_runtime_enable(&pdev->dev);
@@ -226,7 +326,7 @@
 disable_device:
 	ci_hdrc_remove_device(data->ci_pdev);
 err_clk:
-	clk_disable_unprepare(data->clk);
+	imx_disable_unprepare_clks(&pdev->dev);
 	return ret;
 }
 
@@ -240,7 +340,7 @@
 		pm_runtime_put_noidle(&pdev->dev);
 	}
 	ci_hdrc_remove_device(data->ci_pdev);
-	clk_disable_unprepare(data->clk);
+	imx_disable_unprepare_clks(&pdev->dev);
 
 	return 0;
 }
@@ -252,7 +352,7 @@
 
 	dev_dbg(dev, "at %s\n", __func__);
 
-	clk_disable_unprepare(data->clk);
+	imx_disable_unprepare_clks(dev);
 	data->in_lpm = true;
 
 	return 0;
@@ -270,7 +370,7 @@
 		return 0;
 	}
 
-	ret = clk_prepare_enable(data->clk);
+	ret = imx_prepare_enable_clks(dev);
 	if (ret)
 		return ret;
 
@@ -285,7 +385,7 @@
 	return 0;
 
 clk_disable:
-	clk_disable_unprepare(data->clk);
+	imx_disable_unprepare_clks(dev);
 	return ret;
 }
 
diff --git a/drivers/usb/chipidea/debug.c b/drivers/usb/chipidea/debug.c
index 080b7be..58c8485 100644
--- a/drivers/usb/chipidea/debug.c
+++ b/drivers/usb/chipidea/debug.c
@@ -322,8 +322,10 @@
 		return -EINVAL;
 
 	pm_runtime_get_sync(ci->dev);
+	disable_irq(ci->irq);
 	ci_role_stop(ci);
 	ret = ci_role_start(ci, role);
+	enable_irq(ci->irq);
 	pm_runtime_put_sync(ci->dev);
 
 	return ret ? ret : count;
diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c
index 8223fe7..391a122 100644
--- a/drivers/usb/chipidea/udc.c
+++ b/drivers/usb/chipidea/udc.c
@@ -1751,6 +1751,22 @@
 	return retval;
 }
 
+static void ci_udc_stop_for_otg_fsm(struct ci_hdrc *ci)
+{
+	if (!ci_otg_is_fsm_mode(ci))
+		return;
+
+	mutex_lock(&ci->fsm.lock);
+	if (ci->fsm.otg->state == OTG_STATE_A_PERIPHERAL) {
+		ci->fsm.a_bidl_adis_tmout = 1;
+		ci_hdrc_otg_fsm_start(ci);
+	} else if (ci->fsm.otg->state == OTG_STATE_B_PERIPHERAL) {
+		ci->fsm.protocol = PROTO_UNDEF;
+		ci->fsm.otg->state = OTG_STATE_UNDEFINED;
+	}
+	mutex_unlock(&ci->fsm.lock);
+}
+
 /**
  * ci_udc_stop: unregister a gadget driver
  */
@@ -1775,6 +1791,7 @@
 	ci->driver = NULL;
 	spin_unlock_irqrestore(&ci->lock, flags);
 
+	ci_udc_stop_for_otg_fsm(ci);
 	return 0;
 }
 
diff --git a/drivers/usb/chipidea/usbmisc_imx.c b/drivers/usb/chipidea/usbmisc_imx.c
index fcea4eb..ab8b027 100644
--- a/drivers/usb/chipidea/usbmisc_imx.c
+++ b/drivers/usb/chipidea/usbmisc_imx.c
@@ -500,7 +500,11 @@
 {
 	struct resource	*res;
 	struct imx_usbmisc *data;
-	struct of_device_id *tmp_dev;
+	const struct of_device_id *of_id;
+
+	of_id = of_match_device(usbmisc_imx_dt_ids, &pdev->dev);
+	if (!of_id)
+		return -ENODEV;
 
 	data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
 	if (!data)
@@ -513,9 +517,7 @@
 	if (IS_ERR(data->base))
 		return PTR_ERR(data->base);
 
-	tmp_dev = (struct of_device_id *)
-		of_match_device(usbmisc_imx_dt_ids, &pdev->dev);
-	data->ops = (const struct usbmisc_ops *)tmp_dev->data;
+	data->ops = (const struct usbmisc_ops *)of_id->data;
 	platform_set_drvdata(pdev, data);
 
 	return 0;
diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c
index 433bbc3..071964c 100644
--- a/drivers/usb/class/usblp.c
+++ b/drivers/usb/class/usblp.c
@@ -884,11 +884,11 @@
 
 	add_wait_queue(&usblp->wwait, &waita);
 	for (;;) {
-		set_current_state(TASK_INTERRUPTIBLE);
 		if (mutex_lock_interruptible(&usblp->mut)) {
 			rc = -EINTR;
 			break;
 		}
+		set_current_state(TASK_INTERRUPTIBLE);
 		rc = usblp_wtest(usblp, nonblock);
 		mutex_unlock(&usblp->mut);
 		if (rc <= 0)
diff --git a/drivers/usb/core/Kconfig b/drivers/usb/core/Kconfig
index a99c89e..dd28010 100644
--- a/drivers/usb/core/Kconfig
+++ b/drivers/usb/core/Kconfig
@@ -77,8 +77,7 @@
 
 config USB_OTG_FSM
 	tristate "USB 2.0 OTG FSM implementation"
-	depends on USB
-	select USB_OTG
+	depends on USB && USB_OTG
 	select USB_PHY
 	help
 	  Implements OTG Finite State Machine as specified in On-The-Go
diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c
index e79baf7..571c217 100644
--- a/drivers/usb/dwc2/hcd.c
+++ b/drivers/usb/dwc2/hcd.c
@@ -324,12 +324,13 @@
  */
 static void dwc2_hcd_rem_wakeup(struct dwc2_hsotg *hsotg)
 {
-	if (hsotg->lx_state == DWC2_L2) {
+	if (hsotg->bus_suspended) {
 		hsotg->flags.b.port_suspend_change = 1;
 		usb_hcd_resume_root_hub(hsotg->priv);
-	} else {
-		hsotg->flags.b.port_l1_change = 1;
 	}
+
+	if (hsotg->lx_state == DWC2_L1)
+		hsotg->flags.b.port_l1_change = 1;
 }
 
 /**
@@ -1428,8 +1429,8 @@
 	dev_dbg(hsotg->dev, "Clear Resume: HPRT0=%0x\n",
 		dwc2_readl(hsotg->regs + HPRT0));
 
-	hsotg->bus_suspended = 0;
 	dwc2_hcd_rem_wakeup(hsotg);
+	hsotg->bus_suspended = 0;
 
 	/* Change to L0 state */
 	hsotg->lx_state = DWC2_L0;
diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c
index 5859b0f..e61d773 100644
--- a/drivers/usb/dwc2/platform.c
+++ b/drivers/usb/dwc2/platform.c
@@ -108,7 +108,8 @@
 	.host_ls_low_power_phy_clk	= -1,
 	.ts_dline			= -1,
 	.reload_ctl			= -1,
-	.ahbcfg				= 0x7, /* INCR16 */
+	.ahbcfg				= GAHBCFG_HBSTLEN_INCR16 <<
+					  GAHBCFG_HBSTLEN_SHIFT,
 	.uframe_sched			= -1,
 	.external_id_pin_ctl		= -1,
 	.hibernation			= -1,
diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
index 77a622c..009d830 100644
--- a/drivers/usb/dwc3/dwc3-pci.c
+++ b/drivers/usb/dwc3/dwc3-pci.c
@@ -34,6 +34,8 @@
 #define PCI_DEVICE_ID_INTEL_BSW			0x22b7
 #define PCI_DEVICE_ID_INTEL_SPTLP		0x9d30
 #define PCI_DEVICE_ID_INTEL_SPTH		0xa130
+#define PCI_DEVICE_ID_INTEL_BXT			0x0aaa
+#define PCI_DEVICE_ID_INTEL_APL			0x5aaa
 
 static const struct acpi_gpio_params reset_gpios = { 0, 0, false };
 static const struct acpi_gpio_params cs_gpios = { 1, 0, false };
@@ -210,6 +212,8 @@
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MRFLD), },
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SPTLP), },
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SPTH), },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BXT), },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_APL), },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_NL_USB), },
 	{  }	/* Terminating Entry */
 };
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 55ba447..e24a01c 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2744,12 +2744,34 @@
 	}
 
 	dwc->gadget.ops			= &dwc3_gadget_ops;
-	dwc->gadget.max_speed		= USB_SPEED_SUPER;
 	dwc->gadget.speed		= USB_SPEED_UNKNOWN;
 	dwc->gadget.sg_supported	= true;
 	dwc->gadget.name		= "dwc3-gadget";
 
 	/*
+	 * FIXME We might be setting max_speed to <SUPER, however versions
+	 * <2.20a of dwc3 have an issue with metastability (documented
+	 * elsewhere in this driver) which tells us we can't set max speed to
+	 * anything lower than SUPER.
+	 *
+	 * Because gadget.max_speed is only used by composite.c and function
+	 * drivers (i.e. it won't go into dwc3's registers) we are allowing this
+	 * to happen so we avoid sending SuperSpeed Capability descriptor
+	 * together with our BOS descriptor as that could confuse host into
+	 * thinking we can handle super speed.
+	 *
+	 * Note that, in fact, we won't even support GetBOS requests when speed
+	 * is less than super speed because we don't have means, yet, to tell
+	 * composite.c that we are USB 2.0 + LPM ECN.
+	 */
+	if (dwc->revision < DWC3_REVISION_220A)
+		dwc3_trace(trace_dwc3_gadget,
+				"Changing max_speed on rev %08x\n",
+				dwc->revision);
+
+	dwc->gadget.max_speed		= dwc->maximum_speed;
+
+	/*
 	 * Per databook, DWC3 needs buffer size to be aligned to MaxPacketSize
 	 * on ep out.
 	 */
diff --git a/drivers/usb/gadget/function/f_loopback.c b/drivers/usb/gadget/function/f_loopback.c
index 23933bd..ddc3aad 100644
--- a/drivers/usb/gadget/function/f_loopback.c
+++ b/drivers/usb/gadget/function/f_loopback.c
@@ -329,7 +329,7 @@
 	for (i = 0; i < loop->qlen && result == 0; i++) {
 		result = -ENOMEM;
 
-		in_req = usb_ep_alloc_request(loop->in_ep, GFP_KERNEL);
+		in_req = usb_ep_alloc_request(loop->in_ep, GFP_ATOMIC);
 		if (!in_req)
 			goto fail;
 
diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.c b/drivers/usb/gadget/udc/atmel_usba_udc.c
index f0f2b06..f92f5af 100644
--- a/drivers/usb/gadget/udc/atmel_usba_udc.c
+++ b/drivers/usb/gadget/udc/atmel_usba_udc.c
@@ -1633,7 +1633,7 @@
 	spin_lock(&udc->lock);
 
 	int_enb = usba_int_enb_get(udc);
-	status = usba_readl(udc, INT_STA) & int_enb;
+	status = usba_readl(udc, INT_STA) & (int_enb | USBA_HIGH_SPEED);
 	DBG(DBG_INT, "irq, status=%#08x\n", status);
 
 	if (status & USBA_DET_SUSPEND) {
diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index 5d2d7e9..0230965 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -782,12 +782,15 @@
 			status |= USB_PORT_STAT_SUSPEND;
 		}
 	}
-	if ((raw_port_status & PORT_PLS_MASK) == XDEV_U0
-			&& (raw_port_status & PORT_POWER)
-			&& (bus_state->suspended_ports & (1 << wIndex))) {
-		bus_state->suspended_ports &= ~(1 << wIndex);
-		if (hcd->speed < HCD_USB3)
-			bus_state->port_c_suspend |= 1 << wIndex;
+	if ((raw_port_status & PORT_PLS_MASK) == XDEV_U0 &&
+	    (raw_port_status & PORT_POWER)) {
+		if (bus_state->suspended_ports & (1 << wIndex)) {
+			bus_state->suspended_ports &= ~(1 << wIndex);
+			if (hcd->speed < HCD_USB3)
+				bus_state->port_c_suspend |= 1 << wIndex;
+		}
+		bus_state->resume_done[wIndex] = 0;
+		clear_bit(wIndex, &bus_state->resuming_ports);
 	}
 	if (raw_port_status & PORT_CONNECT) {
 		status |= USB_PORT_STAT_CONNECTION;
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index fa83625..6c5e813 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -3896,28 +3896,6 @@
 	return ret;
 }
 
-static int ep_ring_is_processing(struct xhci_hcd *xhci,
-		int slot_id, unsigned int ep_index)
-{
-	struct xhci_virt_device *xdev;
-	struct xhci_ring *ep_ring;
-	struct xhci_ep_ctx *ep_ctx;
-	struct xhci_virt_ep *xep;
-	dma_addr_t hw_deq;
-
-	xdev = xhci->devs[slot_id];
-	xep = &xhci->devs[slot_id]->eps[ep_index];
-	ep_ring = xep->ring;
-	ep_ctx = xhci_get_ep_ctx(xhci, xdev->out_ctx, ep_index);
-
-	if ((le32_to_cpu(ep_ctx->ep_info) & EP_STATE_MASK) != EP_STATE_RUNNING)
-		return 0;
-
-	hw_deq = le64_to_cpu(ep_ctx->deq) & ~EP_CTX_CYCLE_MASK;
-	return (hw_deq !=
-		xhci_trb_virt_to_dma(ep_ring->enq_seg, ep_ring->enqueue));
-}
-
 /*
  * Check transfer ring to guarantee there is enough room for the urb.
  * Update ISO URB start_frame and interval.
@@ -3983,10 +3961,12 @@
 	}
 
 	/* Calculate the start frame and put it in urb->start_frame. */
-	if (HCC_CFC(xhci->hcc_params) &&
-			ep_ring_is_processing(xhci, slot_id, ep_index)) {
-		urb->start_frame = xep->next_frame_id;
-		goto skip_start_over;
+	if (HCC_CFC(xhci->hcc_params) && !list_empty(&ep_ring->td_list)) {
+		if ((le32_to_cpu(ep_ctx->ep_info) & EP_STATE_MASK) ==
+				EP_STATE_RUNNING) {
+			urb->start_frame = xep->next_frame_id;
+			goto skip_start_over;
+		}
 	}
 
 	start_frame = readl(&xhci->run_regs->microframe_index);
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 6e7dc6f..dfa44d3 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -175,6 +175,16 @@
 	command |= CMD_RESET;
 	writel(command, &xhci->op_regs->command);
 
+	/* Existing Intel xHCI controllers require a delay of 1 mS,
+	 * after setting the CMD_RESET bit, and before accessing any
+	 * HC registers. This allows the HC to complete the
+	 * reset operation and be ready for HC register access.
+	 * Without this delay, the subsequent HC register access,
+	 * may result in a system hang very rarely.
+	 */
+	if (xhci->quirks & XHCI_INTEL_HOST)
+		udelay(1000);
+
 	ret = xhci_handshake(&xhci->op_regs->command,
 			CMD_RESET, 0, 10 * 1000 * 1000);
 	if (ret)
diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c
index ba13529..18cfc0a 100644
--- a/drivers/usb/musb/musb_core.c
+++ b/drivers/usb/musb/musb_core.c
@@ -132,7 +132,7 @@
 /*-------------------------------------------------------------------------*/
 
 #ifndef CONFIG_BLACKFIN
-static int musb_ulpi_read(struct usb_phy *phy, u32 offset)
+static int musb_ulpi_read(struct usb_phy *phy, u32 reg)
 {
 	void __iomem *addr = phy->io_priv;
 	int	i = 0;
@@ -151,7 +151,7 @@
 	 * ULPICarKitControlDisableUTMI after clearing POWER_SUSPENDM.
 	 */
 
-	musb_writeb(addr, MUSB_ULPI_REG_ADDR, (u8)offset);
+	musb_writeb(addr, MUSB_ULPI_REG_ADDR, (u8)reg);
 	musb_writeb(addr, MUSB_ULPI_REG_CONTROL,
 			MUSB_ULPI_REG_REQ | MUSB_ULPI_RDN_WR);
 
@@ -176,7 +176,7 @@
 	return ret;
 }
 
-static int musb_ulpi_write(struct usb_phy *phy, u32 offset, u32 data)
+static int musb_ulpi_write(struct usb_phy *phy, u32 val, u32 reg)
 {
 	void __iomem *addr = phy->io_priv;
 	int	i = 0;
@@ -191,8 +191,8 @@
 	power &= ~MUSB_POWER_SUSPENDM;
 	musb_writeb(addr, MUSB_POWER, power);
 
-	musb_writeb(addr, MUSB_ULPI_REG_ADDR, (u8)offset);
-	musb_writeb(addr, MUSB_ULPI_REG_DATA, (u8)data);
+	musb_writeb(addr, MUSB_ULPI_REG_ADDR, (u8)reg);
+	musb_writeb(addr, MUSB_ULPI_REG_DATA, (u8)val);
 	musb_writeb(addr, MUSB_ULPI_REG_CONTROL, MUSB_ULPI_REG_REQ);
 
 	while (!(musb_readb(addr, MUSB_ULPI_REG_CONTROL)
@@ -1668,7 +1668,7 @@
 static bool use_dma = 1;
 
 /* "modprobe ... use_dma=0" etc */
-module_param(use_dma, bool, 0);
+module_param(use_dma, bool, 0644);
 MODULE_PARM_DESC(use_dma, "enable/disable use of DMA");
 
 void musb_dma_completion(struct musb *musb, u8 epnum, u8 transmit)
diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c
index 26c65e6..795a45b 100644
--- a/drivers/usb/musb/musb_host.c
+++ b/drivers/usb/musb/musb_host.c
@@ -112,22 +112,32 @@
 	struct musb	*musb = ep->musb;
 	void __iomem	*epio = ep->regs;
 	u16		csr;
-	u16		lastcsr = 0;
 	int		retries = 1000;
 
 	csr = musb_readw(epio, MUSB_TXCSR);
 	while (csr & MUSB_TXCSR_FIFONOTEMPTY) {
-		if (csr != lastcsr)
-			dev_dbg(musb->controller, "Host TX FIFONOTEMPTY csr: %02x\n", csr);
-		lastcsr = csr;
 		csr |= MUSB_TXCSR_FLUSHFIFO | MUSB_TXCSR_TXPKTRDY;
 		musb_writew(epio, MUSB_TXCSR, csr);
 		csr = musb_readw(epio, MUSB_TXCSR);
-		if (WARN(retries-- < 1,
+
+		/*
+		 * FIXME: sometimes the tx fifo flush failed, it has been
+		 * observed during device disconnect on AM335x.
+		 *
+		 * To reproduce the issue, ensure tx urb(s) are queued when
+		 * unplug the usb device which is connected to AM335x usb
+		 * host port.
+		 *
+		 * I found using a usb-ethernet device and running iperf
+		 * (client on AM335x) has very high chance to trigger it.
+		 *
+		 * Better to turn on dev_dbg() in musb_cleanup_urb() with
+		 * CPPI enabled to see the issue when aborting the tx channel.
+		 */
+		if (dev_WARN_ONCE(musb->controller, retries-- < 1,
 				"Could not flush host TX%d fifo: csr: %04x\n",
 				ep->epnum, csr))
 			return;
-		mdelay(1);
 	}
 }
 
diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig
index 1731324..22e8ecb 100644
--- a/drivers/usb/phy/Kconfig
+++ b/drivers/usb/phy/Kconfig
@@ -21,7 +21,6 @@
 config FSL_USB2_OTG
 	bool "Freescale USB OTG Transceiver Driver"
 	depends on USB_EHCI_FSL && USB_FSL_USB2 && USB_OTG_FSM && PM
-	select USB_OTG
 	select USB_PHY
 	help
 	  Enable this to support Freescale USB OTG transceiver.
@@ -168,8 +167,7 @@
 
 config USB_MV_OTG
 	tristate "Marvell USB OTG support"
-	depends on USB_EHCI_MV && USB_MV_UDC && PM
-	select USB_OTG
+	depends on USB_EHCI_MV && USB_MV_UDC && PM && USB_OTG
 	select USB_PHY
 	help
 	  Say Y here if you want to build Marvell USB OTG transciever
diff --git a/drivers/usb/phy/phy-mxs-usb.c b/drivers/usb/phy/phy-mxs-usb.c
index 4d863eb..b7536af 100644
--- a/drivers/usb/phy/phy-mxs-usb.c
+++ b/drivers/usb/phy/phy-mxs-usb.c
@@ -452,10 +452,13 @@
 	struct clk *clk;
 	struct mxs_phy *mxs_phy;
 	int ret;
-	const struct of_device_id *of_id =
-			of_match_device(mxs_phy_dt_ids, &pdev->dev);
+	const struct of_device_id *of_id;
 	struct device_node *np = pdev->dev.of_node;
 
+	of_id = of_match_device(mxs_phy_dt_ids, &pdev->dev);
+	if (!of_id)
+		return -ENODEV;
+
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	base = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(base))
diff --git a/drivers/usb/phy/phy-omap-otg.c b/drivers/usb/phy/phy-omap-otg.c
index 1270906..c4bf2de 100644
--- a/drivers/usb/phy/phy-omap-otg.c
+++ b/drivers/usb/phy/phy-omap-otg.c
@@ -105,7 +105,6 @@
 	extcon = extcon_get_extcon_dev(config->extcon);
 	if (!extcon)
 		return -EPROBE_DEFER;
-	otg_dev->extcon = extcon;
 
 	otg_dev = devm_kzalloc(&pdev->dev, sizeof(*otg_dev), GFP_KERNEL);
 	if (!otg_dev)
@@ -115,6 +114,7 @@
 	if (IS_ERR(otg_dev->base))
 		return PTR_ERR(otg_dev->base);
 
+	otg_dev->extcon = extcon;
 	otg_dev->id_nb.notifier_call = omap_otg_id_notifier;
 	otg_dev->vbus_nb.notifier_call = omap_otg_vbus_notifier;
 
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 685fef7..f228060 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -161,6 +161,7 @@
 #define NOVATELWIRELESS_PRODUCT_HSPA_EMBEDDED_HIGHSPEED	0x9001
 #define NOVATELWIRELESS_PRODUCT_E362		0x9010
 #define NOVATELWIRELESS_PRODUCT_E371		0x9011
+#define NOVATELWIRELESS_PRODUCT_U620L		0x9022
 #define NOVATELWIRELESS_PRODUCT_G2		0xA010
 #define NOVATELWIRELESS_PRODUCT_MC551		0xB001
 
@@ -354,6 +355,7 @@
 /* This is the 4G XS Stick W14 a.k.a. Mobilcom Debitel Surf-Stick *
  * It seems to contain a Qualcomm QSC6240/6290 chipset            */
 #define FOUR_G_SYSTEMS_PRODUCT_W14		0x9603
+#define FOUR_G_SYSTEMS_PRODUCT_W100		0x9b01
 
 /* iBall 3.5G connect wireless modem */
 #define IBALL_3_5G_CONNECT			0x9605
@@ -519,6 +521,11 @@
 	.sendsetup = BIT(0) | BIT(1),
 };
 
+static const struct option_blacklist_info four_g_w100_blacklist = {
+	.sendsetup = BIT(1) | BIT(2),
+	.reserved = BIT(3),
+};
+
 static const struct option_blacklist_info alcatel_x200_blacklist = {
 	.sendsetup = BIT(0) | BIT(1),
 	.reserved = BIT(4),
@@ -1052,6 +1059,7 @@
 	{ USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_MC551, 0xff, 0xff, 0xff) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_E362, 0xff, 0xff, 0xff) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_E371, 0xff, 0xff, 0xff) },
+	{ USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_U620L, 0xff, 0x00, 0x00) },
 
 	{ USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_H01) },
 	{ USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_H01A) },
@@ -1641,6 +1649,9 @@
 	{ USB_DEVICE(LONGCHEER_VENDOR_ID, FOUR_G_SYSTEMS_PRODUCT_W14),
   	  .driver_info = (kernel_ulong_t)&four_g_w14_blacklist
   	},
+	{ USB_DEVICE(LONGCHEER_VENDOR_ID, FOUR_G_SYSTEMS_PRODUCT_W100),
+	  .driver_info = (kernel_ulong_t)&four_g_w100_blacklist
+	},
 	{ USB_DEVICE_INTERFACE_CLASS(LONGCHEER_VENDOR_ID, SPEEDUP_PRODUCT_SU9800, 0xff) },
 	{ USB_DEVICE(LONGCHEER_VENDOR_ID, ZOOM_PRODUCT_4597) },
 	{ USB_DEVICE(LONGCHEER_VENDOR_ID, IBALL_3_5G_CONNECT) },
diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c
index 5022fcf..9919d2a 100644
--- a/drivers/usb/serial/qcserial.c
+++ b/drivers/usb/serial/qcserial.c
@@ -22,6 +22,8 @@
 #define DRIVER_AUTHOR "Qualcomm Inc"
 #define DRIVER_DESC "Qualcomm USB Serial driver"
 
+#define QUECTEL_EC20_PID	0x9215
+
 /* standard device layouts supported by this driver */
 enum qcserial_layouts {
 	QCSERIAL_G2K = 0,	/* Gobi 2000 */
@@ -171,6 +173,38 @@
 };
 MODULE_DEVICE_TABLE(usb, id_table);
 
+static int handle_quectel_ec20(struct device *dev, int ifnum)
+{
+	int altsetting = 0;
+
+	/*
+	 * Quectel EC20 Mini PCIe LTE module layout:
+	 * 0: DM/DIAG (use libqcdm from ModemManager for communication)
+	 * 1: NMEA
+	 * 2: AT-capable modem port
+	 * 3: Modem interface
+	 * 4: NDIS
+	 */
+	switch (ifnum) {
+	case 0:
+		dev_dbg(dev, "Quectel EC20 DM/DIAG interface found\n");
+		break;
+	case 1:
+		dev_dbg(dev, "Quectel EC20 NMEA GPS interface found\n");
+		break;
+	case 2:
+	case 3:
+		dev_dbg(dev, "Quectel EC20 Modem port found\n");
+		break;
+	case 4:
+		/* Don't claim the QMI/net interface */
+		altsetting = -1;
+		break;
+	}
+
+	return altsetting;
+}
+
 static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id)
 {
 	struct usb_host_interface *intf = serial->interface->cur_altsetting;
@@ -181,6 +215,10 @@
 	int altsetting = -1;
 	bool sendsetup = false;
 
+	/* we only support vendor specific functions */
+	if (intf->desc.bInterfaceClass != USB_CLASS_VENDOR_SPEC)
+		goto done;
+
 	nintf = serial->dev->actconfig->desc.bNumInterfaces;
 	dev_dbg(dev, "Num Interfaces = %d\n", nintf);
 	ifnum = intf->desc.bInterfaceNumber;
@@ -240,6 +278,12 @@
 			altsetting = -1;
 		break;
 	case QCSERIAL_G2K:
+		/* handle non-standard layouts */
+		if (nintf == 5 && id->idProduct == QUECTEL_EC20_PID) {
+			altsetting = handle_quectel_ec20(dev, ifnum);
+			goto done;
+		}
+
 		/*
 		 * Gobi 2K+ USB layout:
 		 * 0: QMI/net
@@ -301,29 +345,39 @@
 		break;
 	case QCSERIAL_HWI:
 		/*
-		 * Huawei layout:
-		 * 0: AT-capable modem port
-		 * 1: DM/DIAG
-		 * 2: AT-capable modem port
-		 * 3: CCID-compatible PCSC interface
-		 * 4: QMI/net
-		 * 5: NMEA
+		 * Huawei devices map functions by subclass + protocol
+		 * instead of interface numbers. The protocol identify
+		 * a specific function, while the subclass indicate a
+		 * specific firmware source
+		 *
+		 * This is a blacklist of functions known to be
+		 * non-serial.  The rest are assumed to be serial and
+		 * will be handled by this driver
 		 */
-		switch (ifnum) {
-		case 0:
-		case 2:
-			dev_dbg(dev, "Modem port found\n");
-			break;
-		case 1:
-			dev_dbg(dev, "DM/DIAG interface found\n");
-			break;
-		case 5:
-			dev_dbg(dev, "NMEA GPS interface found\n");
-			break;
-		default:
-			/* don't claim any unsupported interface */
+		switch (intf->desc.bInterfaceProtocol) {
+			/* QMI combined (qmi_wwan) */
+		case 0x07:
+		case 0x37:
+		case 0x67:
+			/* QMI data (qmi_wwan) */
+		case 0x08:
+		case 0x38:
+		case 0x68:
+			/* QMI control (qmi_wwan) */
+		case 0x09:
+		case 0x39:
+		case 0x69:
+			/* NCM like (huawei_cdc_ncm) */
+		case 0x16:
+		case 0x46:
+		case 0x76:
 			altsetting = -1;
 			break;
+		default:
+			dev_dbg(dev, "Huawei type serial port found (%02x/%02x/%02x)\n",
+				intf->desc.bInterfaceClass,
+				intf->desc.bInterfaceSubClass,
+				intf->desc.bInterfaceProtocol);
 		}
 		break;
 	default:
diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c
index e9da41d..2694df2 100644
--- a/drivers/usb/serial/ti_usb_3410_5052.c
+++ b/drivers/usb/serial/ti_usb_3410_5052.c
@@ -159,6 +159,7 @@
 	{ USB_DEVICE(ABBOTT_VENDOR_ID, ABBOTT_STEREO_PLUG_ID) },
 	{ USB_DEVICE(ABBOTT_VENDOR_ID, ABBOTT_STRIP_PORT_ID) },
 	{ USB_DEVICE(TI_VENDOR_ID, FRI2_PRODUCT_ID) },
+	{ USB_DEVICE(HONEYWELL_VENDOR_ID, HONEYWELL_HGI80_PRODUCT_ID) },
 	{ }	/* terminator */
 };
 
@@ -191,6 +192,7 @@
 	{ USB_DEVICE(ABBOTT_VENDOR_ID, ABBOTT_PRODUCT_ID) },
 	{ USB_DEVICE(ABBOTT_VENDOR_ID, ABBOTT_STRIP_PORT_ID) },
 	{ USB_DEVICE(TI_VENDOR_ID, FRI2_PRODUCT_ID) },
+	{ USB_DEVICE(HONEYWELL_VENDOR_ID, HONEYWELL_HGI80_PRODUCT_ID) },
 	{ }	/* terminator */
 };
 
diff --git a/drivers/usb/serial/ti_usb_3410_5052.h b/drivers/usb/serial/ti_usb_3410_5052.h
index 4a2423e..98f35c6 100644
--- a/drivers/usb/serial/ti_usb_3410_5052.h
+++ b/drivers/usb/serial/ti_usb_3410_5052.h
@@ -56,6 +56,10 @@
 #define ABBOTT_PRODUCT_ID		ABBOTT_STEREO_PLUG_ID
 #define ABBOTT_STRIP_PORT_ID		0x3420
 
+/* Honeywell vendor and product IDs */
+#define HONEYWELL_VENDOR_ID		0x10ac
+#define HONEYWELL_HGI80_PRODUCT_ID	0x0102  /* Honeywell HGI80 */
+
 /* Commands */
 #define TI_GET_VERSION			0x01
 #define TI_GET_PORT_STATUS		0x02
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 7a8a6c6..1c427be 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -446,7 +446,7 @@
 
 config IMX2_WDT
 	tristate "IMX2+ Watchdog"
-	depends on ARCH_MXC
+	depends on ARCH_MXC || ARCH_LAYERSCAPE
 	select REGMAP_MMIO
 	select WATCHDOG_CORE
 	help
diff --git a/drivers/watchdog/mtk_wdt.c b/drivers/watchdog/mtk_wdt.c
index 6ad9df9..b751f43 100644
--- a/drivers/watchdog/mtk_wdt.c
+++ b/drivers/watchdog/mtk_wdt.c
@@ -123,6 +123,7 @@
 
 	reg = readl(wdt_base + WDT_MODE);
 	reg &= ~WDT_MODE_EN;
+	reg |= WDT_MODE_KEY;
 	iowrite32(reg, wdt_base + WDT_MODE);
 
 	return 0;
diff --git a/drivers/watchdog/omap_wdt.c b/drivers/watchdog/omap_wdt.c
index d96bee0..6f17c93 100644
--- a/drivers/watchdog/omap_wdt.c
+++ b/drivers/watchdog/omap_wdt.c
@@ -205,7 +205,7 @@
 
 static unsigned int omap_wdt_get_timeleft(struct watchdog_device *wdog)
 {
-	struct omap_wdt_dev *wdev = watchdog_get_drvdata(wdog);
+	struct omap_wdt_dev *wdev = to_omap_wdt_dev(wdog);
 	void __iomem *base = wdev->base;
 	u32 value;
 
diff --git a/drivers/watchdog/pnx4008_wdt.c b/drivers/watchdog/pnx4008_wdt.c
index 4224b3e..313cd1c 100644
--- a/drivers/watchdog/pnx4008_wdt.c
+++ b/drivers/watchdog/pnx4008_wdt.c
@@ -80,7 +80,7 @@
 
 static DEFINE_SPINLOCK(io_lock);
 static void __iomem	*wdt_base;
-struct clk		*wdt_clk;
+static struct clk	*wdt_clk;
 
 static int pnx4008_wdt_start(struct watchdog_device *wdd)
 {
@@ -161,7 +161,7 @@
 	if (IS_ERR(wdt_clk))
 		return PTR_ERR(wdt_clk);
 
-	ret = clk_enable(wdt_clk);
+	ret = clk_prepare_enable(wdt_clk);
 	if (ret)
 		return ret;
 
@@ -184,7 +184,7 @@
 	return 0;
 
 disable_clk:
-	clk_disable(wdt_clk);
+	clk_disable_unprepare(wdt_clk);
 	return ret;
 }
 
@@ -192,7 +192,7 @@
 {
 	watchdog_unregister_device(&pnx4008_wdd);
 
-	clk_disable(wdt_clk);
+	clk_disable_unprepare(wdt_clk);
 
 	return 0;
 }
diff --git a/drivers/watchdog/tegra_wdt.c b/drivers/watchdog/tegra_wdt.c
index 7f97cdd..9ec5760 100644
--- a/drivers/watchdog/tegra_wdt.c
+++ b/drivers/watchdog/tegra_wdt.c
@@ -140,8 +140,10 @@
 {
 	wdd->timeout = timeout;
 
-	if (watchdog_active(wdd))
+	if (watchdog_active(wdd)) {
+		tegra_wdt_stop(wdd);
 		return tegra_wdt_start(wdd);
+	}
 
 	return 0;
 }
diff --git a/drivers/watchdog/w83977f_wdt.c b/drivers/watchdog/w83977f_wdt.c
index 91bf55a..20e2bba 100644
--- a/drivers/watchdog/w83977f_wdt.c
+++ b/drivers/watchdog/w83977f_wdt.c
@@ -224,7 +224,7 @@
 
 static int wdt_set_timeout(int t)
 {
-	int tmrval;
+	unsigned int tmrval;
 
 	/*
 	 * Convert seconds to watchdog counter time units, rounding up.
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 849500e..524c221 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -39,6 +39,7 @@
 #include <asm/irq.h>
 #include <asm/idle.h>
 #include <asm/io_apic.h>
+#include <asm/i8259.h>
 #include <asm/xen/pci.h>
 #endif
 #include <asm/sync_bitops.h>
@@ -420,7 +421,7 @@
 		return xen_allocate_irq_dynamic();
 
 	/* Legacy IRQ descriptors are already allocated by the arch. */
-	if (gsi < NR_IRQS_LEGACY)
+	if (gsi < nr_legacy_irqs())
 		irq = gsi;
 	else
 		irq = irq_alloc_desc_at(gsi, -1);
@@ -446,7 +447,7 @@
 	kfree(info);
 
 	/* Legacy IRQ descriptors are managed by the arch. */
-	if (irq < NR_IRQS_LEGACY)
+	if (irq < nr_legacy_irqs())
 		return;
 
 	irq_free_desc(irq);
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index 00f40f0..38272ad 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -49,6 +49,8 @@
 #include <linux/init.h>
 #include <linux/mutex.h>
 #include <linux/cpu.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
 
 #include <xen/xen.h>
 #include <xen/events.h>
@@ -58,10 +60,10 @@
 struct per_user_data {
 	struct mutex bind_mutex; /* serialize bind/unbind operations */
 	struct rb_root evtchns;
+	unsigned int nr_evtchns;
 
 	/* Notification ring, accessed via /dev/xen/evtchn. */
-#define EVTCHN_RING_SIZE     (PAGE_SIZE / sizeof(evtchn_port_t))
-#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1))
+	unsigned int ring_size;
 	evtchn_port_t *ring;
 	unsigned int ring_cons, ring_prod, ring_overflow;
 	struct mutex ring_cons_mutex; /* protect against concurrent readers */
@@ -80,10 +82,41 @@
 	bool enabled;
 };
 
+static evtchn_port_t *evtchn_alloc_ring(unsigned int size)
+{
+	evtchn_port_t *ring;
+	size_t s = size * sizeof(*ring);
+
+	ring = kmalloc(s, GFP_KERNEL);
+	if (!ring)
+		ring = vmalloc(s);
+
+	return ring;
+}
+
+static void evtchn_free_ring(evtchn_port_t *ring)
+{
+	kvfree(ring);
+}
+
+static unsigned int evtchn_ring_offset(struct per_user_data *u,
+				       unsigned int idx)
+{
+	return idx & (u->ring_size - 1);
+}
+
+static evtchn_port_t *evtchn_ring_entry(struct per_user_data *u,
+					unsigned int idx)
+{
+	return u->ring + evtchn_ring_offset(u, idx);
+}
+
 static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
 {
 	struct rb_node **new = &(u->evtchns.rb_node), *parent = NULL;
 
+	u->nr_evtchns++;
+
 	while (*new) {
 		struct user_evtchn *this;
 
@@ -107,6 +140,7 @@
 
 static void del_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
 {
+	u->nr_evtchns--;
 	rb_erase(&evtchn->node, &u->evtchns);
 	kfree(evtchn);
 }
@@ -144,8 +178,8 @@
 
 	spin_lock(&u->ring_prod_lock);
 
-	if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) {
-		u->ring[EVTCHN_RING_MASK(u->ring_prod)] = evtchn->port;
+	if ((u->ring_prod - u->ring_cons) < u->ring_size) {
+		*evtchn_ring_entry(u, u->ring_prod) = evtchn->port;
 		wmb(); /* Ensure ring contents visible */
 		if (u->ring_cons == u->ring_prod++) {
 			wake_up_interruptible(&u->evtchn_wait);
@@ -200,10 +234,10 @@
 	}
 
 	/* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
-	if (((c ^ p) & EVTCHN_RING_SIZE) != 0) {
-		bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) *
+	if (((c ^ p) & u->ring_size) != 0) {
+		bytes1 = (u->ring_size - evtchn_ring_offset(u, c)) *
 			sizeof(evtchn_port_t);
-		bytes2 = EVTCHN_RING_MASK(p) * sizeof(evtchn_port_t);
+		bytes2 = evtchn_ring_offset(u, p) * sizeof(evtchn_port_t);
 	} else {
 		bytes1 = (p - c) * sizeof(evtchn_port_t);
 		bytes2 = 0;
@@ -219,7 +253,7 @@
 
 	rc = -EFAULT;
 	rmb(); /* Ensure that we see the port before we copy it. */
-	if (copy_to_user(buf, &u->ring[EVTCHN_RING_MASK(c)], bytes1) ||
+	if (copy_to_user(buf, evtchn_ring_entry(u, c), bytes1) ||
 	    ((bytes2 != 0) &&
 	     copy_to_user(&buf[bytes1], &u->ring[0], bytes2)))
 		goto unlock_out;
@@ -278,6 +312,66 @@
 	return rc;
 }
 
+static int evtchn_resize_ring(struct per_user_data *u)
+{
+	unsigned int new_size;
+	evtchn_port_t *new_ring, *old_ring;
+	unsigned int p, c;
+
+	/*
+	 * Ensure the ring is large enough to capture all possible
+	 * events. i.e., one free slot for each bound event.
+	 */
+	if (u->nr_evtchns <= u->ring_size)
+		return 0;
+
+	if (u->ring_size == 0)
+		new_size = 64;
+	else
+		new_size = 2 * u->ring_size;
+
+	new_ring = evtchn_alloc_ring(new_size);
+	if (!new_ring)
+		return -ENOMEM;
+
+	old_ring = u->ring;
+
+	/*
+	 * Access to the ring contents is serialized by either the
+	 * prod /or/ cons lock so take both when resizing.
+	 */
+	mutex_lock(&u->ring_cons_mutex);
+	spin_lock_irq(&u->ring_prod_lock);
+
+	/*
+	 * Copy the old ring contents to the new ring.
+	 *
+	 * If the ring contents crosses the end of the current ring,
+	 * it needs to be copied in two chunks.
+	 *
+	 * +---------+    +------------------+
+	 * |34567  12| -> |       1234567    |
+	 * +-----p-c-+    +------------------+
+	 */
+	p = evtchn_ring_offset(u, u->ring_prod);
+	c = evtchn_ring_offset(u, u->ring_cons);
+	if (p < c) {
+		memcpy(new_ring + c, u->ring + c, (u->ring_size - c) * sizeof(*u->ring));
+		memcpy(new_ring + u->ring_size, u->ring, p * sizeof(*u->ring));
+	} else
+		memcpy(new_ring + c, u->ring + c, (p - c) * sizeof(*u->ring));
+
+	u->ring = new_ring;
+	u->ring_size = new_size;
+
+	spin_unlock_irq(&u->ring_prod_lock);
+	mutex_unlock(&u->ring_cons_mutex);
+
+	evtchn_free_ring(old_ring);
+
+	return 0;
+}
+
 static int evtchn_bind_to_user(struct per_user_data *u, int port)
 {
 	struct user_evtchn *evtchn;
@@ -305,6 +399,10 @@
 	if (rc < 0)
 		goto err;
 
+	rc = evtchn_resize_ring(u);
+	if (rc < 0)
+		goto err;
+
 	rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, 0,
 				       u->name, evtchn);
 	if (rc < 0)
@@ -503,13 +601,6 @@
 
 	init_waitqueue_head(&u->evtchn_wait);
 
-	u->ring = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
-	if (u->ring == NULL) {
-		kfree(u->name);
-		kfree(u);
-		return -ENOMEM;
-	}
-
 	mutex_init(&u->bind_mutex);
 	mutex_init(&u->ring_cons_mutex);
 	spin_lock_init(&u->ring_prod_lock);
@@ -532,7 +623,7 @@
 		evtchn_unbind_from_user(u, evtchn);
 	}
 
-	free_page((unsigned long)u->ring);
+	evtchn_free_ring(u->ring);
 	kfree(u->name);
 	kfree(u);
 
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 2ea0b3b..1be5dd0 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -804,7 +804,7 @@
 
 	vma->vm_ops = &gntdev_vmops;
 
-	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
 
 	if (use_ptemod)
 		vma->vm_flags |= VM_DONTCOPY;
diff --git a/fs/Kconfig b/fs/Kconfig
index da3f32f..6ce72d8 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -46,6 +46,12 @@
 	  or if unsure, say N.  Saying Y will increase the size of the kernel
 	  by about 5kB.
 
+config FS_DAX_PMD
+	bool
+	default FS_DAX
+	depends on FS_DAX
+	depends on BROKEN
+
 endif # BLOCK
 
 # Posix ACL utility routines
diff --git a/fs/block_dev.c b/fs/block_dev.c
index bb0dfb1..c25639e 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -390,9 +390,17 @@
 			struct page *page)
 {
 	const struct block_device_operations *ops = bdev->bd_disk->fops;
+	int result = -EOPNOTSUPP;
+
 	if (!ops->rw_page || bdev_get_integrity(bdev))
-		return -EOPNOTSUPP;
-	return ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ);
+		return result;
+
+	result = blk_queue_enter(bdev->bd_queue, GFP_KERNEL);
+	if (result)
+		return result;
+	result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ);
+	blk_queue_exit(bdev->bd_queue);
+	return result;
 }
 EXPORT_SYMBOL_GPL(bdev_read_page);
 
@@ -421,14 +429,20 @@
 	int result;
 	int rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE;
 	const struct block_device_operations *ops = bdev->bd_disk->fops;
+
 	if (!ops->rw_page || bdev_get_integrity(bdev))
 		return -EOPNOTSUPP;
+	result = blk_queue_enter(bdev->bd_queue, GFP_KERNEL);
+	if (result)
+		return result;
+
 	set_page_writeback(page);
 	result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, rw);
 	if (result)
 		end_page_writeback(page);
 	else
 		unlock_page(page);
+	blk_queue_exit(bdev->bd_queue);
 	return result;
 }
 EXPORT_SYMBOL_GPL(bdev_write_page);
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 6dcdb2e..d453d62 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -355,7 +355,7 @@
 
 	index = srcu_read_lock(&fs_info->subvol_srcu);
 
-	root = btrfs_read_fs_root_no_name(fs_info, &root_key);
+	root = btrfs_get_fs_root(fs_info, &root_key, false);
 	if (IS_ERR(root)) {
 		srcu_read_unlock(&fs_info->subvol_srcu, index);
 		ret = PTR_ERR(root);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 8c58191..35489e7 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3416,6 +3416,7 @@
 struct btrfs_block_group_cache *btrfs_lookup_block_group(
 						 struct btrfs_fs_info *info,
 						 u64 bytenr);
+void btrfs_get_block_group(struct btrfs_block_group_cache *cache);
 void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
 int get_block_group_index(struct btrfs_block_group_cache *cache);
 struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
@@ -3479,6 +3480,9 @@
 			   struct btrfs_root *root, u64 bytes_used,
 			   u64 type, u64 chunk_objectid, u64 chunk_offset,
 			   u64 size);
+struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
+				struct btrfs_fs_info *fs_info,
+				const u64 chunk_offset);
 int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root, u64 group_start,
 			     struct extent_map *em);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index acf3ed1..4b89680a 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -124,7 +124,7 @@
 	return (cache->flags & bits) == bits;
 }
 
-static void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
+void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
 {
 	atomic_inc(&cache->count);
 }
@@ -5915,19 +5915,6 @@
 			set_extent_dirty(info->pinned_extents,
 					 bytenr, bytenr + num_bytes - 1,
 					 GFP_NOFS | __GFP_NOFAIL);
-			/*
-			 * No longer have used bytes in this block group, queue
-			 * it for deletion.
-			 */
-			if (old_val == 0) {
-				spin_lock(&info->unused_bgs_lock);
-				if (list_empty(&cache->bg_list)) {
-					btrfs_get_block_group(cache);
-					list_add_tail(&cache->bg_list,
-						      &info->unused_bgs);
-				}
-				spin_unlock(&info->unused_bgs_lock);
-			}
 		}
 
 		spin_lock(&trans->transaction->dirty_bgs_lock);
@@ -5939,6 +5926,22 @@
 		}
 		spin_unlock(&trans->transaction->dirty_bgs_lock);
 
+		/*
+		 * No longer have used bytes in this block group, queue it for
+		 * deletion. We do this after adding the block group to the
+		 * dirty list to avoid races between cleaner kthread and space
+		 * cache writeout.
+		 */
+		if (!alloc && old_val == 0) {
+			spin_lock(&info->unused_bgs_lock);
+			if (list_empty(&cache->bg_list)) {
+				btrfs_get_block_group(cache);
+				list_add_tail(&cache->bg_list,
+					      &info->unused_bgs);
+			}
+			spin_unlock(&info->unused_bgs_lock);
+		}
+
 		btrfs_put_block_group(cache);
 		total -= num_bytes;
 		bytenr += num_bytes;
@@ -8105,21 +8108,47 @@
 }
 
 /*
- * TODO: Modify related function to add related node/leaf to dirty_extent_root,
- * for later qgroup accounting.
- *
- * Current, this function does nothing.
+ * These may not be seen by the usual inc/dec ref code so we have to
+ * add them here.
  */
+static int record_one_subtree_extent(struct btrfs_trans_handle *trans,
+				     struct btrfs_root *root, u64 bytenr,
+				     u64 num_bytes)
+{
+	struct btrfs_qgroup_extent_record *qrecord;
+	struct btrfs_delayed_ref_root *delayed_refs;
+
+	qrecord = kmalloc(sizeof(*qrecord), GFP_NOFS);
+	if (!qrecord)
+		return -ENOMEM;
+
+	qrecord->bytenr = bytenr;
+	qrecord->num_bytes = num_bytes;
+	qrecord->old_roots = NULL;
+
+	delayed_refs = &trans->transaction->delayed_refs;
+	spin_lock(&delayed_refs->lock);
+	if (btrfs_qgroup_insert_dirty_extent(delayed_refs, qrecord))
+		kfree(qrecord);
+	spin_unlock(&delayed_refs->lock);
+
+	return 0;
+}
+
 static int account_leaf_items(struct btrfs_trans_handle *trans,
 			      struct btrfs_root *root,
 			      struct extent_buffer *eb)
 {
 	int nr = btrfs_header_nritems(eb);
-	int i, extent_type;
+	int i, extent_type, ret;
 	struct btrfs_key key;
 	struct btrfs_file_extent_item *fi;
 	u64 bytenr, num_bytes;
 
+	/* We can be called directly from walk_up_proc() */
+	if (!root->fs_info->quota_enabled)
+		return 0;
+
 	for (i = 0; i < nr; i++) {
 		btrfs_item_key_to_cpu(eb, &key, i);
 
@@ -8138,6 +8167,10 @@
 			continue;
 
 		num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
+
+		ret = record_one_subtree_extent(trans, root, bytenr, num_bytes);
+		if (ret)
+			return ret;
 	}
 	return 0;
 }
@@ -8206,8 +8239,6 @@
 
 /*
  * root_eb is the subtree root and is locked before this function is called.
- * TODO: Modify this function to mark all (including complete shared node)
- * to dirty_extent_root to allow it get accounted in qgroup.
  */
 static int account_shared_subtree(struct btrfs_trans_handle *trans,
 				  struct btrfs_root *root,
@@ -8285,6 +8316,11 @@
 			btrfs_tree_read_lock(eb);
 			btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
 			path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
+
+			ret = record_one_subtree_extent(trans, root, child_bytenr,
+							root->nodesize);
+			if (ret)
+				goto out;
 		}
 
 		if (level == 0) {
@@ -10256,6 +10292,47 @@
 	return ret;
 }
 
+struct btrfs_trans_handle *
+btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info,
+				     const u64 chunk_offset)
+{
+	struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree;
+	struct extent_map *em;
+	struct map_lookup *map;
+	unsigned int num_items;
+
+	read_lock(&em_tree->lock);
+	em = lookup_extent_mapping(em_tree, chunk_offset, 1);
+	read_unlock(&em_tree->lock);
+	ASSERT(em && em->start == chunk_offset);
+
+	/*
+	 * We need to reserve 3 + N units from the metadata space info in order
+	 * to remove a block group (done at btrfs_remove_chunk() and at
+	 * btrfs_remove_block_group()), which are used for:
+	 *
+	 * 1 unit for adding the free space inode's orphan (located in the tree
+	 * of tree roots).
+	 * 1 unit for deleting the block group item (located in the extent
+	 * tree).
+	 * 1 unit for deleting the free space item (located in tree of tree
+	 * roots).
+	 * N units for deleting N device extent items corresponding to each
+	 * stripe (located in the device tree).
+	 *
+	 * In order to remove a block group we also need to reserve units in the
+	 * system space info in order to update the chunk tree (update one or
+	 * more device items and remove one chunk item), but this is done at
+	 * btrfs_remove_chunk() through a call to check_system_chunk().
+	 */
+	map = (struct map_lookup *)em->bdev;
+	num_items = 3 + map->num_stripes;
+	free_extent_map(em);
+
+	return btrfs_start_transaction_fallback_global_rsv(fs_info->extent_root,
+							   num_items, 1);
+}
+
 /*
  * Process the unused_bgs list and remove any that don't have any allocated
  * space inside of them.
@@ -10322,8 +10399,8 @@
 		 * Want to do this before we do anything else so we can recover
 		 * properly if we fail to join the transaction.
 		 */
-		/* 1 for btrfs_orphan_reserve_metadata() */
-		trans = btrfs_start_transaction(root, 1);
+		trans = btrfs_start_trans_remove_block_group(fs_info,
+						     block_group->key.objectid);
 		if (IS_ERR(trans)) {
 			btrfs_dec_block_group_ro(root, block_group);
 			ret = PTR_ERR(trans);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 977e715..72e7346 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1882,8 +1882,13 @@
 	struct btrfs_log_ctx ctx;
 	int ret = 0;
 	bool full_sync = 0;
-	const u64 len = end - start + 1;
+	u64 len;
 
+	/*
+	 * The range length can be represented by u64, we have to do the typecasts
+	 * to avoid signed overflow if it's [0, LLONG_MAX] eg. from fsync()
+	 */
+	len = (u64)end - (u64)start + 1;
 	trace_btrfs_sync_file(file, datasync);
 
 	/*
@@ -2071,8 +2076,7 @@
 			}
 		}
 		if (!full_sync) {
-			ret = btrfs_wait_ordered_range(inode, start,
-						       end - start + 1);
+			ret = btrfs_wait_ordered_range(inode, start, len);
 			if (ret) {
 				btrfs_end_transaction(trans, root);
 				goto out;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 994490d..a70c579 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4046,9 +4046,7 @@
  */
 static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir)
 {
-	struct btrfs_trans_handle *trans;
 	struct btrfs_root *root = BTRFS_I(dir)->root;
-	int ret;
 
 	/*
 	 * 1 for the possible orphan item
@@ -4057,27 +4055,7 @@
 	 * 1 for the inode ref
 	 * 1 for the inode
 	 */
-	trans = btrfs_start_transaction(root, 5);
-	if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
-		return trans;
-
-	if (PTR_ERR(trans) == -ENOSPC) {
-		u64 num_bytes = btrfs_calc_trans_metadata_size(root, 5);
-
-		trans = btrfs_start_transaction(root, 0);
-		if (IS_ERR(trans))
-			return trans;
-		ret = btrfs_cond_migrate_bytes(root->fs_info,
-					       &root->fs_info->trans_block_rsv,
-					       num_bytes, 5);
-		if (ret) {
-			btrfs_end_transaction(trans, root);
-			return ERR_PTR(ret);
-		}
-		trans->block_rsv = &root->fs_info->trans_block_rsv;
-		trans->bytes_reserved = num_bytes;
-	}
-	return trans;
+	return btrfs_start_transaction_fallback_global_rsv(root, 5, 5);
 }
 
 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 93e12c1..5279fda 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -993,9 +993,10 @@
 	mutex_lock(&fs_info->qgroup_ioctl_lock);
 	if (!fs_info->quota_root)
 		goto out;
-	spin_lock(&fs_info->qgroup_lock);
 	fs_info->quota_enabled = 0;
 	fs_info->pending_quota_state = 0;
+	btrfs_qgroup_wait_for_completion(fs_info);
+	spin_lock(&fs_info->qgroup_lock);
 	quota_root = fs_info->quota_root;
 	fs_info->quota_root = NULL;
 	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
@@ -1461,6 +1462,8 @@
 	struct btrfs_qgroup_extent_record *entry;
 	u64 bytenr = record->bytenr;
 
+	assert_spin_locked(&delayed_refs->lock);
+
 	while (*p) {
 		parent_node = *p;
 		entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record,
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 2907a77..b091d94 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3432,7 +3432,9 @@
 static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
 					  struct btrfs_device *scrub_dev,
 					  u64 chunk_offset, u64 length,
-					  u64 dev_offset, int is_dev_replace)
+					  u64 dev_offset,
+					  struct btrfs_block_group_cache *cache,
+					  int is_dev_replace)
 {
 	struct btrfs_mapping_tree *map_tree =
 		&sctx->dev_root->fs_info->mapping_tree;
@@ -3445,8 +3447,18 @@
 	em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
 	read_unlock(&map_tree->map_tree.lock);
 
-	if (!em)
-		return -EINVAL;
+	if (!em) {
+		/*
+		 * Might have been an unused block group deleted by the cleaner
+		 * kthread or relocation.
+		 */
+		spin_lock(&cache->lock);
+		if (!cache->removed)
+			ret = -EINVAL;
+		spin_unlock(&cache->lock);
+
+		return ret;
+	}
 
 	map = (struct map_lookup *)em->bdev;
 	if (em->start != chunk_offset)
@@ -3483,6 +3495,7 @@
 	u64 length;
 	u64 chunk_offset;
 	int ret = 0;
+	int ro_set;
 	int slot;
 	struct extent_buffer *l;
 	struct btrfs_key key;
@@ -3568,7 +3581,21 @@
 		scrub_pause_on(fs_info);
 		ret = btrfs_inc_block_group_ro(root, cache);
 		scrub_pause_off(fs_info);
-		if (ret) {
+
+		if (ret == 0) {
+			ro_set = 1;
+		} else if (ret == -ENOSPC) {
+			/*
+			 * btrfs_inc_block_group_ro return -ENOSPC when it
+			 * failed in creating new chunk for metadata.
+			 * It is not a problem for scrub/replace, because
+			 * metadata are always cowed, and our scrub paused
+			 * commit_transactions.
+			 */
+			ro_set = 0;
+		} else {
+			btrfs_warn(fs_info, "failed setting block group ro, ret=%d\n",
+				   ret);
 			btrfs_put_block_group(cache);
 			break;
 		}
@@ -3577,7 +3604,7 @@
 		dev_replace->cursor_left = found_key.offset;
 		dev_replace->item_needs_writeback = 1;
 		ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length,
-				  found_key.offset, is_dev_replace);
+				  found_key.offset, cache, is_dev_replace);
 
 		/*
 		 * flush, submit all pending read and write bios, afterwards
@@ -3611,7 +3638,30 @@
 
 		scrub_pause_off(fs_info);
 
-		btrfs_dec_block_group_ro(root, cache);
+		if (ro_set)
+			btrfs_dec_block_group_ro(root, cache);
+
+		/*
+		 * We might have prevented the cleaner kthread from deleting
+		 * this block group if it was already unused because we raced
+		 * and set it to RO mode first. So add it back to the unused
+		 * list, otherwise it might not ever be deleted unless a manual
+		 * balance is triggered or it becomes used and unused again.
+		 */
+		spin_lock(&cache->lock);
+		if (!cache->removed && !cache->ro && cache->reserved == 0 &&
+		    btrfs_block_group_used(&cache->item) == 0) {
+			spin_unlock(&cache->lock);
+			spin_lock(&fs_info->unused_bgs_lock);
+			if (list_empty(&cache->bg_list)) {
+				btrfs_get_block_group(cache);
+				list_add_tail(&cache->bg_list,
+					      &fs_info->unused_bgs);
+			}
+			spin_unlock(&fs_info->unused_bgs_lock);
+		} else {
+			spin_unlock(&cache->lock);
+		}
 
 		btrfs_put_block_group(cache);
 		if (ret)
diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c
index c8c3d70..8b72b00 100644
--- a/fs/btrfs/tests/free-space-tests.c
+++ b/fs/btrfs/tests/free-space-tests.c
@@ -898,8 +898,10 @@
 	}
 
 	root = btrfs_alloc_dummy_root();
-	if (!root)
+	if (IS_ERR(root)) {
+		ret = PTR_ERR(root);
 		goto out;
+	}
 
 	root->fs_info = btrfs_alloc_dummy_fs_info();
 	if (!root->fs_info)
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 418c6a2..3367a3c 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -592,6 +592,38 @@
 	return start_transaction(root, num_items, TRANS_START,
 				 BTRFS_RESERVE_FLUSH_ALL);
 }
+struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
+					struct btrfs_root *root,
+					unsigned int num_items,
+					int min_factor)
+{
+	struct btrfs_trans_handle *trans;
+	u64 num_bytes;
+	int ret;
+
+	trans = btrfs_start_transaction(root, num_items);
+	if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
+		return trans;
+
+	trans = btrfs_start_transaction(root, 0);
+	if (IS_ERR(trans))
+		return trans;
+
+	num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
+	ret = btrfs_cond_migrate_bytes(root->fs_info,
+				       &root->fs_info->trans_block_rsv,
+				       num_bytes,
+				       min_factor);
+	if (ret) {
+		btrfs_end_transaction(trans, root);
+		return ERR_PTR(ret);
+	}
+
+	trans->block_rsv = &root->fs_info->trans_block_rsv;
+	trans->bytes_reserved = num_bytes;
+
+	return trans;
+}
 
 struct btrfs_trans_handle *btrfs_start_transaction_lflush(
 					struct btrfs_root *root,
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index b05b2f6..0da21ca 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -185,6 +185,10 @@
 			  struct btrfs_root *root);
 struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
 						   unsigned int num_items);
+struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
+					struct btrfs_root *root,
+					unsigned int num_items,
+					int min_factor);
 struct btrfs_trans_handle *btrfs_start_transaction_lflush(
 					struct btrfs_root *root,
 					unsigned int num_items);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index a6df8fd..4564522 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1973,8 +1973,7 @@
 	if (srcdev->writeable) {
 		fs_devices->rw_devices--;
 		/* zero out the old super if it is writable */
-		btrfs_scratch_superblocks(srcdev->bdev,
-					rcu_str_deref(srcdev->name));
+		btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str);
 	}
 
 	if (srcdev->bdev)
@@ -2024,8 +2023,7 @@
 	btrfs_sysfs_rm_device_link(fs_info->fs_devices, tgtdev);
 
 	if (tgtdev->bdev) {
-		btrfs_scratch_superblocks(tgtdev->bdev,
-					rcu_str_deref(tgtdev->name));
+		btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str);
 		fs_info->fs_devices->open_devices--;
 	}
 	fs_info->fs_devices->num_devices--;
@@ -2853,7 +2851,8 @@
 	if (ret)
 		return ret;
 
-	trans = btrfs_start_transaction(root, 0);
+	trans = btrfs_start_trans_remove_block_group(root->fs_info,
+						     chunk_offset);
 	if (IS_ERR(trans)) {
 		ret = PTR_ERR(trans);
 		btrfs_std_error(root->fs_info, ret, NULL);
@@ -3123,7 +3122,7 @@
 	return 1;
 }
 
-static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
+static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
 			      struct btrfs_balance_args *bargs)
 {
 	struct btrfs_block_group_cache *cache;
@@ -3156,7 +3155,7 @@
 	return ret;
 }
 
-static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info,
+static int chunk_usage_filter(struct btrfs_fs_info *fs_info,
 		u64 chunk_offset, struct btrfs_balance_args *bargs)
 {
 	struct btrfs_block_group_cache *cache;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index ec57123..d5c84f6 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -382,7 +382,7 @@
 #define BTRFS_BALANCE_ARGS_LIMIT	(1ULL << 5)
 #define BTRFS_BALANCE_ARGS_LIMIT_RANGE	(1ULL << 6)
 #define BTRFS_BALANCE_ARGS_STRIPES_RANGE (1ULL << 7)
-#define BTRFS_BALANCE_ARGS_USAGE_RANGE	(1ULL << 8)
+#define BTRFS_BALANCE_ARGS_USAGE_RANGE	(1ULL << 10)
 
 #define BTRFS_BALANCE_ARGS_MASK			\
 	(BTRFS_BALANCE_ARGS_PROFILES |		\
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 7a6b02f..c0f3da3 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -879,7 +879,7 @@
 	loff_t pos, eof;
 	size_t len;
 	void *data;
-	int ret;
+	int ret = -ENOBUFS;
 
 	ASSERT(op != NULL);
 	ASSERT(page != NULL);
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index c81ce7f..a7a1b21 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1636,6 +1636,116 @@
 	.iterate	= configfs_readdir,
 };
 
+/**
+ * configfs_register_group - creates a parent-child relation between two groups
+ * @parent_group:	parent group
+ * @group:		child group
+ *
+ * link groups, creates dentry for the child and attaches it to the
+ * parent dentry.
+ *
+ * Return: 0 on success, negative errno code on error
+ */
+int configfs_register_group(struct config_group *parent_group,
+			    struct config_group *group)
+{
+	struct configfs_subsystem *subsys = parent_group->cg_subsys;
+	struct dentry *parent;
+	int ret;
+
+	mutex_lock(&subsys->su_mutex);
+	link_group(parent_group, group);
+	mutex_unlock(&subsys->su_mutex);
+
+	parent = parent_group->cg_item.ci_dentry;
+
+	mutex_lock_nested(&d_inode(parent)->i_mutex, I_MUTEX_PARENT);
+	ret = create_default_group(parent_group, group);
+	if (!ret) {
+		spin_lock(&configfs_dirent_lock);
+		configfs_dir_set_ready(group->cg_item.ci_dentry->d_fsdata);
+		spin_unlock(&configfs_dirent_lock);
+	}
+	mutex_unlock(&d_inode(parent)->i_mutex);
+	return ret;
+}
+EXPORT_SYMBOL(configfs_register_group);
+
+/**
+ * configfs_unregister_group() - unregisters a child group from its parent
+ * @group: parent group to be unregistered
+ *
+ * Undoes configfs_register_group()
+ */
+void configfs_unregister_group(struct config_group *group)
+{
+	struct configfs_subsystem *subsys = group->cg_subsys;
+	struct dentry *dentry = group->cg_item.ci_dentry;
+	struct dentry *parent = group->cg_item.ci_parent->ci_dentry;
+
+	mutex_lock_nested(&d_inode(parent)->i_mutex, I_MUTEX_PARENT);
+	spin_lock(&configfs_dirent_lock);
+	configfs_detach_prep(dentry, NULL);
+	spin_unlock(&configfs_dirent_lock);
+
+	configfs_detach_group(&group->cg_item);
+	d_inode(dentry)->i_flags |= S_DEAD;
+	dont_mount(dentry);
+	d_delete(dentry);
+	mutex_unlock(&d_inode(parent)->i_mutex);
+
+	dput(dentry);
+
+	mutex_lock(&subsys->su_mutex);
+	unlink_group(group);
+	mutex_unlock(&subsys->su_mutex);
+}
+EXPORT_SYMBOL(configfs_unregister_group);
+
+/**
+ * configfs_register_default_group() - allocates and registers a child group
+ * @parent_group:	parent group
+ * @name:		child group name
+ * @item_type:		child item type description
+ *
+ * boilerplate to allocate and register a child group with its parent. We need
+ * kzalloc'ed memory because child's default_group is initially empty.
+ *
+ * Return: allocated config group or ERR_PTR() on error
+ */
+struct config_group *
+configfs_register_default_group(struct config_group *parent_group,
+				const char *name,
+				struct config_item_type *item_type)
+{
+	int ret;
+	struct config_group *group;
+
+	group = kzalloc(sizeof(*group), GFP_KERNEL);
+	if (!group)
+		return ERR_PTR(-ENOMEM);
+	config_group_init_type_name(group, name, item_type);
+
+	ret = configfs_register_group(parent_group, group);
+	if (ret) {
+		kfree(group);
+		return ERR_PTR(ret);
+	}
+	return group;
+}
+EXPORT_SYMBOL(configfs_register_default_group);
+
+/**
+ * configfs_unregister_default_group() - unregisters and frees a child group
+ * @group:	the group to act on
+ */
+void configfs_unregister_default_group(struct config_group *group)
+{
+	configfs_unregister_group(group);
+	kfree(group);
+}
+EXPORT_SYMBOL(configfs_unregister_default_group);
+
 int configfs_register_subsystem(struct configfs_subsystem *subsys)
 {
 	int err;
diff --git a/fs/dax.c b/fs/dax.c
index d1e5cb7..43671b6 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -541,6 +541,10 @@
 	unsigned long pfn;
 	int result = 0;
 
+	/* dax pmd mappings are broken wrt gup and fork */
+	if (!IS_ENABLED(CONFIG_FS_DAX_PMD))
+		return VM_FAULT_FALLBACK;
+
 	/* Fall back to PTEs if we're going to COW */
 	if (write && !(vma->vm_flags & VM_SHARED))
 		return VM_FAULT_FALLBACK;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index cb5337d..1c75a3a 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1169,6 +1169,15 @@
 		}
 	}
 
+	/* Once we sampled i_size check for reads beyond EOF */
+	dio->i_size = i_size_read(inode);
+	if (iov_iter_rw(iter) == READ && offset >= dio->i_size) {
+		if (dio->flags & DIO_LOCKING)
+			mutex_unlock(&inode->i_mutex);
+		kmem_cache_free(dio_cache, dio);
+		goto out;
+	}
+
 	/*
 	 * For file extending writes updating i_size before data writeouts
 	 * complete can expose uninitialized blocks in dumb filesystems.
@@ -1222,7 +1231,6 @@
 	sdio.next_block_for_io = -1;
 
 	dio->iocb = iocb;
-	dio->i_size = i_size_read(inode);
 
 	spin_lock_init(&dio->bio_lock);
 	dio->refcount = 1;
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 87e9d79..3a37bd3 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -421,7 +421,7 @@
 
 	if (test_and_clear_bit(CF_APP_LIMITED, &con->flags)) {
 		con->sock->sk->sk_write_pending--;
-		clear_bit(SOCK_ASYNC_NOSPACE, &con->sock->flags);
+		clear_bit(SOCKWQ_ASYNC_NOSPACE, &con->sock->flags);
 	}
 
 	if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags))
@@ -1448,7 +1448,7 @@
 					      msg_flags);
 			if (ret == -EAGAIN || ret == 0) {
 				if (ret == -EAGAIN &&
-				    test_bit(SOCK_ASYNC_NOSPACE, &con->sock->flags) &&
+				    test_bit(SOCKWQ_ASYNC_NOSPACE, &con->sock->flags) &&
 				    !test_and_set_bit(CF_APP_LIMITED, &con->flags)) {
 					/* Notify TCP that we're limited by the
 					 * application window size.
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 3a71cea..748d35a 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -569,6 +569,8 @@
 			/* Fall through */
 		case Opt_dax:
 #ifdef CONFIG_FS_DAX
+			ext2_msg(sb, KERN_WARNING,
+		"DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
 			set_opt(sbi->s_mount_opt, DAX);
 #else
 			ext2_msg(sb, KERN_INFO, "dax option not supported");
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 753f4e6..c9ab67d 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1664,8 +1664,12 @@
 		}
 		sbi->s_jquota_fmt = m->mount_opt;
 #endif
-#ifndef CONFIG_FS_DAX
 	} else if (token == Opt_dax) {
+#ifdef CONFIG_FS_DAX
+		ext4_msg(sb, KERN_WARNING,
+		"DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
+			sbi->s_mount_opt |= m->mount_opt;
+#else
 		ext4_msg(sb, KERN_INFO, "dax option not supported");
 		return -1;
 #endif
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 4afc4d9..8b2127f 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -610,9 +610,9 @@
 		int status = fat_parse_long(inode, &cpos, &bh, &de,
 					    &unicode, &nr_slots);
 		if (status < 0) {
-			ctx->pos = cpos;
+			bh = NULL;
 			ret = status;
-			goto out;
+			goto end_of_dir;
 		} else if (status == PARSE_INVALID)
 			goto record_end;
 		else if (status == PARSE_NOT_LONGNAME)
@@ -654,8 +654,9 @@
 	fill_len = short_len;
 
 start_filldir:
-	if (!fake_offset)
-		ctx->pos = cpos - (nr_slots + 1) * sizeof(struct msdos_dir_entry);
+	ctx->pos = cpos - (nr_slots + 1) * sizeof(struct msdos_dir_entry);
+	if (fake_offset && ctx->pos < 2)
+		ctx->pos = 2;
 
 	if (!memcmp(de->name, MSDOS_DOT, MSDOS_NAME)) {
 		if (!dir_emit_dot(file, ctx))
@@ -681,14 +682,19 @@
 	fake_offset = 0;
 	ctx->pos = cpos;
 	goto get_new;
+
 end_of_dir:
-	ctx->pos = cpos;
+	if (fake_offset && cpos < 2)
+		ctx->pos = 2;
+	else
+		ctx->pos = cpos;
 fill_failed:
 	brelse(bh);
 	if (unicode)
 		__putname(unicode);
 out:
 	mutex_unlock(&sbi->s_lock);
+
 	return ret;
 }
 
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 316adb9..de4bdfa 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -332,12 +332,17 @@
  * truncation is indicated by end of range being LLONG_MAX
  *	In this case, we first scan the range and release found pages.
  *	After releasing pages, hugetlb_unreserve_pages cleans up region/reserv
- *	maps and global counts.
+ *	maps and global counts.  Page faults can not race with truncation
+ *	in this routine.  hugetlb_no_page() prevents page faults in the
+ *	truncated range.  It checks i_size before allocation, and again after
+ *	with the page table lock for the page held.  The same lock must be
+ *	acquired to unmap a page.
  * hole punch is indicated if end is not LLONG_MAX
  *	In the hole punch case we scan the range and release found pages.
  *	Only when releasing a page is the associated region/reserv map
  *	deleted.  The region/reserv map for ranges without associated
- *	pages are not modified.
+ *	pages are not modified.  Page faults can race with hole punch.
+ *	This is indicated if we find a mapped page.
  * Note: If the passed end of range value is beyond the end of file, but
  * not LLONG_MAX this routine still performs a hole punch operation.
  */
@@ -361,46 +366,37 @@
 	next = start;
 	while (next < end) {
 		/*
-		 * Make sure to never grab more pages that we
-		 * might possibly need.
+		 * Don't grab more pages than the number left in the range.
 		 */
 		if (end - next < lookup_nr)
 			lookup_nr = end - next;
 
 		/*
-		 * This pagevec_lookup() may return pages past 'end',
-		 * so we must check for page->index > end.
+		 * When no more pages are found, we are done.
 		 */
-		if (!pagevec_lookup(&pvec, mapping, next, lookup_nr)) {
-			if (next == start)
-				break;
-			next = start;
-			continue;
-		}
+		if (!pagevec_lookup(&pvec, mapping, next, lookup_nr))
+			break;
 
 		for (i = 0; i < pagevec_count(&pvec); ++i) {
 			struct page *page = pvec.pages[i];
 			u32 hash;
 
+			/*
+			 * The page (index) could be beyond end.  This is
+			 * only possible in the punch hole case as end is
+			 * max page offset in the truncate case.
+			 */
+			next = page->index;
+			if (next >= end)
+				break;
+
 			hash = hugetlb_fault_mutex_hash(h, current->mm,
 							&pseudo_vma,
 							mapping, next, 0);
 			mutex_lock(&hugetlb_fault_mutex_table[hash]);
 
 			lock_page(page);
-			if (page->index >= end) {
-				unlock_page(page);
-				mutex_unlock(&hugetlb_fault_mutex_table[hash]);
-				next = end;	/* we are done */
-				break;
-			}
-
-			/*
-			 * If page is mapped, it was faulted in after being
-			 * unmapped.  Do nothing in this race case.  In the
-			 * normal case page is not mapped.
-			 */
-			if (!page_mapped(page)) {
+			if (likely(!page_mapped(page))) {
 				bool rsv_on_error = !PagePrivate(page);
 				/*
 				 * We must free the huge page and remove
@@ -421,17 +417,23 @@
 						hugetlb_fix_reserve_counts(
 							inode, rsv_on_error);
 				}
+			} else {
+				/*
+				 * If page is mapped, it was faulted in after
+				 * being unmapped.  It indicates a race between
+				 * hole punch and page fault.  Do nothing in
+				 * this case.  Getting here in a truncate
+				 * operation is a bug.
+				 */
+				BUG_ON(truncate_op);
 			}
 
-			if (page->index > next)
-				next = page->index;
-
-			++next;
 			unlock_page(page);
-
 			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
 		}
+		++next;
 		huge_pagevec_release(&pvec);
+		cond_resched();
 	}
 
 	if (truncate_op)
@@ -647,9 +649,6 @@
 	if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
 		i_size_write(inode, offset + len);
 	inode->i_ctime = CURRENT_TIME;
-	spin_lock(&inode->i_lock);
-	inode->i_private = NULL;
-	spin_unlock(&inode->i_lock);
 out:
 	mutex_unlock(&inode->i_mutex);
 	return error;
diff --git a/fs/namei.c b/fs/namei.c
index d84d7c7..0c3974c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1996,7 +1996,6 @@
 	nd->last_type = LAST_ROOT; /* if there are only slashes... */
 	nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT;
 	nd->depth = 0;
-	nd->total_link_count = 0;
 	if (flags & LOOKUP_ROOT) {
 		struct dentry *root = nd->root.dentry;
 		struct inode *inode = root->d_inode;
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 79b1130..0a3f9b5 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -525,6 +525,8 @@
 			switch (rqdata.cmd) {
 				case NCP_LOCK_EX:
 				case NCP_LOCK_SH:
+						if (rqdata.timeout < 0)
+							return -EINVAL;
 						if (rqdata.timeout == 0)
 							rqdata.timeout = NCP_LOCK_DEFAULT_TIMEOUT;
 						else if (rqdata.timeout > NCP_LOCK_MAX_TIMEOUT)
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 646cdac..beac58b 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -78,7 +78,8 @@
 
 	p = xdr_inline_decode(xdr, nbytes);
 	if (unlikely(p == NULL))
-		printk(KERN_WARNING "NFS: NFSv4 callback reply buffer overflowed!\n");
+		printk(KERN_WARNING "NFS: NFSv4 callback reply buffer overflowed "
+							"or truncated request.\n");
 	return p;
 }
 
@@ -889,6 +890,7 @@
 	struct cb_compound_hdr_arg hdr_arg = { 0 };
 	struct cb_compound_hdr_res hdr_res = { NULL };
 	struct xdr_stream xdr_in, xdr_out;
+	struct xdr_buf *rq_arg = &rqstp->rq_arg;
 	__be32 *p, status;
 	struct cb_process_state cps = {
 		.drc_status = 0,
@@ -900,7 +902,8 @@
 
 	dprintk("%s: start\n", __func__);
 
-	xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base);
+	rq_arg->len = rq_arg->head[0].iov_len + rq_arg->page_len;
+	xdr_init_decode(&xdr_in, rq_arg, rq_arg->head[0].iov_base);
 
 	p = (__be32*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len);
 	xdr_init_encode(&xdr_out, &rqstp->rq_res, p);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 326d9e1..31b0a52 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -618,7 +618,10 @@
 		nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC);
 		nfs_vmtruncate(inode, attr->ia_size);
 	}
-	nfs_update_inode(inode, fattr);
+	if (fattr->valid)
+		nfs_update_inode(inode, fattr);
+	else
+		NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR;
 	spin_unlock(&inode->i_lock);
 }
 EXPORT_SYMBOL_GPL(nfs_setattr_update_inode);
@@ -1824,7 +1827,11 @@
 		if ((long)fattr->gencount - (long)nfsi->attr_gencount > 0)
 			nfsi->attr_gencount = fattr->gencount;
 	}
-	invalid &= ~NFS_INO_INVALID_ATTR;
+
+	/* Don't declare attrcache up to date if there were no attrs! */
+	if (fattr->valid != 0)
+		invalid &= ~NFS_INO_INVALID_ATTR;
+
 	/* Don't invalidate the data if we were to blame */
 	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
 				|| S_ISLNK(inode->i_mode)))
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 3e92a3c..6b1ce98 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -14,7 +14,7 @@
 #include "pnfs.h"
 #include "internal.h"
 
-#define NFSDBG_FACILITY NFSDBG_PNFS
+#define NFSDBG_FACILITY NFSDBG_PROC
 
 static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file,
 				fmode_t fmode)
@@ -284,6 +284,7 @@
 		.dst_fh = NFS_FH(dst_inode),
 		.src_offset = src_offset,
 		.dst_offset = dst_offset,
+		.count = count,
 		.dst_bitmask = server->cache_consistency_bitmask,
 	};
 	struct nfs42_clone_res res = {
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 223bedd..10410e8 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -33,7 +33,7 @@
 		return ret;
 	idr_preload(GFP_KERNEL);
 	spin_lock(&nn->nfs_client_lock);
-	ret = idr_alloc(&nn->cb_ident_idr, clp, 0, 0, GFP_NOWAIT);
+	ret = idr_alloc(&nn->cb_ident_idr, clp, 1, 0, GFP_NOWAIT);
 	if (ret >= 0)
 		clp->cl_cb_ident = ret;
 	spin_unlock(&nn->nfs_client_lock);
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 4aa5719..db9b5fe 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -7,6 +7,7 @@
 #include <linux/file.h>
 #include <linux/falloc.h>
 #include <linux/nfs_fs.h>
+#include <uapi/linux/btrfs.h>	/* BTRFS_IOC_CLONE/BTRFS_IOC_CLONE_RANGE */
 #include "delegation.h"
 #include "internal.h"
 #include "iostat.h"
@@ -203,6 +204,7 @@
 	struct fd src_file;
 	struct inode *src_inode;
 	unsigned int bs = server->clone_blksize;
+	bool same_inode = false;
 	int ret;
 
 	/* dst file must be opened for writing */
@@ -221,10 +223,8 @@
 
 	src_inode = file_inode(src_file.file);
 
-	/* src and dst must be different files */
-	ret = -EINVAL;
 	if (src_inode == dst_inode)
-		goto out_fput;
+		same_inode = true;
 
 	/* src file must be opened for reading */
 	if (!(src_file.file->f_mode & FMODE_READ))
@@ -249,8 +249,16 @@
 			goto out_fput;
 	}
 
+	/* verify if ranges are overlapped within the same file */
+	if (same_inode) {
+		if (dst_off + count > src_off && dst_off < src_off + count)
+			goto out_fput;
+	}
+
 	/* XXX: do we lock at all? what if server needs CB_RECALL_LAYOUT? */
-	if (dst_inode < src_inode) {
+	if (same_inode) {
+		mutex_lock(&src_inode->i_mutex);
+	} else if (dst_inode < src_inode) {
 		mutex_lock_nested(&dst_inode->i_mutex, I_MUTEX_PARENT);
 		mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_CHILD);
 	} else {
@@ -275,7 +283,9 @@
 		truncate_inode_pages_range(&dst_inode->i_data, dst_off, dst_off + count - 1);
 
 out_unlock:
-	if (dst_inode < src_inode) {
+	if (same_inode) {
+		mutex_unlock(&src_inode->i_mutex);
+	} else if (dst_inode < src_inode) {
 		mutex_unlock(&src_inode->i_mutex);
 		mutex_unlock(&dst_inode->i_mutex);
 	} else {
@@ -291,46 +301,31 @@
 
 static long nfs42_ioctl_clone_range(struct file *dst_file, void __user *argp)
 {
-	struct nfs_ioctl_clone_range_args args;
+	struct btrfs_ioctl_clone_range_args args;
 
 	if (copy_from_user(&args, argp, sizeof(args)))
 		return -EFAULT;
 
-	return nfs42_ioctl_clone(dst_file, args.src_fd, args.src_off, args.dst_off, args.count);
+	return nfs42_ioctl_clone(dst_file, args.src_fd, args.src_offset,
+				 args.dest_offset, args.src_length);
 }
-#else
-static long nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd,
-		u64 src_off, u64 dst_off, u64 count)
-{
-	return -ENOTTY;
-}
-
-static long nfs42_ioctl_clone_range(struct file *dst_file, void __user *argp)
-{
-	return -ENOTTY;
-}
-#endif /* CONFIG_NFS_V4_2 */
 
 long nfs4_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	void __user *argp = (void __user *)arg;
 
 	switch (cmd) {
-	case NFS_IOC_CLONE:
+	case BTRFS_IOC_CLONE:
 		return nfs42_ioctl_clone(file, arg, 0, 0, 0);
-	case NFS_IOC_CLONE_RANGE:
+	case BTRFS_IOC_CLONE_RANGE:
 		return nfs42_ioctl_clone_range(file, argp);
 	}
 
 	return -ENOTTY;
 }
+#endif /* CONFIG_NFS_V4_2 */
 
 const struct file_operations nfs4_file_operations = {
-#ifdef CONFIG_NFS_V4_2
-	.llseek		= nfs4_file_llseek,
-#else
-	.llseek		= nfs_file_llseek,
-#endif
 	.read_iter	= nfs_file_read,
 	.write_iter	= nfs_file_write,
 	.mmap		= nfs_file_mmap,
@@ -342,14 +337,14 @@
 	.flock		= nfs_flock,
 	.splice_read	= nfs_file_splice_read,
 	.splice_write	= iter_file_splice_write,
-#ifdef CONFIG_NFS_V4_2
-	.fallocate	= nfs42_fallocate,
-#endif /* CONFIG_NFS_V4_2 */
 	.check_flags	= nfs_check_flags,
 	.setlease	= simple_nosetlease,
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_NFS_V4_2
+	.llseek		= nfs4_file_llseek,
+	.fallocate	= nfs42_fallocate,
 	.unlocked_ioctl = nfs4_ioctl,
-#else
 	.compat_ioctl	= nfs4_ioctl,
-#endif /* CONFIG_COMPAT */
+#else
+	.llseek		= nfs_file_llseek,
+#endif
 };
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 765a035..8981803 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -7866,7 +7866,7 @@
 			spin_unlock(&inode->i_lock);
 		goto out_restart;
 	}
-	if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN)
+	if (nfs4_async_handle_error(task, server, state, &lgp->timeout) == -EAGAIN)
 		goto out_restart;
 out:
 	dprintk("<-- %s\n", __func__);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index dfed4f5..4e44412 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -3615,6 +3615,7 @@
 	status = 0;
 	if (unlikely(!(bitmap[0] & FATTR4_WORD0_FS_LOCATIONS)))
 		goto out;
+	bitmap[0] &= ~FATTR4_WORD0_FS_LOCATIONS;
 	status = -EIO;
 	/* Ignore borken servers that return unrequested attrs */
 	if (unlikely(res == NULL))
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 93496c0..5a8ae21 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -872,33 +872,38 @@
 
 	dprintk("--> %s\n", __func__);
 
-	lgp = kzalloc(sizeof(*lgp), gfp_flags);
-	if (lgp == NULL)
-		return NULL;
-
-	i_size = i_size_read(ino);
-
-	lgp->args.minlength = PAGE_CACHE_SIZE;
-	if (lgp->args.minlength > range->length)
-		lgp->args.minlength = range->length;
-	if (range->iomode == IOMODE_READ) {
-		if (range->offset >= i_size)
-			lgp->args.minlength = 0;
-		else if (i_size - range->offset < lgp->args.minlength)
-			lgp->args.minlength = i_size - range->offset;
-	}
-	lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
-	lgp->args.range = *range;
-	lgp->args.type = server->pnfs_curr_ld->id;
-	lgp->args.inode = ino;
-	lgp->args.ctx = get_nfs_open_context(ctx);
-	lgp->gfp_flags = gfp_flags;
-	lgp->cred = lo->plh_lc_cred;
-
-	/* Synchronously retrieve layout information from server and
-	 * store in lseg.
+	/*
+	 * Synchronously retrieve layout information from server and
+	 * store in lseg. If we race with a concurrent seqid morphing
+	 * op, then re-send the LAYOUTGET.
 	 */
-	lseg = nfs4_proc_layoutget(lgp, gfp_flags);
+	do {
+		lgp = kzalloc(sizeof(*lgp), gfp_flags);
+		if (lgp == NULL)
+			return NULL;
+
+		i_size = i_size_read(ino);
+
+		lgp->args.minlength = PAGE_CACHE_SIZE;
+		if (lgp->args.minlength > range->length)
+			lgp->args.minlength = range->length;
+		if (range->iomode == IOMODE_READ) {
+			if (range->offset >= i_size)
+				lgp->args.minlength = 0;
+			else if (i_size - range->offset < lgp->args.minlength)
+				lgp->args.minlength = i_size - range->offset;
+		}
+		lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
+		lgp->args.range = *range;
+		lgp->args.type = server->pnfs_curr_ld->id;
+		lgp->args.inode = ino;
+		lgp->args.ctx = get_nfs_open_context(ctx);
+		lgp->gfp_flags = gfp_flags;
+		lgp->cred = lo->plh_lc_cred;
+
+		lseg = nfs4_proc_layoutget(lgp, gfp_flags);
+	} while (lseg == ERR_PTR(-EAGAIN));
+
 	if (IS_ERR(lseg)) {
 		switch (PTR_ERR(lseg)) {
 		case -ENOMEM:
@@ -1687,6 +1692,7 @@
 		/* existing state ID, make sure the sequence number matches. */
 		if (pnfs_layout_stateid_blocked(lo, &res->stateid)) {
 			dprintk("%s forget reply due to sequence\n", __func__);
+			status = -EAGAIN;
 			goto out_forget_reply;
 		}
 		pnfs_set_layout_stateid(lo, &res->stateid, false);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 3b48ac2..a03f6f4 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -372,6 +372,8 @@
 		mlog_errno(status);
 		goto leave;
 	}
+	/* update inode->i_mode after mask with "umask". */
+	inode->i_mode = mode;
 
 	handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb,
 							    S_ISDIR(mode),
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 871fcb6..0a89834 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -195,8 +195,7 @@
 
 static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
 			      struct dentry *dentry, struct path *lowerpath,
-			      struct kstat *stat, struct iattr *attr,
-			      const char *link)
+			      struct kstat *stat, const char *link)
 {
 	struct inode *wdir = workdir->d_inode;
 	struct inode *udir = upperdir->d_inode;
@@ -240,8 +239,6 @@
 
 	mutex_lock(&newdentry->d_inode->i_mutex);
 	err = ovl_set_attr(newdentry, stat);
-	if (!err && attr)
-		err = notify_change(newdentry, attr, NULL);
 	mutex_unlock(&newdentry->d_inode->i_mutex);
 	if (err)
 		goto out_cleanup;
@@ -286,8 +283,7 @@
  * that point the file will have already been copied up anyway.
  */
 int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
-		    struct path *lowerpath, struct kstat *stat,
-		    struct iattr *attr)
+		    struct path *lowerpath, struct kstat *stat)
 {
 	struct dentry *workdir = ovl_workdir(dentry);
 	int err;
@@ -345,26 +341,19 @@
 	}
 	upperdentry = ovl_dentry_upper(dentry);
 	if (upperdentry) {
-		unlock_rename(workdir, upperdir);
+		/* Raced with another copy-up?  Nothing to do, then... */
 		err = 0;
-		/* Raced with another copy-up?  Do the setattr here */
-		if (attr) {
-			mutex_lock(&upperdentry->d_inode->i_mutex);
-			err = notify_change(upperdentry, attr, NULL);
-			mutex_unlock(&upperdentry->d_inode->i_mutex);
-		}
-		goto out_put_cred;
+		goto out_unlock;
 	}
 
 	err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath,
-				 stat, attr, link);
+				 stat, link);
 	if (!err) {
 		/* Restore timestamps on parent (best effort) */
 		ovl_set_timestamps(upperdir, &pstat);
 	}
 out_unlock:
 	unlock_rename(workdir, upperdir);
-out_put_cred:
 	revert_creds(old_cred);
 	put_cred(override_cred);
 
@@ -406,7 +395,7 @@
 		ovl_path_lower(next, &lowerpath);
 		err = vfs_getattr(&lowerpath, &stat);
 		if (!err)
-			err = ovl_copy_up_one(parent, next, &lowerpath, &stat, NULL);
+			err = ovl_copy_up_one(parent, next, &lowerpath, &stat);
 
 		dput(parent);
 		dput(next);
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index ec0c2a050..4060ffd 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -12,8 +12,7 @@
 #include <linux/xattr.h>
 #include "overlayfs.h"
 
-static int ovl_copy_up_last(struct dentry *dentry, struct iattr *attr,
-			    bool no_data)
+static int ovl_copy_up_truncate(struct dentry *dentry)
 {
 	int err;
 	struct dentry *parent;
@@ -30,10 +29,8 @@
 	if (err)
 		goto out_dput_parent;
 
-	if (no_data)
-		stat.size = 0;
-
-	err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat, attr);
+	stat.size = 0;
+	err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat);
 
 out_dput_parent:
 	dput(parent);
@@ -49,13 +46,13 @@
 	if (err)
 		goto out;
 
-	upperdentry = ovl_dentry_upper(dentry);
-	if (upperdentry) {
+	err = ovl_copy_up(dentry);
+	if (!err) {
+		upperdentry = ovl_dentry_upper(dentry);
+
 		mutex_lock(&upperdentry->d_inode->i_mutex);
 		err = notify_change(upperdentry, attr, NULL);
 		mutex_unlock(&upperdentry->d_inode->i_mutex);
-	} else {
-		err = ovl_copy_up_last(dentry, attr, false);
 	}
 	ovl_drop_write(dentry);
 out:
@@ -353,7 +350,7 @@
 			return ERR_PTR(err);
 
 		if (file_flags & O_TRUNC)
-			err = ovl_copy_up_last(dentry, NULL, true);
+			err = ovl_copy_up_truncate(dentry);
 		else
 			err = ovl_copy_up(dentry);
 		ovl_drop_write(dentry);
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index ea5a40b..e17154a 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -194,7 +194,6 @@
 /* copy_up.c */
 int ovl_copy_up(struct dentry *dentry);
 int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
-		    struct path *lowerpath, struct kstat *stat,
-		    struct iattr *attr);
+		    struct path *lowerpath, struct kstat *stat);
 int ovl_copy_xattr(struct dentry *old, struct dentry *new);
 int ovl_set_attr(struct dentry *upper, struct kstat *stat);
diff --git a/fs/splice.c b/fs/splice.c
index 801c21c..4cf700d 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -809,6 +809,13 @@
  */
 static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd)
 {
+	/*
+	 * Check for signal early to make process killable when there are
+	 * always buffers available
+	 */
+	if (signal_pending(current))
+		return -ERESTARTSYS;
+
 	while (!pipe->nrbufs) {
 		if (!pipe->writers)
 			return 0;
@@ -884,6 +891,7 @@
 
 	splice_from_pipe_begin(sd);
 	do {
+		cond_resched();
 		ret = splice_from_pipe_next(pipe, sd);
 		if (ret > 0)
 			ret = splice_from_pipe_feed(pipe, sd, actor);
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 590ad92..02fa1dc 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -162,15 +162,8 @@
 		inode->i_fop = &sysv_dir_operations;
 		inode->i_mapping->a_ops = &sysv_aops;
 	} else if (S_ISLNK(inode->i_mode)) {
-		if (inode->i_blocks) {
-			inode->i_op = &sysv_symlink_inode_operations;
-			inode->i_mapping->a_ops = &sysv_aops;
-		} else {
-			inode->i_op = &simple_symlink_inode_operations;
-			inode->i_link = (char *)SYSV_I(inode)->i_data;
-			nd_terminate_link(inode->i_link, inode->i_size,
-				sizeof(SYSV_I(inode)->i_data) - 1);
-		}
+		inode->i_op = &sysv_symlink_inode_operations;
+		inode->i_mapping->a_ops = &sysv_aops;
 	} else
 		init_special_inode(inode, inode->i_mode, rdev);
 }
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 0b921ae..5531d7b 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -309,6 +309,11 @@
 	unsigned universal_planes:1;
 	/* true if client understands atomic properties */
 	unsigned atomic:1;
+	/*
+	 * This client is allowed to gain master privileges for @master.
+	 * Protected by struct drm_device::master_mutex.
+	 */
+	unsigned allowed_master:1;
 
 	struct pid *pid;
 	kuid_t uid;
@@ -344,6 +349,8 @@
 	struct list_head event_list;
 	int event_space;
 
+	struct mutex event_read_lock;
+
 	struct drm_prime_file_private prime;
 };
 
@@ -580,6 +587,13 @@
 	int (*gem_open_object) (struct drm_gem_object *, struct drm_file *);
 	void (*gem_close_object) (struct drm_gem_object *, struct drm_file *);
 
+	/**
+	 * Hook for allocating the GEM object struct, for use by core
+	 * helpers.
+	 */
+	struct drm_gem_object *(*gem_create_object)(struct drm_device *dev,
+						    size_t size);
+
 	/* prime: */
 	/* export handle -> fd (see drm_gem_prime_handle_to_fd() helper) */
 	int (*prime_handle_to_fd)(struct drm_device *dev, struct drm_file *file_priv,
@@ -910,6 +924,7 @@
 extern ssize_t drm_read(struct file *filp, char __user *buffer,
 			size_t count, loff_t *offset);
 extern int drm_release(struct inode *inode, struct file *filp);
+extern int drm_new_set_master(struct drm_device *dev, struct drm_file *fpriv);
 
 				/* Mapping support (drm_vm.h) */
 extern unsigned int drm_poll(struct file *filp, struct poll_table_struct *wait);
@@ -947,6 +962,10 @@
 				  struct drm_pending_vblank_event *e);
 extern void drm_crtc_send_vblank_event(struct drm_crtc *crtc,
 				       struct drm_pending_vblank_event *e);
+extern void drm_arm_vblank_event(struct drm_device *dev, unsigned int pipe,
+				 struct drm_pending_vblank_event *e);
+extern void drm_crtc_arm_vblank_event(struct drm_crtc *crtc,
+				      struct drm_pending_vblank_event *e);
 extern bool drm_handle_vblank(struct drm_device *dev, unsigned int pipe);
 extern bool drm_crtc_handle_vblank(struct drm_crtc *crtc);
 extern int drm_vblank_get(struct drm_device *dev, unsigned int pipe);
@@ -1111,4 +1130,7 @@
 	return true;
 }
 
+/* helper for handling conditionals in various for_each macros */
+#define for_each_if(condition) if (!(condition)) {} else
+
 #endif
diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h
index e67aeac..d8576ac 100644
--- a/include/drm/drm_atomic.h
+++ b/include/drm/drm_atomic.h
@@ -136,6 +136,9 @@
 
 void drm_atomic_legacy_backoff(struct drm_atomic_state *state);
 
+void
+drm_atomic_clean_old_fb(struct drm_device *dev, unsigned plane_mask, int ret);
+
 int __must_check drm_atomic_check_only(struct drm_atomic_state *state);
 int __must_check drm_atomic_commit(struct drm_atomic_state *state);
 int __must_check drm_atomic_async_commit(struct drm_atomic_state *state);
@@ -146,7 +149,7 @@
 	     ((connector) = (state)->connectors[__i],			\
 	     (connector_state) = (state)->connector_states[__i], 1); 	\
 	     (__i)++)							\
-		if (connector)
+		for_each_if (connector)
 
 #define for_each_crtc_in_state(state, crtc, crtc_state, __i)	\
 	for ((__i) = 0;						\
@@ -154,7 +157,7 @@
 	     ((crtc) = (state)->crtcs[__i],			\
 	     (crtc_state) = (state)->crtc_states[__i], 1);	\
 	     (__i)++)						\
-		if (crtc_state)
+		for_each_if (crtc_state)
 
 #define for_each_plane_in_state(state, plane, plane_state, __i)		\
 	for ((__i) = 0;							\
@@ -162,7 +165,7 @@
 	     ((plane) = (state)->planes[__i],				\
 	     (plane_state) = (state)->plane_states[__i], 1);		\
 	     (__i)++)							\
-		if (plane_state)
+		for_each_if (plane_state)
 static inline bool
 drm_atomic_crtc_needs_modeset(struct drm_crtc_state *state)
 {
diff --git a/include/drm/drm_atomic_helper.h b/include/drm/drm_atomic_helper.h
index 8cba54a..a286cce 100644
--- a/include/drm/drm_atomic_helper.h
+++ b/include/drm/drm_atomic_helper.h
@@ -62,6 +62,8 @@
 void drm_atomic_helper_cleanup_planes(struct drm_device *dev,
 				      struct drm_atomic_state *old_state);
 void drm_atomic_helper_commit_planes_on_crtc(struct drm_crtc_state *old_crtc_state);
+void drm_atomic_helper_disable_planes_on_crtc(struct drm_crtc *crtc,
+					      bool atomic);
 
 void drm_atomic_helper_swap_state(struct drm_device *dev,
 				  struct drm_atomic_state *state);
@@ -81,6 +83,12 @@
 int __drm_atomic_helper_set_config(struct drm_mode_set *set,
 		struct drm_atomic_state *state);
 
+int drm_atomic_helper_disable_all(struct drm_device *dev,
+				  struct drm_modeset_acquire_ctx *ctx);
+struct drm_atomic_state *drm_atomic_helper_suspend(struct drm_device *dev);
+int drm_atomic_helper_resume(struct drm_device *dev,
+			     struct drm_atomic_state *state);
+
 int drm_atomic_helper_crtc_set_property(struct drm_crtc *crtc,
 					struct drm_property *property,
 					uint64_t val);
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index 3f0c690..c2f98ba 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -85,7 +85,11 @@
 	return (uint64_t)*((uint64_t *)&val);
 }
 
-/* rotation property bits */
+/*
+ * Rotation property bits. DRM_ROTATE_<degrees> rotates the image by the
+ * specified amount in degrees in counter clockwise direction. DRM_REFLECT_X and
+ * DRM_REFLECT_Y reflects the image along the specified axis prior to rotation
+ */
 #define DRM_ROTATE_MASK 0x0f
 #define DRM_ROTATE_0	0
 #define DRM_ROTATE_90	1
@@ -158,23 +162,55 @@
 	u8 group_data[8];
 };
 
+/**
+ * struct drm_framebuffer_funcs - framebuffer hooks
+ */
 struct drm_framebuffer_funcs {
-	/* note: use drm_framebuffer_remove() */
+	/**
+	 * @destroy:
+	 *
+	 * Clean up framebuffer resources, specifically also unreference the
+	 * backing storage. The core guarantees to call this function for every
+	 * framebuffer successfully created by ->fb_create() in
+	 * &drm_mode_config_funcs.
+	 */
 	void (*destroy)(struct drm_framebuffer *framebuffer);
+
+	/**
+	 * @create_handle:
+	 *
+	 * Create a buffer handle in the driver-specific buffer manager (either
+	 * GEM or TTM) valid for the passed-in struct &drm_file. This is used by
+	 * the core to implement the GETFB IOCTL, which returns (for
+	 * sufficiently priviledged user) also a native buffer handle. This can
+	 * be used for seamless transitions between modesetting clients by
+	 * copying the current screen contents to a private buffer and blending
+	 * between that and the new contents.
+	 *
+	 * RETURNS:
+	 *
+	 * 0 on success or a negative error code on failure.
+	 */
 	int (*create_handle)(struct drm_framebuffer *fb,
 			     struct drm_file *file_priv,
 			     unsigned int *handle);
-	/*
-	 * Optional callback for the dirty fb ioctl.
+	/**
+	 * @dirty:
 	 *
-	 * Userspace can notify the driver via this callback
-	 * that a area of the framebuffer has changed and should
-	 * be flushed to the display hardware.
+	 * Optional callback for the dirty fb IOCTL.
 	 *
-	 * See documentation in drm_mode.h for the struct
-	 * drm_mode_fb_dirty_cmd for more information as all
-	 * the semantics and arguments have a one to one mapping
-	 * on this function.
+	 * Userspace can notify the driver via this callback that an area of the
+	 * framebuffer has changed and should be flushed to the display
+	 * hardware. This can also be used internally, e.g. by the fbdev
+	 * emulation, though that's not the case currently.
+	 *
+	 * See documentation in drm_mode.h for the struct drm_mode_fb_dirty_cmd
+	 * for more information as all the semantics and arguments have a one to
+	 * one mapping on this function.
+	 *
+	 * RETURNS:
+	 *
+	 * 0 on success or a negative error code on failure.
 	 */
 	int (*dirty)(struct drm_framebuffer *framebuffer,
 		     struct drm_file *file_priv, unsigned flags,
@@ -250,6 +286,11 @@
 struct drm_bridge;
 struct drm_atomic_state;
 
+struct drm_crtc_helper_funcs;
+struct drm_encoder_helper_funcs;
+struct drm_connector_helper_funcs;
+struct drm_plane_helper_funcs;
+
 /**
  * struct drm_crtc_state - mutable CRTC state
  * @crtc: backpointer to the CRTC
@@ -311,23 +352,6 @@
 
 /**
  * struct drm_crtc_funcs - control CRTCs for a given device
- * @save: save CRTC state
- * @restore: restore CRTC state
- * @reset: reset CRTC after state has been invalidated (e.g. resume)
- * @cursor_set: setup the cursor
- * @cursor_set2: setup the cursor with hotspot, superseeds @cursor_set if set
- * @cursor_move: move the cursor
- * @gamma_set: specify color ramp for CRTC
- * @destroy: deinit and free object
- * @set_property: called when a property is changed
- * @set_config: apply a new CRTC configuration
- * @page_flip: initiate a page flip
- * @atomic_duplicate_state: duplicate the atomic state for this CRTC
- * @atomic_destroy_state: destroy an atomic state for this CRTC
- * @atomic_set_property: set a property on an atomic state for this CRTC
- *    (do not call directly, use drm_atomic_crtc_set_property())
- * @atomic_get_property: get a property on an atomic state for this CRTC
- *    (do not call directly, use drm_atomic_crtc_get_property())
  *
  * The drm_crtc_funcs structure is the central CRTC management structure
  * in the DRM.  Each CRTC controls one or more connectors (note that the name
@@ -339,54 +363,316 @@
  * bus accessors.
  */
 struct drm_crtc_funcs {
-	/* Save CRTC state */
-	void (*save)(struct drm_crtc *crtc); /* suspend? */
-	/* Restore CRTC state */
-	void (*restore)(struct drm_crtc *crtc); /* resume? */
-	/* Reset CRTC state */
+	/**
+	 * @reset:
+	 *
+	 * Reset CRTC hardware and software state to off. This function isn't
+	 * called by the core directly, only through drm_mode_config_reset().
+	 * It's not a helper hook only for historical reasons.
+	 *
+	 * Atomic drivers can use drm_atomic_helper_crtc_reset() to reset
+	 * atomic state using this hook.
+	 */
 	void (*reset)(struct drm_crtc *crtc);
 
-	/* cursor controls */
+	/**
+	 * @cursor_set:
+	 *
+	 * Update the cursor image. The cursor position is relative to the CRTC
+	 * and can be partially or fully outside of the visible area.
+	 *
+	 * Note that contrary to all other KMS functions the legacy cursor entry
+	 * points don't take a framebuffer object, but instead take directly a
+	 * raw buffer object id from the driver's buffer manager (which is
+	 * either GEM or TTM for current drivers).
+	 *
+	 * This entry point is deprecated, drivers should instead implement
+	 * universal plane support and register a proper cursor plane using
+	 * drm_crtc_init_with_planes().
+	 *
+	 * This callback is optional
+	 *
+	 * RETURNS:
+	 *
+	 * 0 on success or a negative error code on failure.
+	 */
 	int (*cursor_set)(struct drm_crtc *crtc, struct drm_file *file_priv,
 			  uint32_t handle, uint32_t width, uint32_t height);
+
+	/**
+	 * @cursor_set2:
+	 *
+	 * Update the cursor image, including hotspot information. The hotspot
+	 * must not affect the cursor position in CRTC coordinates, but is only
+	 * meant as a hint for virtualized display hardware to coordinate the
+	 * guests and hosts cursor position. The cursor hotspot is relative to
+	 * the cursor image. Otherwise this works exactly like @cursor_set.
+	 *
+	 * This entry point is deprecated, drivers should instead implement
+	 * universal plane support and register a proper cursor plane using
+	 * drm_crtc_init_with_planes().
+	 *
+	 * This callback is optional.
+	 *
+	 * RETURNS:
+	 *
+	 * 0 on success or a negative error code on failure.
+	 */
 	int (*cursor_set2)(struct drm_crtc *crtc, struct drm_file *file_priv,
 			   uint32_t handle, uint32_t width, uint32_t height,
 			   int32_t hot_x, int32_t hot_y);
+
+	/**
+	 * @cursor_move:
+	 *
+	 * Update the cursor position. The cursor does not need to be visible
+	 * when this hook is called.
+	 *
+	 * This entry point is deprecated, drivers should instead implement
+	 * universal plane support and register a proper cursor plane using
+	 * drm_crtc_init_with_planes().
+	 *
+	 * This callback is optional.
+	 *
+	 * RETURNS:
+	 *
+	 * 0 on success or a negative error code on failure.
+	 */
 	int (*cursor_move)(struct drm_crtc *crtc, int x, int y);
 
-	/* Set gamma on the CRTC */
+	/**
+	 * @gamma_set:
+	 *
+	 * Set gamma on the CRTC.
+	 *
+	 * This callback is optional.
+	 *
+	 * NOTE:
+	 *
+	 * Drivers that support gamma tables and also fbdev emulation through
+	 * the provided helper library need to take care to fill out the gamma
+	 * hooks for both. Currently there's a bit an unfortunate duplication
+	 * going on, which should eventually be unified to just one set of
+	 * hooks.
+	 */
 	void (*gamma_set)(struct drm_crtc *crtc, u16 *r, u16 *g, u16 *b,
 			  uint32_t start, uint32_t size);
-	/* Object destroy routine */
+
+	/**
+	 * @destroy:
+	 *
+	 * Clean up plane resources. This is only called at driver unload time
+	 * through drm_mode_config_cleanup() since a CRTC cannot be hotplugged
+	 * in DRM.
+	 */
 	void (*destroy)(struct drm_crtc *crtc);
 
+	/**
+	 * @set_config:
+	 *
+	 * This is the main legacy entry point to change the modeset state on a
+	 * CRTC. All the details of the desired configuration are passed in a
+	 * struct &drm_mode_set - see there for details.
+	 *
+	 * Drivers implementing atomic modeset should use
+	 * drm_atomic_helper_set_config() to implement this hook.
+	 *
+	 * RETURNS:
+	 *
+	 * 0 on success or a negative error code on failure.
+	 */
 	int (*set_config)(struct drm_mode_set *set);
 
-	/*
-	 * Flip to the given framebuffer.  This implements the page
-	 * flip ioctl described in drm_mode.h, specifically, the
-	 * implementation must return immediately and block all
-	 * rendering to the current fb until the flip has completed.
-	 * If userspace set the event flag in the ioctl, the event
-	 * argument will point to an event to send back when the flip
-	 * completes, otherwise it will be NULL.
+	/**
+	 * @page_flip:
+	 *
+	 * Legacy entry point to schedule a flip to the given framebuffer.
+	 *
+	 * Page flipping is a synchronization mechanism that replaces the frame
+	 * buffer being scanned out by the CRTC with a new frame buffer during
+	 * vertical blanking, avoiding tearing (except when requested otherwise
+	 * through the DRM_MODE_PAGE_FLIP_ASYNC flag). When an application
+	 * requests a page flip the DRM core verifies that the new frame buffer
+	 * is large enough to be scanned out by the CRTC in the currently
+	 * configured mode and then calls the CRTC ->page_flip() operation with a
+	 * pointer to the new frame buffer.
+	 *
+	 * The driver must wait for any pending rendering to the new framebuffer
+	 * to complete before executing the flip. It should also wait for any
+	 * pending rendering from other drivers if the underlying buffer is a
+	 * shared dma-buf.
+	 *
+	 * An application can request to be notified when the page flip has
+	 * completed. The drm core will supply a struct &drm_event in the event
+	 * parameter in this case. This can be handled by the
+	 * drm_crtc_send_vblank_event() function, which the driver should call on
+	 * the provided event upon completion of the flip. Note that if
+	 * the driver supports vblank signalling and timestamping the vblank
+	 * counters and timestamps must agree with the ones returned from page
+	 * flip events. With the current vblank helper infrastructure this can
+	 * be achieved by holding a vblank reference while the page flip is
+	 * pending, acquired through drm_crtc_vblank_get() and released with
+	 * drm_crtc_vblank_put(). Drivers are free to implement their own vblank
+	 * counter and timestamp tracking though, e.g. if they have accurate
+	 * timestamp registers in hardware.
+	 *
+	 * FIXME:
+	 *
+	 * Up to that point drivers need to manage events themselves and can use
+	 * even->base.list freely for that. Specifically they need to ensure
+	 * that they don't send out page flip (or vblank) events for which the
+	 * corresponding drm file has been closed already. The drm core
+	 * unfortunately does not (yet) take care of that. Therefore drivers
+	 * currently must clean up and release pending events in their
+	 * ->preclose driver function.
+	 *
+	 * This callback is optional.
+	 *
+	 * NOTE:
+	 *
+	 * Very early versions of the KMS ABI mandated that the driver must
+	 * block (but not reject) any rendering to the old framebuffer until the
+	 * flip operation has completed and the old framebuffer is no longer
+	 * visible. This requirement has been lifted, and userspace is instead
+	 * expected to request delivery of an event and wait with recycling old
+	 * buffers until such has been received.
+	 *
+	 * RETURNS:
+	 *
+	 * 0 on success or a negative error code on failure. Note that if a
+	 * ->page_flip() operation is already pending the callback should return
+	 * -EBUSY. Pageflips on a disabled CRTC (either by setting a NULL mode
+	 * or just runtime disabled through DPMS respectively the new atomic
+	 * "ACTIVE" state) should result in an -EINVAL error code.
 	 */
 	int (*page_flip)(struct drm_crtc *crtc,
 			 struct drm_framebuffer *fb,
 			 struct drm_pending_vblank_event *event,
 			 uint32_t flags);
 
+	/**
+	 * @set_property:
+	 *
+	 * This is the legacy entry point to update a property attached to the
+	 * CRTC.
+	 *
+	 * Drivers implementing atomic modeset should use
+	 * drm_atomic_helper_crtc_set_property() to implement this hook.
+	 *
+	 * This callback is optional if the driver does not support any legacy
+	 * driver-private properties.
+	 *
+	 * RETURNS:
+	 *
+	 * 0 on success or a negative error code on failure.
+	 */
 	int (*set_property)(struct drm_crtc *crtc,
 			    struct drm_property *property, uint64_t val);
 
-	/* atomic update handling */
+	/**
+	 * @atomic_duplicate_state:
+	 *
+	 * Duplicate the current atomic state for this CRTC and return it.
+	 * The core and helpers gurantee that any atomic state duplicated with
+	 * this hook and still owned by the caller (i.e. not transferred to the
+	 * driver by calling ->atomic_commit() from struct
+	 * &drm_mode_config_funcs) will be cleaned up by calling the
+	 * @atomic_destroy_state hook in this structure.
+	 *
+	 * Atomic drivers which don't subclass struct &drm_crtc should use
+	 * drm_atomic_helper_crtc_duplicate_state(). Drivers that subclass the
+	 * state structure to extend it with driver-private state should use
+	 * __drm_atomic_helper_crtc_duplicate_state() to make sure shared state is
+	 * duplicated in a consistent fashion across drivers.
+	 *
+	 * It is an error to call this hook before crtc->state has been
+	 * initialized correctly.
+	 *
+	 * NOTE:
+	 *
+	 * If the duplicate state references refcounted resources this hook must
+	 * acquire a reference for each of them. The driver must release these
+	 * references again in @atomic_destroy_state.
+	 *
+	 * RETURNS:
+	 *
+	 * Duplicated atomic state or NULL when the allocation failed.
+	 */
 	struct drm_crtc_state *(*atomic_duplicate_state)(struct drm_crtc *crtc);
+
+	/**
+	 * @atomic_destroy_state:
+	 *
+	 * Destroy a state duplicated with @atomic_duplicate_state and release
+	 * or unreference all resources it references
+	 */
 	void (*atomic_destroy_state)(struct drm_crtc *crtc,
 				     struct drm_crtc_state *state);
+
+	/**
+	 * @atomic_set_property:
+	 *
+	 * Decode a driver-private property value and store the decoded value
+	 * into the passed-in state structure. Since the atomic core decodes all
+	 * standardized properties (even for extensions beyond the core set of
+	 * properties which might not be implemented by all drivers) this
+	 * requires drivers to subclass the state structure.
+	 *
+	 * Such driver-private properties should really only be implemented for
+	 * truly hardware/vendor specific state. Instead it is preferred to
+	 * standardize atomic extension and decode the properties used to expose
+	 * such an extension in the core.
+	 *
+	 * Do not call this function directly, use
+	 * drm_atomic_crtc_set_property() instead.
+	 *
+	 * This callback is optional if the driver does not support any
+	 * driver-private atomic properties.
+	 *
+	 * NOTE:
+	 *
+	 * This function is called in the state assembly phase of atomic
+	 * modesets, which can be aborted for any reason (including on
+	 * userspace's request to just check whether a configuration would be
+	 * possible). Drivers MUST NOT touch any persistent state (hardware or
+	 * software) or data structures except the passed in @state parameter.
+	 *
+	 * Also since userspace controls in which order properties are set this
+	 * function must not do any input validation (since the state update is
+	 * incomplete and hence likely inconsistent). Instead any such input
+	 * validation must be done in the various atomic_check callbacks.
+	 *
+	 * RETURNS:
+	 *
+	 * 0 if the property has been found, -EINVAL if the property isn't
+	 * implemented by the driver (which should never happen, the core only
+	 * asks for properties attached to this CRTC). No other validation is
+	 * allowed by the driver. The core already checks that the property
+	 * value is within the range (integer, valid enum value, ...) the driver
+	 * set when registering the property.
+	 */
 	int (*atomic_set_property)(struct drm_crtc *crtc,
 				   struct drm_crtc_state *state,
 				   struct drm_property *property,
 				   uint64_t val);
+	/**
+	 * @atomic_get_property:
+	 *
+	 * Reads out the decoded driver-private property. This is used to
+	 * implement the GETCRTC IOCTL.
+	 *
+	 * Do not call this function directly, use
+	 * drm_atomic_crtc_get_property() instead.
+	 *
+	 * This callback is optional if the driver does not support any
+	 * driver-private atomic properties.
+	 *
+	 * RETURNS:
+	 *
+	 * 0 on success, -EINVAL if the property isn't implemented by the
+	 * driver (which should never happen, the core only asks for
+	 * properties attached to this CRTC).
+	 */
 	int (*atomic_get_property)(struct drm_crtc *crtc,
 				   const struct drm_crtc_state *state,
 				   struct drm_property *property,
@@ -416,7 +702,7 @@
  * @properties: property tracking for this CRTC
  * @state: current atomic state for this CRTC
  * @acquire_ctx: per-CRTC implicit acquire context used by atomic drivers for
- * 	legacy ioctls
+ * 	legacy IOCTLs
  *
  * Each CRTC may have one or more connectors associated with it.  This structure
  * allows the CRTC to be controlled.
@@ -426,6 +712,8 @@
 	struct device_node *port;
 	struct list_head head;
 
+	char *name;
+
 	/*
 	 * crtc mutex
 	 *
@@ -463,14 +751,14 @@
 	uint16_t *gamma_store;
 
 	/* if you are using the helper */
-	const void *helper_private;
+	const struct drm_crtc_helper_funcs *helper_private;
 
 	struct drm_object_properties properties;
 
 	struct drm_crtc_state *state;
 
 	/*
-	 * For legacy crtc ioctls so that atomic drivers can get at the locking
+	 * For legacy crtc IOCTLs so that atomic drivers can get at the locking
 	 * acquire context.
 	 */
 	struct drm_modeset_acquire_ctx *acquire_ctx;
@@ -495,54 +783,239 @@
 
 /**
  * struct drm_connector_funcs - control connectors on a given device
- * @dpms: set power state
- * @save: save connector state
- * @restore: restore connector state
- * @reset: reset connector after state has been invalidated (e.g. resume)
- * @detect: is this connector active?
- * @fill_modes: fill mode list for this connector
- * @set_property: property for this connector may need an update
- * @destroy: make object go away
- * @force: notify the driver that the connector is forced on
- * @atomic_duplicate_state: duplicate the atomic state for this connector
- * @atomic_destroy_state: destroy an atomic state for this connector
- * @atomic_set_property: set a property on an atomic state for this connector
- *    (do not call directly, use drm_atomic_connector_set_property())
- * @atomic_get_property: get a property on an atomic state for this connector
- *    (do not call directly, use drm_atomic_connector_get_property())
  *
  * Each CRTC may have one or more connectors attached to it.  The functions
  * below allow the core DRM code to control connectors, enumerate available modes,
  * etc.
  */
 struct drm_connector_funcs {
+	/**
+	 * @dpms:
+	 *
+	 * Legacy entry point to set the per-connector DPMS state. Legacy DPMS
+	 * is exposed as a standard property on the connector, but diverted to
+	 * this callback in the drm core. Note that atomic drivers don't
+	 * implement the 4 level DPMS support on the connector any more, but
+	 * instead only have an on/off "ACTIVE" property on the CRTC object.
+	 *
+	 * Drivers implementing atomic modeset should use
+	 * drm_atomic_helper_connector_dpms() to implement this hook.
+	 *
+	 * RETURNS:
+	 *
+	 * 0 on success or a negative error code on failure.
+	 */
 	int (*dpms)(struct drm_connector *connector, int mode);
-	void (*save)(struct drm_connector *connector);
-	void (*restore)(struct drm_connector *connector);
+
+	/**
+	 * @reset:
+	 *
+	 * Reset connector hardware and software state to off. This function isn't
+	 * called by the core directly, only through drm_mode_config_reset().
+	 * It's not a helper hook only for historical reasons.
+	 *
+	 * Atomic drivers can use drm_atomic_helper_connector_reset() to reset
+	 * atomic state using this hook.
+	 */
 	void (*reset)(struct drm_connector *connector);
 
-	/* Check to see if anything is attached to the connector.
-	 * @force is set to false whilst polling, true when checking the
-	 * connector due to user request. @force can be used by the driver
-	 * to avoid expensive, destructive operations during automated
-	 * probing.
+	/**
+	 * @detect:
+	 *
+	 * Check to see if anything is attached to the connector. The parameter
+	 * force is set to false whilst polling, true when checking the
+	 * connector due to a user request. force can be used by the driver to
+	 * avoid expensive, destructive operations during automated probing.
+	 *
+	 * FIXME:
+	 *
+	 * Note that this hook is only called by the probe helper. It's not in
+	 * the helper library vtable purely for historical reasons. The only DRM
+	 * core	entry point to probe connector state is @fill_modes.
+	 *
+	 * RETURNS:
+	 *
+	 * drm_connector_status indicating the connector's status.
 	 */
 	enum drm_connector_status (*detect)(struct drm_connector *connector,
 					    bool force);
-	int (*fill_modes)(struct drm_connector *connector, uint32_t max_width, uint32_t max_height);
-	int (*set_property)(struct drm_connector *connector, struct drm_property *property,
-			     uint64_t val);
-	void (*destroy)(struct drm_connector *connector);
+
+	/**
+	 * @force:
+	 *
+	 * This function is called to update internal encoder state when the
+	 * connector is forced to a certain state by userspace, either through
+	 * the sysfs interfaces or on the kernel cmdline. In that case the
+	 * @detect callback isn't called.
+	 *
+	 * FIXME:
+	 *
+	 * Note that this hook is only called by the probe helper. It's not in
+	 * the helper library vtable purely for historical reasons. The only DRM
+	 * core	entry point to probe connector state is @fill_modes.
+	 */
 	void (*force)(struct drm_connector *connector);
 
-	/* atomic update handling */
+	/**
+	 * @fill_modes:
+	 *
+	 * Entry point for output detection and basic mode validation. The
+	 * driver should reprobe the output if needed (e.g. when hotplug
+	 * handling is unreliable), add all detected modes to connector->modes
+	 * and filter out any the device can't support in any configuration. It
+	 * also needs to filter out any modes wider or higher than the
+	 * parameters max_width and max_height indicate.
+	 *
+	 * The drivers must also prune any modes no longer valid from
+	 * connector->modes. Furthermore it must update connector->status and
+	 * connector->edid.  If no EDID has been received for this output
+	 * connector->edid must be NULL.
+	 *
+	 * Drivers using the probe helpers should use
+	 * drm_helper_probe_single_connector_modes() or
+	 * drm_helper_probe_single_connector_modes_nomerge() to implement this
+	 * function.
+	 *
+	 * RETURNS:
+	 *
+	 * The number of modes detected and filled into connector->modes.
+	 */
+	int (*fill_modes)(struct drm_connector *connector, uint32_t max_width, uint32_t max_height);
+
+	/**
+	 * @set_property:
+	 *
+	 * This is the legacy entry point to update a property attached to the
+	 * connector.
+	 *
+	 * Drivers implementing atomic modeset should use
+	 * drm_atomic_helper_connector_set_property() to implement this hook.
+	 *
+	 * This callback is optional if the driver does not support any legacy
+	 * driver-private properties.
+	 *
+	 * RETURNS:
+	 *
+	 * 0 on success or a negative error code on failure.
+	 */
+	int (*set_property)(struct drm_connector *connector, struct drm_property *property,
+			     uint64_t val);
+
+	/**
+	 * @destroy:
+	 *
+	 * Clean up connector resources. This is called at driver unload time
+	 * through drm_mode_config_cleanup(). It can also be called at runtime
+	 * when a connector is being hot-unplugged for drivers that support
+	 * connector hotplugging (e.g. DisplayPort MST).
+	 */
+	void (*destroy)(struct drm_connector *connector);
+
+	/**
+	 * @atomic_duplicate_state:
+	 *
+	 * Duplicate the current atomic state for this connector and return it.
+	 * The core and helpers gurantee that any atomic state duplicated with
+	 * this hook and still owned by the caller (i.e. not transferred to the
+	 * driver by calling ->atomic_commit() from struct
+	 * &drm_mode_config_funcs) will be cleaned up by calling the
+	 * @atomic_destroy_state hook in this structure.
+	 *
+	 * Atomic drivers which don't subclass struct &drm_connector_state should use
+	 * drm_atomic_helper_connector_duplicate_state(). Drivers that subclass the
+	 * state structure to extend it with driver-private state should use
+	 * __drm_atomic_helper_connector_duplicate_state() to make sure shared state is
+	 * duplicated in a consistent fashion across drivers.
+	 *
+	 * It is an error to call this hook before connector->state has been
+	 * initialized correctly.
+	 *
+	 * NOTE:
+	 *
+	 * If the duplicate state references refcounted resources this hook must
+	 * acquire a reference for each of them. The driver must release these
+	 * references again in @atomic_destroy_state.
+	 *
+	 * RETURNS:
+	 *
+	 * Duplicated atomic state or NULL when the allocation failed.
+	 */
 	struct drm_connector_state *(*atomic_duplicate_state)(struct drm_connector *connector);
+
+	/**
+	 * @atomic_destroy_state:
+	 *
+	 * Destroy a state duplicated with @atomic_duplicate_state and release
+	 * or unreference all resources it references
+	 */
 	void (*atomic_destroy_state)(struct drm_connector *connector,
 				     struct drm_connector_state *state);
+
+	/**
+	 * @atomic_set_property:
+	 *
+	 * Decode a driver-private property value and store the decoded value
+	 * into the passed-in state structure. Since the atomic core decodes all
+	 * standardized properties (even for extensions beyond the core set of
+	 * properties which might not be implemented by all drivers) this
+	 * requires drivers to subclass the state structure.
+	 *
+	 * Such driver-private properties should really only be implemented for
+	 * truly hardware/vendor specific state. Instead it is preferred to
+	 * standardize atomic extension and decode the properties used to expose
+	 * such an extension in the core.
+	 *
+	 * Do not call this function directly, use
+	 * drm_atomic_connector_set_property() instead.
+	 *
+	 * This callback is optional if the driver does not support any
+	 * driver-private atomic properties.
+	 *
+	 * NOTE:
+	 *
+	 * This function is called in the state assembly phase of atomic
+	 * modesets, which can be aborted for any reason (including on
+	 * userspace's request to just check whether a configuration would be
+	 * possible). Drivers MUST NOT touch any persistent state (hardware or
+	 * software) or data structures except the passed in @state parameter.
+	 *
+	 * Also since userspace controls in which order properties are set this
+	 * function must not do any input validation (since the state update is
+	 * incomplete and hence likely inconsistent). Instead any such input
+	 * validation must be done in the various atomic_check callbacks.
+	 *
+	 * RETURNS:
+	 *
+	 * 0 if the property has been found, -EINVAL if the property isn't
+	 * implemented by the driver (which shouldn't ever happen, the core only
+	 * asks for properties attached to this connector). No other validation
+	 * is allowed by the driver. The core already checks that the property
+	 * value is within the range (integer, valid enum value, ...) the driver
+	 * set when registering the property.
+	 */
 	int (*atomic_set_property)(struct drm_connector *connector,
 				   struct drm_connector_state *state,
 				   struct drm_property *property,
 				   uint64_t val);
+
+	/**
+	 * @atomic_get_property:
+	 *
+	 * Reads out the decoded driver-private property. This is used to
+	 * implement the GETCONNECTOR IOCTL.
+	 *
+	 * Do not call this function directly, use
+	 * drm_atomic_connector_get_property() instead.
+	 *
+	 * This callback is optional if the driver does not support any
+	 * driver-private atomic properties.
+	 *
+	 * RETURNS:
+	 *
+	 * 0 on success, -EINVAL if the property isn't implemented by the
+	 * driver (which shouldn't ever happen, the core only asks for
+	 * properties attached to this connector).
+	 */
 	int (*atomic_get_property)(struct drm_connector *connector,
 				   const struct drm_connector_state *state,
 				   struct drm_property *property,
@@ -551,13 +1024,26 @@
 
 /**
  * struct drm_encoder_funcs - encoder controls
- * @reset: reset state (e.g. at init or resume time)
- * @destroy: cleanup and free associated data
  *
  * Encoders sit between CRTCs and connectors.
  */
 struct drm_encoder_funcs {
+	/**
+	 * @reset:
+	 *
+	 * Reset encoder hardware and software state to off. This function isn't
+	 * called by the core directly, only through drm_mode_config_reset().
+	 * It's not a helper hook only for historical reasons.
+	 */
 	void (*reset)(struct drm_encoder *encoder);
+
+	/**
+	 * @destroy:
+	 *
+	 * Clean up encoder resources. This is only called at driver unload time
+	 * through drm_mode_config_cleanup() since an encoder cannot be
+	 * hotplugged in DRM.
+	 */
 	void (*destroy)(struct drm_encoder *encoder);
 };
 
@@ -593,7 +1079,7 @@
 	struct drm_crtc *crtc;
 	struct drm_bridge *bridge;
 	const struct drm_encoder_funcs *funcs;
-	const void *helper_private;
+	const struct drm_encoder_helper_funcs *helper_private;
 };
 
 /* should we poll this connector for connects and disconnects */
@@ -698,7 +1184,7 @@
 	/* requested DPMS state */
 	int dpms;
 
-	const void *helper_private;
+	const struct drm_connector_helper_funcs *helper_private;
 
 	/* forced on connector */
 	struct drm_cmdline_mode cmdline_mode;
@@ -778,40 +1264,203 @@
 
 /**
  * struct drm_plane_funcs - driver plane control functions
- * @update_plane: update the plane configuration
- * @disable_plane: shut down the plane
- * @destroy: clean up plane resources
- * @reset: reset plane after state has been invalidated (e.g. resume)
- * @set_property: called when a property is changed
- * @atomic_duplicate_state: duplicate the atomic state for this plane
- * @atomic_destroy_state: destroy an atomic state for this plane
- * @atomic_set_property: set a property on an atomic state for this plane
- *    (do not call directly, use drm_atomic_plane_set_property())
- * @atomic_get_property: get a property on an atomic state for this plane
- *    (do not call directly, use drm_atomic_plane_get_property())
  */
 struct drm_plane_funcs {
+	/**
+	 * @update_plane:
+	 *
+	 * This is the legacy entry point to enable and configure the plane for
+	 * the given CRTC and framebuffer. It is never called to disable the
+	 * plane, i.e. the passed-in crtc and fb paramters are never NULL.
+	 *
+	 * The source rectangle in frame buffer memory coordinates is given by
+	 * the src_x, src_y, src_w and src_h parameters (as 16.16 fixed point
+	 * values). Devices that don't support subpixel plane coordinates can
+	 * ignore the fractional part.
+	 *
+	 * The destination rectangle in CRTC coordinates is given by the
+	 * crtc_x, crtc_y, crtc_w and crtc_h parameters (as integer values).
+	 * Devices scale the source rectangle to the destination rectangle. If
+	 * scaling is not supported, and the source rectangle size doesn't match
+	 * the destination rectangle size, the driver must return a
+	 * -<errorname>EINVAL</errorname> error.
+	 *
+	 * Drivers implementing atomic modeset should use
+	 * drm_atomic_helper_update_plane() to implement this hook.
+	 *
+	 * RETURNS:
+	 *
+	 * 0 on success or a negative error code on failure.
+	 */
 	int (*update_plane)(struct drm_plane *plane,
 			    struct drm_crtc *crtc, struct drm_framebuffer *fb,
 			    int crtc_x, int crtc_y,
 			    unsigned int crtc_w, unsigned int crtc_h,
 			    uint32_t src_x, uint32_t src_y,
 			    uint32_t src_w, uint32_t src_h);
+
+	/**
+	 * @disable_plane:
+	 *
+	 * This is the legacy entry point to disable the plane. The DRM core
+	 * calls this method in response to a DRM_IOCTL_MODE_SETPLANE IOCTL call
+	 * with the frame buffer ID set to 0.  Disabled planes must not be
+	 * processed by the CRTC.
+	 *
+	 * Drivers implementing atomic modeset should use
+	 * drm_atomic_helper_disable_plane() to implement this hook.
+	 *
+	 * RETURNS:
+	 *
+	 * 0 on success or a negative error code on failure.
+	 */
 	int (*disable_plane)(struct drm_plane *plane);
+
+	/**
+	 * @destroy:
+	 *
+	 * Clean up plane resources. This is only called at driver unload time
+	 * through drm_mode_config_cleanup() since a plane cannot be hotplugged
+	 * in DRM.
+	 */
 	void (*destroy)(struct drm_plane *plane);
+
+	/**
+	 * @reset:
+	 *
+	 * Reset plane hardware and software state to off. This function isn't
+	 * called by the core directly, only through drm_mode_config_reset().
+	 * It's not a helper hook only for historical reasons.
+	 *
+	 * Atomic drivers can use drm_atomic_helper_plane_reset() to reset
+	 * atomic state using this hook.
+	 */
 	void (*reset)(struct drm_plane *plane);
 
+	/**
+	 * @set_property:
+	 *
+	 * This is the legacy entry point to update a property attached to the
+	 * plane.
+	 *
+	 * Drivers implementing atomic modeset should use
+	 * drm_atomic_helper_plane_set_property() to implement this hook.
+	 *
+	 * This callback is optional if the driver does not support any legacy
+	 * driver-private properties.
+	 *
+	 * RETURNS:
+	 *
+	 * 0 on success or a negative error code on failure.
+	 */
 	int (*set_property)(struct drm_plane *plane,
 			    struct drm_property *property, uint64_t val);
 
-	/* atomic update handling */
+	/**
+	 * @atomic_duplicate_state:
+	 *
+	 * Duplicate the current atomic state for this plane and return it.
+	 * The core and helpers gurantee that any atomic state duplicated with
+	 * this hook and still owned by the caller (i.e. not transferred to the
+	 * driver by calling ->atomic_commit() from struct
+	 * &drm_mode_config_funcs) will be cleaned up by calling the
+	 * @atomic_destroy_state hook in this structure.
+	 *
+	 * Atomic drivers which don't subclass struct &drm_plane_state should use
+	 * drm_atomic_helper_plane_duplicate_state(). Drivers that subclass the
+	 * state structure to extend it with driver-private state should use
+	 * __drm_atomic_helper_plane_duplicate_state() to make sure shared state is
+	 * duplicated in a consistent fashion across drivers.
+	 *
+	 * It is an error to call this hook before plane->state has been
+	 * initialized correctly.
+	 *
+	 * NOTE:
+	 *
+	 * If the duplicate state references refcounted resources this hook must
+	 * acquire a reference for each of them. The driver must release these
+	 * references again in @atomic_destroy_state.
+	 *
+	 * RETURNS:
+	 *
+	 * Duplicated atomic state or NULL when the allocation failed.
+	 */
 	struct drm_plane_state *(*atomic_duplicate_state)(struct drm_plane *plane);
+
+	/**
+	 * @atomic_destroy_state:
+	 *
+	 * Destroy a state duplicated with @atomic_duplicate_state and release
+	 * or unreference all resources it references
+	 */
 	void (*atomic_destroy_state)(struct drm_plane *plane,
 				     struct drm_plane_state *state);
+
+	/**
+	 * @atomic_set_property:
+	 *
+	 * Decode a driver-private property value and store the decoded value
+	 * into the passed-in state structure. Since the atomic core decodes all
+	 * standardized properties (even for extensions beyond the core set of
+	 * properties which might not be implemented by all drivers) this
+	 * requires drivers to subclass the state structure.
+	 *
+	 * Such driver-private properties should really only be implemented for
+	 * truly hardware/vendor specific state. Instead it is preferred to
+	 * standardize atomic extension and decode the properties used to expose
+	 * such an extension in the core.
+	 *
+	 * Do not call this function directly, use
+	 * drm_atomic_plane_set_property() instead.
+	 *
+	 * This callback is optional if the driver does not support any
+	 * driver-private atomic properties.
+	 *
+	 * NOTE:
+	 *
+	 * This function is called in the state assembly phase of atomic
+	 * modesets, which can be aborted for any reason (including on
+	 * userspace's request to just check whether a configuration would be
+	 * possible). Drivers MUST NOT touch any persistent state (hardware or
+	 * software) or data structures except the passed in @state parameter.
+	 *
+	 * Also since userspace controls in which order properties are set this
+	 * function must not do any input validation (since the state update is
+	 * incomplete and hence likely inconsistent). Instead any such input
+	 * validation must be done in the various atomic_check callbacks.
+	 *
+	 * RETURNS:
+	 *
+	 * 0 if the property has been found, -EINVAL if the property isn't
+	 * implemented by the driver (which shouldn't ever happen, the core only
+	 * asks for properties attached to this plane). No other validation is
+	 * allowed by the driver. The core already checks that the property
+	 * value is within the range (integer, valid enum value, ...) the driver
+	 * set when registering the property.
+	 */
 	int (*atomic_set_property)(struct drm_plane *plane,
 				   struct drm_plane_state *state,
 				   struct drm_property *property,
 				   uint64_t val);
+
+	/**
+	 * @atomic_get_property:
+	 *
+	 * Reads out the decoded driver-private property. This is used to
+	 * implement the GETPLANE IOCTL.
+	 *
+	 * Do not call this function directly, use
+	 * drm_atomic_plane_get_property() instead.
+	 *
+	 * This callback is optional if the driver does not support any
+	 * driver-private atomic properties.
+	 *
+	 * RETURNS:
+	 *
+	 * 0 on success, -EINVAL if the property isn't implemented by the
+	 * driver (which should never happen, the core only asks for
+	 * properties attached to this plane).
+	 */
 	int (*atomic_get_property)(struct drm_plane *plane,
 				   const struct drm_plane_state *state,
 				   struct drm_property *property,
@@ -824,6 +1473,7 @@
 	DRM_PLANE_TYPE_CURSOR,
 };
 
+
 /**
  * struct drm_plane - central DRM plane control structure
  * @dev: DRM device this plane belongs to
@@ -846,6 +1496,8 @@
 	struct drm_device *dev;
 	struct list_head head;
 
+	char *name;
+
 	struct drm_modeset_lock mutex;
 
 	struct drm_mode_object base;
@@ -866,7 +1518,7 @@
 
 	enum drm_plane_type type;
 
-	const void *helper_private;
+	const struct drm_plane_helper_funcs *helper_private;
 
 	struct drm_plane_state *state;
 };
@@ -874,24 +1526,114 @@
 /**
  * struct drm_bridge_funcs - drm_bridge control functions
  * @attach: Called during drm_bridge_attach
- * @mode_fixup: Try to fixup (or reject entirely) proposed mode for this bridge
- * @disable: Called right before encoder prepare, disables the bridge
- * @post_disable: Called right after encoder prepare, for lockstepped disable
- * @mode_set: Set this mode to the bridge
- * @pre_enable: Called right before encoder commit, for lockstepped commit
- * @enable: Called right after encoder commit, enables the bridge
  */
 struct drm_bridge_funcs {
 	int (*attach)(struct drm_bridge *bridge);
+
+	/**
+	 * @mode_fixup:
+	 *
+	 * This callback is used to validate and adjust a mode. The paramater
+	 * mode is the display mode that should be fed to the next element in
+	 * the display chain, either the final &drm_connector or the next
+	 * &drm_bridge. The parameter adjusted_mode is the input mode the bridge
+	 * requires. It can be modified by this callback and does not need to
+	 * match mode.
+	 *
+	 * This is the only hook that allows a bridge to reject a modeset. If
+	 * this function passes all other callbacks must succeed for this
+	 * configuration.
+	 *
+	 * NOTE:
+	 *
+	 * This function is called in the check phase of atomic modesets, which
+	 * can be aborted for any reason (including on userspace's request to
+	 * just check whether a configuration would be possible). Drivers MUST
+	 * NOT touch any persistent state (hardware or software) or data
+	 * structures except the passed in @state parameter.
+	 *
+	 * RETURNS:
+	 *
+	 * True if an acceptable configuration is possible, false if the modeset
+	 * operation should be rejected.
+	 */
 	bool (*mode_fixup)(struct drm_bridge *bridge,
 			   const struct drm_display_mode *mode,
 			   struct drm_display_mode *adjusted_mode);
+	/**
+	 * @disable:
+	 *
+	 * This callback should disable the bridge. It is called right before
+	 * the preceding element in the display pipe is disabled. If the
+	 * preceding element is a bridge this means it's called before that
+	 * bridge's ->disable() function. If the preceding element is a
+	 * &drm_encoder it's called right before the encoder's ->disable(),
+	 * ->prepare() or ->dpms() hook from struct &drm_encoder_helper_funcs.
+	 *
+	 * The bridge can assume that the display pipe (i.e. clocks and timing
+	 * signals) feeding it is still running when this callback is called.
+	 */
 	void (*disable)(struct drm_bridge *bridge);
+
+	/**
+	 * @post_disable:
+	 *
+	 * This callback should disable the bridge. It is called right after
+	 * the preceding element in the display pipe is disabled. If the
+	 * preceding element is a bridge this means it's called after that
+	 * bridge's ->post_disable() function. If the preceding element is a
+	 * &drm_encoder it's called right after the encoder's ->disable(),
+	 * ->prepare() or ->dpms() hook from struct &drm_encoder_helper_funcs.
+	 *
+	 * The bridge must assume that the display pipe (i.e. clocks and timing
+	 * singals) feeding it is no longer running when this callback is
+	 * called.
+	 */
 	void (*post_disable)(struct drm_bridge *bridge);
+
+	/**
+	 * @mode_set:
+	 *
+	 * This callback should set the given mode on the bridge. It is called
+	 * after the ->mode_set() callback for the preceding element in the
+	 * display pipeline has been called already. The display pipe (i.e.
+	 * clocks and timing signals) is off when this function is called.
+	 */
 	void (*mode_set)(struct drm_bridge *bridge,
 			 struct drm_display_mode *mode,
 			 struct drm_display_mode *adjusted_mode);
+	/**
+	 * @pre_enable:
+	 *
+	 * This callback should enable the bridge. It is called right before
+	 * the preceding element in the display pipe is enabled. If the
+	 * preceding element is a bridge this means it's called before that
+	 * bridge's ->pre_enable() function. If the preceding element is a
+	 * &drm_encoder it's called right before the encoder's ->enable(),
+	 * ->commit() or ->dpms() hook from struct &drm_encoder_helper_funcs.
+	 *
+	 * The display pipe (i.e. clocks and timing signals) feeding this bridge
+	 * will not yet be running when this callback is called. The bridge must
+	 * not enable the display link feeding the next bridge in the chain (if
+	 * there is one) when this callback is called.
+	 */
 	void (*pre_enable)(struct drm_bridge *bridge);
+
+	/**
+	 * @enable:
+	 *
+	 * This callback should enable the bridge. It is called right after
+	 * the preceding element in the display pipe is enabled. If the
+	 * preceding element is a bridge this means it's called after that
+	 * bridge's ->enable() function. If the preceding element is a
+	 * &drm_encoder it's called right after the encoder's ->enable(),
+	 * ->commit() or ->dpms() hook from struct &drm_encoder_helper_funcs.
+	 *
+	 * The bridge can assume that the display pipe (i.e. clocks and timing
+	 * signals) feeding it is running when this callback is called. This
+	 * callback must enable the display link feeding the next bridge in the
+	 * chain if there is one.
+	 */
 	void (*enable)(struct drm_bridge *bridge);
 };
 
@@ -922,7 +1664,7 @@
  * struct drm_atomic_state - the global state object for atomic updates
  * @dev: parent DRM device
  * @allow_modeset: allow full modeset
- * @legacy_cursor_update: hint to enforce legacy cursor ioctl semantics
+ * @legacy_cursor_update: hint to enforce legacy cursor IOCTL semantics
  * @planes: pointer to array of plane pointers
  * @plane_states: pointer to array of plane states pointers
  * @crtcs: pointer to array of CRTC pointers
@@ -977,31 +1719,254 @@
 
 /**
  * struct drm_mode_config_funcs - basic driver provided mode setting functions
- * @fb_create: create a new framebuffer object
- * @output_poll_changed: function to handle output configuration changes
- * @atomic_check: check whether a given atomic state update is possible
- * @atomic_commit: commit an atomic state update previously verified with
- * 	atomic_check()
- * @atomic_state_alloc: allocate a new atomic state
- * @atomic_state_clear: clear the atomic state
- * @atomic_state_free: free the atomic state
  *
  * Some global (i.e. not per-CRTC, connector, etc) mode setting functions that
  * involve drivers.
  */
 struct drm_mode_config_funcs {
+	/**
+	 * @fb_create:
+	 *
+	 * Create a new framebuffer object. The core does basic checks on the
+	 * requested metadata, but most of that is left to the driver. See
+	 * struct &drm_mode_fb_cmd2 for details.
+	 *
+	 * RETURNS:
+	 *
+	 * A new framebuffer with an initial reference count of 1 or a negative
+	 * error code encoded with ERR_PTR().
+	 */
 	struct drm_framebuffer *(*fb_create)(struct drm_device *dev,
 					     struct drm_file *file_priv,
-					     struct drm_mode_fb_cmd2 *mode_cmd);
+					     const struct drm_mode_fb_cmd2 *mode_cmd);
+
+	/**
+	 * @output_poll_changed:
+	 *
+	 * Callback used by helpers to inform the driver of output configuration
+	 * changes.
+	 *
+	 * Drivers implementing fbdev emulation with the helpers can call
+	 * drm_fb_helper_hotplug_changed from this hook to inform the fbdev
+	 * helper of output changes.
+	 *
+	 * FIXME:
+	 *
+	 * Except that there's no vtable for device-level helper callbacks
+	 * there's no reason this is a core function.
+	 */
 	void (*output_poll_changed)(struct drm_device *dev);
 
+	/**
+	 * @atomic_check:
+	 *
+	 * This is the only hook to validate an atomic modeset update. This
+	 * function must reject any modeset and state changes which the hardware
+	 * or driver doesn't support. This includes but is of course not limited
+	 * to:
+	 *
+	 *  - Checking that the modes, framebuffers, scaling and placement
+	 *    requirements and so on are within the limits of the hardware.
+	 *
+	 *  - Checking that any hidden shared resources are not oversubscribed.
+	 *    This can be shared PLLs, shared lanes, overall memory bandwidth,
+	 *    display fifo space (where shared between planes or maybe even
+	 *    CRTCs).
+	 *
+	 *  - Checking that virtualized resources exported to userspace are not
+	 *    oversubscribed. For various reasons it can make sense to expose
+	 *    more planes, crtcs or encoders than which are physically there. One
+	 *    example is dual-pipe operations (which generally should be hidden
+	 *    from userspace if when lockstepped in hardware, exposed otherwise),
+	 *    where a plane might need 1 hardware plane (if it's just on one
+	 *    pipe), 2 hardware planes (when it spans both pipes) or maybe even
+	 *    shared a hardware plane with a 2nd plane (if there's a compatible
+	 *    plane requested on the area handled by the other pipe).
+	 *
+	 *  - Check that any transitional state is possible and that if
+	 *    requested, the update can indeed be done in the vblank period
+	 *    without temporarily disabling some functions.
+	 *
+	 *  - Check any other constraints the driver or hardware might have.
+	 *
+	 *  - This callback also needs to correctly fill out the &drm_crtc_state
+	 *    in this update to make sure that drm_atomic_crtc_needs_modeset()
+	 *    reflects the nature of the possible update and returns true if and
+	 *    only if the update cannot be applied without tearing within one
+	 *    vblank on that CRTC. The core uses that information to reject
+	 *    updates which require a full modeset (i.e. blanking the screen, or
+	 *    at least pausing updates for a substantial amount of time) if
+	 *    userspace has disallowed that in its request.
+	 *
+	 *  - The driver also does not need to repeat basic input validation
+	 *    like done for the corresponding legacy entry points. The core does
+	 *    that before calling this hook.
+	 *
+	 * See the documentation of @atomic_commit for an exhaustive list of
+	 * error conditions which don't have to be checked at the
+	 * ->atomic_check() stage?
+	 *
+	 * See the documentation for struct &drm_atomic_state for how exactly
+	 * an atomic modeset update is described.
+	 *
+	 * Drivers using the atomic helpers can implement this hook using
+	 * drm_atomic_helper_check(), or one of the exported sub-functions of
+	 * it.
+	 *
+	 * RETURNS:
+	 *
+	 * 0 on success or one of the below negative error codes:
+	 *
+	 *  - -EINVAL, if any of the above constraints are violated.
+	 *
+	 *  - -EDEADLK, when returned from an attempt to acquire an additional
+	 *    &drm_modeset_lock through drm_modeset_lock().
+	 *
+	 *  - -ENOMEM, if allocating additional state sub-structures failed due
+	 *    to lack of memory.
+	 *
+	 *  - -EINTR, -EAGAIN or -ERESTARTSYS, if the IOCTL should be restarted.
+	 *    This can either be due to a pending signal, or because the driver
+	 *    needs to completely bail out to recover from an exceptional
+	 *    situation like a GPU hang. From a userspace point all errors are
+	 *    treated equally.
+	 */
 	int (*atomic_check)(struct drm_device *dev,
-			    struct drm_atomic_state *a);
+			    struct drm_atomic_state *state);
+
+	/**
+	 * @atomic_commit:
+	 *
+	 * This is the only hook to commit an atomic modeset update. The core
+	 * guarantees that @atomic_check has been called successfully before
+	 * calling this function, and that nothing has been changed in the
+	 * interim.
+	 *
+	 * See the documentation for struct &drm_atomic_state for how exactly
+	 * an atomic modeset update is described.
+	 *
+	 * Drivers using the atomic helpers can implement this hook using
+	 * drm_atomic_helper_commit(), or one of the exported sub-functions of
+	 * it.
+	 *
+	 * Asynchronous commits (as indicated with the async parameter) must
+	 * do any preparatory work which might result in an unsuccessful commit
+	 * in the context of this callback. The only exceptions are hardware
+	 * errors resulting in -EIO. But even in that case the driver must
+	 * ensure that the display pipe is at least running, to avoid
+	 * compositors crashing when pageflips don't work. Anything else,
+	 * specifically committing the update to the hardware, should be done
+	 * without blocking the caller. For updates which do not require a
+	 * modeset this must be guaranteed.
+	 *
+	 * The driver must wait for any pending rendering to the new
+	 * framebuffers to complete before executing the flip. It should also
+	 * wait for any pending rendering from other drivers if the underlying
+	 * buffer is a shared dma-buf. Asynchronous commits must not wait for
+	 * rendering in the context of this callback.
+	 *
+	 * An application can request to be notified when the atomic commit has
+	 * completed. These events are per-CRTC and can be distinguished by the
+	 * CRTC index supplied in &drm_event to userspace.
+	 *
+	 * The drm core will supply a struct &drm_event in the event
+	 * member of each CRTC's &drm_crtc_state structure. This can be handled by the
+	 * drm_crtc_send_vblank_event() function, which the driver should call on
+	 * the provided event upon completion of the atomic commit. Note that if
+	 * the driver supports vblank signalling and timestamping the vblank
+	 * counters and timestamps must agree with the ones returned from page
+	 * flip events. With the current vblank helper infrastructure this can
+	 * be achieved by holding a vblank reference while the page flip is
+	 * pending, acquired through drm_crtc_vblank_get() and released with
+	 * drm_crtc_vblank_put(). Drivers are free to implement their own vblank
+	 * counter and timestamp tracking though, e.g. if they have accurate
+	 * timestamp registers in hardware.
+	 *
+	 * NOTE:
+	 *
+	 * Drivers are not allowed to shut down any display pipe successfully
+	 * enabled through an atomic commit on their own. Doing so can result in
+	 * compositors crashing if a page flip is suddenly rejected because the
+	 * pipe is off.
+	 *
+	 * RETURNS:
+	 *
+	 * 0 on success or one of the below negative error codes:
+	 *
+	 *  - -EBUSY, if an asynchronous updated is requested and there is
+	 *    an earlier updated pending. Drivers are allowed to support a queue
+	 *    of outstanding updates, but currently no driver supports that.
+	 *    Note that drivers must wait for preceding updates to complete if a
+	 *    synchronous update is requested, they are not allowed to fail the
+	 *    commit in that case.
+	 *
+	 *  - -ENOMEM, if the driver failed to allocate memory. Specifically
+	 *    this can happen when trying to pin framebuffers, which must only
+	 *    be done when committing the state.
+	 *
+	 *  - -ENOSPC, as a refinement of the more generic -ENOMEM to indicate
+	 *    that the driver has run out of vram, iommu space or similar GPU
+	 *    address space needed for framebuffer.
+	 *
+	 *  - -EIO, if the hardware completely died.
+	 *
+	 *  - -EINTR, -EAGAIN or -ERESTARTSYS, if the IOCTL should be restarted.
+	 *    This can either be due to a pending signal, or because the driver
+	 *    needs to completely bail out to recover from an exceptional
+	 *    situation like a GPU hang. From a userspace point of view all errors are
+	 *    treated equally.
+	 *
+	 * This list is exhaustive. Specifically this hook is not allowed to
+	 * return -EINVAL (any invalid requests should be caught in
+	 * @atomic_check) or -EDEADLK (this function must not acquire
+	 * additional modeset locks).
+	 */
 	int (*atomic_commit)(struct drm_device *dev,
-			     struct drm_atomic_state *a,
+			     struct drm_atomic_state *state,
 			     bool async);
+
+	/**
+	 * @atomic_state_alloc:
+	 *
+	 * This optional hook can be used by drivers that want to subclass struct
+	 * &drm_atomic_state to be able to track their own driver-private global
+	 * state easily. If this hook is implemented, drivers must also
+	 * implement @atomic_state_clear and @atomic_state_free.
+	 *
+	 * RETURNS:
+	 *
+	 * A new &drm_atomic_state on success or NULL on failure.
+	 */
 	struct drm_atomic_state *(*atomic_state_alloc)(struct drm_device *dev);
+
+	/**
+	 * @atomic_state_clear:
+	 *
+	 * This hook must clear any driver private state duplicated into the
+	 * passed-in &drm_atomic_state. This hook is called when the caller
+	 * encountered a &drm_modeset_lock deadlock and needs to drop all
+	 * already acquired locks as part of the deadlock avoidance dance
+	 * implemented in drm_modeset_lock_backoff().
+	 *
+	 * Any duplicated state must be invalidated since a concurrent atomic
+	 * update might change it, and the drm atomic interfaces always apply
+	 * updates as relative changes to the current state.
+	 *
+	 * Drivers that implement this must call drm_atomic_state_default_clear()
+	 * to clear common state.
+	 */
 	void (*atomic_state_clear)(struct drm_atomic_state *state);
+
+	/**
+	 * @atomic_state_free:
+	 *
+	 * This hook needs driver private resources and the &drm_atomic_state
+	 * itself. Note that the core first calls drm_atomic_state_clear() to
+	 * avoid code duplicate between the clear and free hooks.
+	 *
+	 * Drivers that implement this must call drm_atomic_state_default_free()
+	 * to release common resources.
+	 */
 	void (*atomic_state_free)(struct drm_atomic_state *state);
 };
 
@@ -1010,7 +1975,7 @@
  * @mutex: mutex protecting KMS related lists and structures
  * @connection_mutex: ww mutex protecting connector state and routing
  * @acquire_ctx: global implicit acquire context used by atomic drivers for
- * 	legacy ioctls
+ * 	legacy IOCTLs
  * @idr_mutex: mutex for KMS ID allocation and management
  * @crtc_idr: main KMS ID tracking object
  * @fb_lock: mutex to protect fb state and lists
@@ -1166,7 +2131,7 @@
  */
 #define drm_for_each_plane_mask(plane, dev, plane_mask) \
 	list_for_each_entry((plane), &(dev)->mode_config.plane_list, head) \
-		if ((plane_mask) & (1 << drm_plane_index(plane)))
+		for_each_if ((plane_mask) & (1 << drm_plane_index(plane)))
 
 
 #define obj_to_crtc(x) container_of(x, struct drm_crtc, base)
@@ -1183,11 +2148,13 @@
 	char *name;
 };
 
-extern int drm_crtc_init_with_planes(struct drm_device *dev,
-				     struct drm_crtc *crtc,
-				     struct drm_plane *primary,
-				     struct drm_plane *cursor,
-				     const struct drm_crtc_funcs *funcs);
+extern __printf(6, 7)
+int drm_crtc_init_with_planes(struct drm_device *dev,
+			      struct drm_crtc *crtc,
+			      struct drm_plane *primary,
+			      struct drm_plane *cursor,
+			      const struct drm_crtc_funcs *funcs,
+			      const char *name, ...);
 extern void drm_crtc_cleanup(struct drm_crtc *crtc);
 extern unsigned int drm_crtc_index(struct drm_crtc *crtc);
 
@@ -1233,10 +2200,11 @@
 void drm_bridge_pre_enable(struct drm_bridge *bridge);
 void drm_bridge_enable(struct drm_bridge *bridge);
 
-extern int drm_encoder_init(struct drm_device *dev,
-			    struct drm_encoder *encoder,
-			    const struct drm_encoder_funcs *funcs,
-			    int encoder_type);
+extern __printf(5, 6)
+int drm_encoder_init(struct drm_device *dev,
+		     struct drm_encoder *encoder,
+		     const struct drm_encoder_funcs *funcs,
+		     int encoder_type, const char *name, ...);
 
 /**
  * drm_encoder_crtc_ok - can a given crtc drive a given encoder?
@@ -1251,13 +2219,15 @@
 	return !!(encoder->possible_crtcs & drm_crtc_mask(crtc));
 }
 
-extern int drm_universal_plane_init(struct drm_device *dev,
-				    struct drm_plane *plane,
-				    unsigned long possible_crtcs,
-				    const struct drm_plane_funcs *funcs,
-				    const uint32_t *formats,
-				    unsigned int format_count,
-				    enum drm_plane_type type);
+extern __printf(8, 9)
+int drm_universal_plane_init(struct drm_device *dev,
+			     struct drm_plane *plane,
+			     unsigned long possible_crtcs,
+			     const struct drm_plane_funcs *funcs,
+			     const uint32_t *formats,
+			     unsigned int format_count,
+			     enum drm_plane_type type,
+			     const char *name, ...);
 extern int drm_plane_init(struct drm_device *dev,
 			  struct drm_plane *plane,
 			  unsigned long possible_crtcs,
@@ -1543,7 +2513,7 @@
 /* Plane list iterator for legacy (overlay only) planes. */
 #define drm_for_each_legacy_plane(plane, dev) \
 	list_for_each_entry(plane, &(dev)->mode_config.plane_list, head) \
-		if (plane->type == DRM_PLANE_TYPE_OVERLAY)
+		for_each_if (plane->type == DRM_PLANE_TYPE_OVERLAY)
 
 #define drm_for_each_plane(plane, dev) \
 	list_for_each_entry(plane, &(dev)->mode_config.plane_list, head)
diff --git a/include/drm/drm_crtc_helper.h b/include/drm/drm_crtc_helper.h
index 3febb4b..4b37afa 100644
--- a/include/drm/drm_crtc_helper.h
+++ b/include/drm/drm_crtc_helper.h
@@ -40,148 +40,7 @@
 #include <linux/fb.h>
 
 #include <drm/drm_crtc.h>
-
-enum mode_set_atomic {
-	LEAVE_ATOMIC_MODE_SET,
-	ENTER_ATOMIC_MODE_SET,
-};
-
-/**
- * struct drm_crtc_helper_funcs - helper operations for CRTCs
- * @dpms: set power state
- * @prepare: prepare the CRTC, called before @mode_set
- * @commit: commit changes to CRTC, called after @mode_set
- * @mode_fixup: try to fixup proposed mode for this CRTC
- * @mode_set: set this mode
- * @mode_set_nofb: set mode only (no scanout buffer attached)
- * @mode_set_base: update the scanout buffer
- * @mode_set_base_atomic: non-blocking mode set (used for kgdb support)
- * @load_lut: load color palette
- * @disable: disable CRTC when no longer in use
- * @enable: enable CRTC
- * @atomic_check: check for validity of an atomic state
- * @atomic_begin: begin atomic update
- * @atomic_flush: flush atomic update
- *
- * The helper operations are called by the mid-layer CRTC helper.
- *
- * Note that with atomic helpers @dpms, @prepare and @commit hooks are
- * deprecated. Used @enable and @disable instead exclusively.
- *
- * With legacy crtc helpers there's a big semantic difference between @disable
- * and the other hooks: @disable also needs to release any resources acquired in
- * @mode_set (like shared PLLs).
- */
-struct drm_crtc_helper_funcs {
-	/*
-	 * Control power levels on the CRTC.  If the mode passed in is
-	 * unsupported, the provider must use the next lowest power level.
-	 */
-	void (*dpms)(struct drm_crtc *crtc, int mode);
-	void (*prepare)(struct drm_crtc *crtc);
-	void (*commit)(struct drm_crtc *crtc);
-
-	/* Provider can fixup or change mode timings before modeset occurs */
-	bool (*mode_fixup)(struct drm_crtc *crtc,
-			   const struct drm_display_mode *mode,
-			   struct drm_display_mode *adjusted_mode);
-	/* Actually set the mode */
-	int (*mode_set)(struct drm_crtc *crtc, struct drm_display_mode *mode,
-			struct drm_display_mode *adjusted_mode, int x, int y,
-			struct drm_framebuffer *old_fb);
-	/* Actually set the mode for atomic helpers, optional */
-	void (*mode_set_nofb)(struct drm_crtc *crtc);
-
-	/* Move the crtc on the current fb to the given position *optional* */
-	int (*mode_set_base)(struct drm_crtc *crtc, int x, int y,
-			     struct drm_framebuffer *old_fb);
-	int (*mode_set_base_atomic)(struct drm_crtc *crtc,
-				    struct drm_framebuffer *fb, int x, int y,
-				    enum mode_set_atomic);
-
-	/* reload the current crtc LUT */
-	void (*load_lut)(struct drm_crtc *crtc);
-
-	void (*disable)(struct drm_crtc *crtc);
-	void (*enable)(struct drm_crtc *crtc);
-
-	/* atomic helpers */
-	int (*atomic_check)(struct drm_crtc *crtc,
-			    struct drm_crtc_state *state);
-	void (*atomic_begin)(struct drm_crtc *crtc,
-			     struct drm_crtc_state *old_crtc_state);
-	void (*atomic_flush)(struct drm_crtc *crtc,
-			     struct drm_crtc_state *old_crtc_state);
-};
-
-/**
- * struct drm_encoder_helper_funcs - helper operations for encoders
- * @dpms: set power state
- * @save: save connector state
- * @restore: restore connector state
- * @mode_fixup: try to fixup proposed mode for this connector
- * @prepare: part of the disable sequence, called before the CRTC modeset
- * @commit: called after the CRTC modeset
- * @mode_set: set this mode, optional for atomic helpers
- * @get_crtc: return CRTC that the encoder is currently attached to
- * @detect: connection status detection
- * @disable: disable encoder when not in use (overrides DPMS off)
- * @enable: enable encoder
- * @atomic_check: check for validity of an atomic update
- *
- * The helper operations are called by the mid-layer CRTC helper.
- *
- * Note that with atomic helpers @dpms, @prepare and @commit hooks are
- * deprecated. Used @enable and @disable instead exclusively.
- *
- * With legacy crtc helpers there's a big semantic difference between @disable
- * and the other hooks: @disable also needs to release any resources acquired in
- * @mode_set (like shared PLLs).
- */
-struct drm_encoder_helper_funcs {
-	void (*dpms)(struct drm_encoder *encoder, int mode);
-	void (*save)(struct drm_encoder *encoder);
-	void (*restore)(struct drm_encoder *encoder);
-
-	bool (*mode_fixup)(struct drm_encoder *encoder,
-			   const struct drm_display_mode *mode,
-			   struct drm_display_mode *adjusted_mode);
-	void (*prepare)(struct drm_encoder *encoder);
-	void (*commit)(struct drm_encoder *encoder);
-	void (*mode_set)(struct drm_encoder *encoder,
-			 struct drm_display_mode *mode,
-			 struct drm_display_mode *adjusted_mode);
-	struct drm_crtc *(*get_crtc)(struct drm_encoder *encoder);
-	/* detect for DAC style encoders */
-	enum drm_connector_status (*detect)(struct drm_encoder *encoder,
-					    struct drm_connector *connector);
-	void (*disable)(struct drm_encoder *encoder);
-
-	void (*enable)(struct drm_encoder *encoder);
-
-	/* atomic helpers */
-	int (*atomic_check)(struct drm_encoder *encoder,
-			    struct drm_crtc_state *crtc_state,
-			    struct drm_connector_state *conn_state);
-};
-
-/**
- * struct drm_connector_helper_funcs - helper operations for connectors
- * @get_modes: get mode list for this connector
- * @mode_valid: is this mode valid on the given connector? (optional)
- * @best_encoder: return the preferred encoder for this connector
- * @atomic_best_encoder: atomic version of @best_encoder
- *
- * The helper operations are called by the mid-layer CRTC helper.
- */
-struct drm_connector_helper_funcs {
-	int (*get_modes)(struct drm_connector *connector);
-	enum drm_mode_status (*mode_valid)(struct drm_connector *connector,
-					   struct drm_display_mode *mode);
-	struct drm_encoder *(*best_encoder)(struct drm_connector *connector);
-	struct drm_encoder *(*atomic_best_encoder)(struct drm_connector *connector,
-						   struct drm_connector_state *connector_state);
-};
+#include <drm/drm_modeset_helper_vtables.h>
 
 extern void drm_helper_disable_unused_functions(struct drm_device *dev);
 extern int drm_crtc_helper_set_config(struct drm_mode_set *set);
@@ -197,25 +56,7 @@
 extern void drm_helper_move_panel_connectors_to_head(struct drm_device *);
 
 extern void drm_helper_mode_fill_fb_struct(struct drm_framebuffer *fb,
-					   struct drm_mode_fb_cmd2 *mode_cmd);
-
-static inline void drm_crtc_helper_add(struct drm_crtc *crtc,
-				       const struct drm_crtc_helper_funcs *funcs)
-{
-	crtc->helper_private = funcs;
-}
-
-static inline void drm_encoder_helper_add(struct drm_encoder *encoder,
-					  const struct drm_encoder_helper_funcs *funcs)
-{
-	encoder->helper_private = funcs;
-}
-
-static inline void drm_connector_helper_add(struct drm_connector *connector,
-					    const struct drm_connector_helper_funcs *funcs)
-{
-	connector->helper_private = funcs;
-}
+					   const struct drm_mode_fb_cmd2 *mode_cmd);
 
 extern void drm_helper_resume_force_mode(struct drm_device *dev);
 
@@ -229,10 +70,6 @@
 extern int drm_helper_probe_single_connector_modes(struct drm_connector
 						   *connector, uint32_t maxX,
 						   uint32_t maxY);
-extern int drm_helper_probe_single_connector_modes_nomerge(struct drm_connector
-							   *connector,
-							   uint32_t maxX,
-							   uint32_t maxY);
 extern void drm_kms_helper_poll_init(struct drm_device *dev);
 extern void drm_kms_helper_poll_fini(struct drm_device *dev);
 extern bool drm_helper_hpd_irq_event(struct drm_device *dev);
diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h
index bb9d0de..1252108 100644
--- a/include/drm/drm_dp_helper.h
+++ b/include/drm/drm_dp_helper.h
@@ -455,16 +455,52 @@
 # define DP_EDP_14			    0x03
 
 #define DP_EDP_GENERAL_CAP_1		    0x701
+# define DP_EDP_TCON_BACKLIGHT_ADJUSTMENT_CAP		(1 << 0)
+# define DP_EDP_BACKLIGHT_PIN_ENABLE_CAP		(1 << 1)
+# define DP_EDP_BACKLIGHT_AUX_ENABLE_CAP		(1 << 2)
+# define DP_EDP_PANEL_SELF_TEST_PIN_ENABLE_CAP		(1 << 3)
+# define DP_EDP_PANEL_SELF_TEST_AUX_ENABLE_CAP		(1 << 4)
+# define DP_EDP_FRC_ENABLE_CAP				(1 << 5)
+# define DP_EDP_COLOR_ENGINE_CAP			(1 << 6)
+# define DP_EDP_SET_POWER_CAP				(1 << 7)
 
 #define DP_EDP_BACKLIGHT_ADJUSTMENT_CAP     0x702
+# define DP_EDP_BACKLIGHT_BRIGHTNESS_PWM_PIN_CAP	(1 << 0)
+# define DP_EDP_BACKLIGHT_BRIGHTNESS_AUX_SET_CAP	(1 << 1)
+# define DP_EDP_BACKLIGHT_BRIGHTNESS_BYTE_COUNT		(1 << 2)
+# define DP_EDP_BACKLIGHT_AUX_PWM_PRODUCT_CAP		(1 << 3)
+# define DP_EDP_BACKLIGHT_FREQ_PWM_PIN_PASSTHRU_CAP	(1 << 4)
+# define DP_EDP_BACKLIGHT_FREQ_AUX_SET_CAP		(1 << 5)
+# define DP_EDP_DYNAMIC_BACKLIGHT_CAP			(1 << 6)
+# define DP_EDP_VBLANK_BACKLIGHT_UPDATE_CAP		(1 << 7)
 
 #define DP_EDP_GENERAL_CAP_2		    0x703
+# define DP_EDP_OVERDRIVE_ENGINE_ENABLED		(1 << 0)
 
 #define DP_EDP_GENERAL_CAP_3		    0x704    /* eDP 1.4 */
+# define DP_EDP_X_REGION_CAP_MASK			(0xf << 0)
+# define DP_EDP_X_REGION_CAP_SHIFT			0
+# define DP_EDP_Y_REGION_CAP_MASK			(0xf << 4)
+# define DP_EDP_Y_REGION_CAP_SHIFT			4
 
 #define DP_EDP_DISPLAY_CONTROL_REGISTER     0x720
+# define DP_EDP_BACKLIGHT_ENABLE			(1 << 0)
+# define DP_EDP_BLACK_VIDEO_ENABLE			(1 << 1)
+# define DP_EDP_FRC_ENABLE				(1 << 2)
+# define DP_EDP_COLOR_ENGINE_ENABLE			(1 << 3)
+# define DP_EDP_VBLANK_BACKLIGHT_UPDATE_ENABLE		(1 << 7)
 
 #define DP_EDP_BACKLIGHT_MODE_SET_REGISTER  0x721
+# define DP_EDP_BACKLIGHT_CONTROL_MODE_MASK		(3 << 0)
+# define DP_EDP_BACKLIGHT_CONTROL_MODE_PWM		(0 << 0)
+# define DP_EDP_BACKLIGHT_CONTROL_MODE_PRESET		(1 << 0)
+# define DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD		(2 << 0)
+# define DP_EDP_BACKLIGHT_CONTROL_MODE_PRODUCT		(3 << 0)
+# define DP_EDP_BACKLIGHT_FREQ_PWM_PIN_PASSTHRU_ENABLE	(1 << 2)
+# define DP_EDP_BACKLIGHT_FREQ_AUX_SET_ENABLE		(1 << 3)
+# define DP_EDP_DYNAMIC_BACKLIGHT_ENABLE		(1 << 4)
+# define DP_EDP_REGIONAL_BACKLIGHT_ENABLE		(1 << 5)
+# define DP_EDP_UPDATE_REGION_BRIGHTNESS		(1 << 6) /* eDP 1.4 */
 
 #define DP_EDP_BACKLIGHT_BRIGHTNESS_MSB     0x722
 #define DP_EDP_BACKLIGHT_BRIGHTNESS_LSB     0x723
diff --git a/include/drm/drm_fb_cma_helper.h b/include/drm/drm_fb_cma_helper.h
index c54cf3d..be62bd3 100644
--- a/include/drm/drm_fb_cma_helper.h
+++ b/include/drm/drm_fb_cma_helper.h
@@ -18,7 +18,7 @@
 void drm_fbdev_cma_hotplug_event(struct drm_fbdev_cma *fbdev_cma);
 
 struct drm_framebuffer *drm_fb_cma_create(struct drm_device *dev,
-	struct drm_file *file_priv, struct drm_mode_fb_cmd2 *mode_cmd);
+	struct drm_file *file_priv, const struct drm_mode_fb_cmd2 *mode_cmd);
 
 struct drm_gem_cma_object *drm_fb_cma_get_gem_obj(struct drm_framebuffer *fb,
 	unsigned int plane);
diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h
index 87b090c..d8a40df 100644
--- a/include/drm/drm_fb_helper.h
+++ b/include/drm/drm_fb_helper.h
@@ -34,6 +34,11 @@
 
 #include <linux/kgdb.h>
 
+enum mode_set_atomic {
+	LEAVE_ATOMIC_MODE_SET,
+	ENTER_ATOMIC_MODE_SET,
+};
+
 struct drm_fb_offset {
 	int x, y;
 };
@@ -74,25 +79,76 @@
 
 /**
  * struct drm_fb_helper_funcs - driver callbacks for the fbdev emulation library
- * @gamma_set: Set the given gamma lut register on the given crtc.
- * @gamma_get: Read the given gamma lut register on the given crtc, used to
- *             save the current lut when force-restoring the fbdev for e.g.
- *             kdbg.
- * @fb_probe: Driver callback to allocate and initialize the fbdev info
- *            structure. Furthermore it also needs to allocate the drm
- *            framebuffer used to back the fbdev.
- * @initial_config: Setup an initial fbdev display configuration
  *
  * Driver callbacks used by the fbdev emulation helper library.
  */
 struct drm_fb_helper_funcs {
+	/**
+	 * @gamma_set:
+	 *
+	 * Set the given gamma LUT register on the given CRTC.
+	 *
+	 * This callback is optional.
+	 *
+	 * FIXME:
+	 *
+	 * This callback is functionally redundant with the core gamma table
+	 * support and simply exists because the fbdev hasn't yet been
+	 * refactored to use the core gamma table interfaces.
+	 */
 	void (*gamma_set)(struct drm_crtc *crtc, u16 red, u16 green,
 			  u16 blue, int regno);
+	/**
+	 * @gamma_get:
+	 *
+	 * Read the given gamma LUT register on the given CRTC, used to save the
+	 * current LUT when force-restoring the fbdev for e.g. kdbg.
+	 *
+	 * This callback is optional.
+	 *
+	 * FIXME:
+	 *
+	 * This callback is functionally redundant with the core gamma table
+	 * support and simply exists because the fbdev hasn't yet been
+	 * refactored to use the core gamma table interfaces.
+	 */
 	void (*gamma_get)(struct drm_crtc *crtc, u16 *red, u16 *green,
 			  u16 *blue, int regno);
 
+	/**
+	 * @fb_probe:
+	 *
+	 * Driver callback to allocate and initialize the fbdev info structure.
+	 * Furthermore it also needs to allocate the DRM framebuffer used to
+	 * back the fbdev.
+	 *
+	 * This callback is mandatory.
+	 *
+	 * RETURNS:
+	 *
+	 * The driver should return 0 on success and a negative error code on
+	 * failure.
+	 */
 	int (*fb_probe)(struct drm_fb_helper *helper,
 			struct drm_fb_helper_surface_size *sizes);
+
+	/**
+	 * @initial_config:
+	 *
+	 * Driver callback to setup an initial fbdev display configuration.
+	 * Drivers can use this callback to tell the fbdev emulation what the
+	 * preferred initial configuration is. This is useful to implement
+	 * smooth booting where the fbdev (and subsequently all userspace) never
+	 * changes the mode, but always inherits the existing configuration.
+	 *
+	 * This callback is optional.
+	 *
+	 * RETURNS:
+	 *
+	 * The driver should return true if a suitable initial configuration has
+	 * been filled out and false when the fbdev helper should fall back to
+	 * the default probing logic.
+	 */
 	bool (*initial_config)(struct drm_fb_helper *fb_helper,
 			       struct drm_fb_helper_crtc **crtcs,
 			       struct drm_display_mode **modes,
@@ -105,18 +161,22 @@
 };
 
 /**
- * struct drm_fb_helper - helper to emulate fbdev on top of kms
- * @fb:  Scanout framebuffer object
- * @dev:  DRM device
+ * struct drm_fb_helper - main structure to emulate fbdev on top of KMS
+ * @fb: Scanout framebuffer object
+ * @dev: DRM device
  * @crtc_count: number of possible CRTCs
  * @crtc_info: per-CRTC helper state (mode, x/y offset, etc)
  * @connector_count: number of connected connectors
  * @connector_info_alloc_count: size of connector_info
+ * @connector_info: array of per-connector information
  * @funcs: driver callbacks for fb helper
  * @fbdev: emulated fbdev device info struct
  * @pseudo_palette: fake palette of 16 colors
- * @kernel_fb_list: list_head in kernel_fb_helper_list
- * @delayed_hotplug: was there a hotplug while kms master active?
+ *
+ * This is the main structure used by the fbdev helpers. Drivers supporting
+ * fbdev emulation should embedded this into their overall driver structure.
+ * Drivers must also fill out a struct &drm_fb_helper_funcs with a few
+ * operations.
  */
 struct drm_fb_helper {
 	struct drm_framebuffer *fb;
@@ -129,10 +189,21 @@
 	const struct drm_fb_helper_funcs *funcs;
 	struct fb_info *fbdev;
 	u32 pseudo_palette[17];
+
+	/**
+	 * @kernel_fb_list:
+	 *
+	 * Entry on the global kernel_fb_helper_list, used for kgdb entry/exit.
+	 */
 	struct list_head kernel_fb_list;
 
-	/* we got a hotplug but fbdev wasn't running the console
-	   delay until next set_par */
+	/**
+	 * @delayed_hotplug:
+	 *
+	 * A hotplug was received while fbdev wasn't in control of the DRM
+	 * device, i.e. another KMS master was active. The output configuration
+	 * needs to be reprobe when fbdev is in control again.
+	 */
 	bool delayed_hotplug;
 
 	/**
diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h
index 15e7f00..0b3e11a 100644
--- a/include/drm/drm_gem.h
+++ b/include/drm/drm_gem.h
@@ -35,76 +35,129 @@
  */
 
 /**
- * This structure defines the drm_mm memory object, which will be used by the
- * DRM for its buffer objects.
+ * struct drm_gem_object - GEM buffer object
+ *
+ * This structure defines the generic parts for GEM buffer objects, which are
+ * mostly around handling mmap and userspace handles.
+ *
+ * Buffer objects are often abbreviated to BO.
  */
 struct drm_gem_object {
-	/** Reference count of this object */
+	/**
+	 * @refcount:
+	 *
+	 * Reference count of this object
+	 *
+	 * Please use drm_gem_object_reference() to acquire and
+	 * drm_gem_object_unreference() or drm_gem_object_unreference_unlocked()
+	 * to release a reference to a GEM buffer object.
+	 */
 	struct kref refcount;
 
 	/**
-	 * handle_count - gem file_priv handle count of this object
+	 * @handle_count:
+	 *
+	 * This is the GEM file_priv handle count of this object.
 	 *
 	 * Each handle also holds a reference. Note that when the handle_count
 	 * drops to 0 any global names (e.g. the id in the flink namespace) will
 	 * be cleared.
 	 *
 	 * Protected by dev->object_name_lock.
-	 * */
+	 */
 	unsigned handle_count;
 
-	/** Related drm device */
+	/**
+	 * @dev: DRM dev this object belongs to.
+	 */
 	struct drm_device *dev;
 
-	/** File representing the shmem storage */
+	/**
+	 * @filp:
+	 *
+	 * SHMEM file node used as backing storage for swappable buffer objects.
+	 * GEM also supports driver private objects with driver-specific backing
+	 * storage (contiguous CMA memory, special reserved blocks). In this
+	 * case @filp is NULL.
+	 */
 	struct file *filp;
 
-	/* Mapping info for this object */
+	/**
+	 * @vma_node:
+	 *
+	 * Mapping info for this object to support mmap. Drivers are supposed to
+	 * allocate the mmap offset using drm_gem_create_mmap_offset(). The
+	 * offset itself can be retrieved using drm_vma_node_offset_addr().
+	 *
+	 * Memory mapping itself is handled by drm_gem_mmap(), which also checks
+	 * that userspace is allowed to access the object.
+	 */
 	struct drm_vma_offset_node vma_node;
 
 	/**
+	 * @size:
+	 *
 	 * Size of the object, in bytes.  Immutable over the object's
 	 * lifetime.
 	 */
 	size_t size;
 
 	/**
+	 * @name:
+	 *
 	 * Global name for this object, starts at 1. 0 means unnamed.
-	 * Access is covered by the object_name_lock in the related drm_device
+	 * Access is covered by dev->object_name_lock. This is used by the GEM_FLINK
+	 * and GEM_OPEN ioctls.
 	 */
 	int name;
 
 	/**
-	 * Memory domains. These monitor which caches contain read/write data
+	 * @read_domains:
+	 *
+	 * Read memory domains. These monitor which caches contain read/write data
 	 * related to the object. When transitioning from one set of domains
 	 * to another, the driver is called to ensure that caches are suitably
-	 * flushed and invalidated
+	 * flushed and invalidated.
 	 */
 	uint32_t read_domains;
+
+	/**
+	 * @write_domain: Corresponding unique write memory domain.
+	 */
 	uint32_t write_domain;
 
 	/**
+	 * @pending_read_domains:
+	 *
 	 * While validating an exec operation, the
 	 * new read/write domain values are computed here.
 	 * They will be transferred to the above values
 	 * at the point that any cache flushing occurs
 	 */
 	uint32_t pending_read_domains;
+
+	/**
+	 * @pending_write_domain: Write domain similar to @pending_read_domains.
+	 */
 	uint32_t pending_write_domain;
 
 	/**
-	 * dma_buf - dma buf associated with this GEM object
+	 * @dma_buf:
+	 *
+	 * dma-buf associated with this GEM object.
 	 *
 	 * Pointer to the dma-buf associated with this gem object (either
 	 * through importing or exporting). We break the resulting reference
 	 * loop when the last gem handle for this object is released.
 	 *
-	 * Protected by obj->object_name_lock
+	 * Protected by obj->object_name_lock.
 	 */
 	struct dma_buf *dma_buf;
 
 	/**
-	 * import_attach - dma buf attachment backing this object
+	 * @import_attach:
+	 *
+	 * dma-buf attachment backing this object.
 	 *
 	 * Any foreign dma_buf imported as a gem object has this set to the
 	 * attachment point for the device. This is invariant over the lifetime
@@ -133,12 +186,30 @@
 		     struct vm_area_struct *vma);
 int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma);
 
+/**
+ * drm_gem_object_reference - acquire a GEM BO reference
+ * @obj: GEM buffer object
+ *
+ * This acquires additional reference to @obj. It is illegal to call this
+ * without already holding a reference. No locks required.
+ */
 static inline void
 drm_gem_object_reference(struct drm_gem_object *obj)
 {
 	kref_get(&obj->refcount);
 }
 
+/**
+ * drm_gem_object_unreference - release a GEM BO reference
+ * @obj: GEM buffer object
+ *
+ * This releases a reference to @obj. Callers must hold the dev->struct_mutex
+ * lock when calling this function, even when the driver doesn't use
+ * dev->struct_mutex for anything.
+ *
+ * For drivers not encumbered with legacy locking use
+ * drm_gem_object_unreference_unlocked() instead.
+ */
 static inline void
 drm_gem_object_unreference(struct drm_gem_object *obj)
 {
@@ -149,6 +220,13 @@
 	}
 }
 
+/**
+ * drm_gem_object_unreference_unlocked - release a GEM BO reference
+ * @obj: GEM buffer object
+ *
+ * This releases a reference to @obj. Callers must not hold the
+ * dev->struct_mutex lock when calling this function.
+ */
 static inline void
 drm_gem_object_unreference_unlocked(struct drm_gem_object *obj)
 {
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index 0de6290..fc65118 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -148,8 +148,7 @@
 
 static inline u64 __drm_mm_hole_node_end(struct drm_mm_node *hole_node)
 {
-	return list_entry(hole_node->node_list.next,
-			  struct drm_mm_node, node_list)->start;
+	return list_next_entry(hole_node, node_list)->start;
 }
 
 /**
@@ -180,6 +179,14 @@
 						&(mm)->head_node.node_list, \
 						node_list)
 
+#define __drm_mm_for_each_hole(entry, mm, hole_start, hole_end, backwards) \
+	for (entry = list_entry((backwards) ? (mm)->hole_stack.prev : (mm)->hole_stack.next, struct drm_mm_node, hole_stack); \
+	     &entry->hole_stack != &(mm)->hole_stack ? \
+	     hole_start = drm_mm_hole_node_start(entry), \
+	     hole_end = drm_mm_hole_node_end(entry), \
+	     1 : 0; \
+	     entry = list_entry((backwards) ? entry->hole_stack.prev : entry->hole_stack.next, struct drm_mm_node, hole_stack))
+
 /**
  * drm_mm_for_each_hole - iterator to walk over all holes
  * @entry: drm_mm_node used internally to track progress
@@ -200,20 +207,7 @@
  * going backwards.
  */
 #define drm_mm_for_each_hole(entry, mm, hole_start, hole_end) \
-	for (entry = list_entry((mm)->hole_stack.next, struct drm_mm_node, hole_stack); \
-	     &entry->hole_stack != &(mm)->hole_stack ? \
-	     hole_start = drm_mm_hole_node_start(entry), \
-	     hole_end = drm_mm_hole_node_end(entry), \
-	     1 : 0; \
-	     entry = list_entry(entry->hole_stack.next, struct drm_mm_node, hole_stack))
-
-#define __drm_mm_for_each_hole(entry, mm, hole_start, hole_end, backwards) \
-	for (entry = list_entry((backwards) ? (mm)->hole_stack.prev : (mm)->hole_stack.next, struct drm_mm_node, hole_stack); \
-	     &entry->hole_stack != &(mm)->hole_stack ? \
-	     hole_start = drm_mm_hole_node_start(entry), \
-	     hole_end = drm_mm_hole_node_end(entry), \
-	     1 : 0; \
-	     entry = list_entry((backwards) ? entry->hole_stack.prev : entry->hole_stack.next, struct drm_mm_node, hole_stack))
+	__drm_mm_for_each_hole(entry, mm, hole_start, hole_end, 0)
 
 /*
  * Basic range manager support (drm_mm.c)
diff --git a/include/drm/drm_modes.h b/include/drm/drm_modes.h
index 08a8cac..625966a 100644
--- a/include/drm/drm_modes.h
+++ b/include/drm/drm_modes.h
@@ -35,46 +35,91 @@
  * structures).
  */
 
+/**
+ * enum drm_mode_status - hardware support status of a mode
+ * @MODE_OK: Mode OK
+ * @MODE_HSYNC: hsync out of range
+ * @MODE_VSYNC: vsync out of range
+ * @MODE_H_ILLEGAL: mode has illegal horizontal timings
+ * @MODE_V_ILLEGAL: mode has illegal horizontal timings
+ * @MODE_BAD_WIDTH: requires an unsupported linepitch
+ * @MODE_NOMODE: no mode with a matching name
+ * @MODE_NO_INTERLACE: interlaced mode not supported
+ * @MODE_NO_DBLESCAN: doublescan mode not supported
+ * @MODE_NO_VSCAN: multiscan mode not supported
+ * @MODE_MEM: insufficient video memory
+ * @MODE_VIRTUAL_X: mode width too large for specified virtual size
+ * @MODE_VIRTUAL_Y: mode height too large for specified virtual size
+ * @MODE_MEM_VIRT: insufficient video memory given virtual size
+ * @MODE_NOCLOCK: no fixed clock available
+ * @MODE_CLOCK_HIGH: clock required is too high
+ * @MODE_CLOCK_LOW: clock required is too low
+ * @MODE_CLOCK_RANGE: clock/mode isn't in a ClockRange
+ * @MODE_BAD_HVALUE: horizontal timing was out of range
+ * @MODE_BAD_VVALUE: vertical timing was out of range
+ * @MODE_BAD_VSCAN: VScan value out of range
+ * @MODE_HSYNC_NARROW: horizontal sync too narrow
+ * @MODE_HSYNC_WIDE: horizontal sync too wide
+ * @MODE_HBLANK_NARROW: horizontal blanking too narrow
+ * @MODE_HBLANK_WIDE: horizontal blanking too wide
+ * @MODE_VSYNC_NARROW: vertical sync too narrow
+ * @MODE_VSYNC_WIDE: vertical sync too wide
+ * @MODE_VBLANK_NARROW: vertical blanking too narrow
+ * @MODE_VBLANK_WIDE: vertical blanking too wide
+ * @MODE_PANEL: exceeds panel dimensions
+ * @MODE_INTERLACE_WIDTH: width too large for interlaced mode
+ * @MODE_ONE_WIDTH: only one width is supported
+ * @MODE_ONE_HEIGHT: only one height is supported
+ * @MODE_ONE_SIZE: only one resolution is supported
+ * @MODE_NO_REDUCED: monitor doesn't accept reduced blanking
+ * @MODE_NO_STEREO: stereo modes not supported
+ * @MODE_STALE: mode has become stale
+ * @MODE_BAD: unspecified reason
+ * @MODE_ERROR: error condition
+ *
+ * This enum is used to filter out modes not supported by the driver/hardware
+ * combination.
+ */
 enum drm_mode_status {
-    MODE_OK	= 0,	/* Mode OK */
-    MODE_HSYNC,		/* hsync out of range */
-    MODE_VSYNC,		/* vsync out of range */
-    MODE_H_ILLEGAL,	/* mode has illegal horizontal timings */
-    MODE_V_ILLEGAL,	/* mode has illegal horizontal timings */
-    MODE_BAD_WIDTH,	/* requires an unsupported linepitch */
-    MODE_NOMODE,	/* no mode with a matching name */
-    MODE_NO_INTERLACE,	/* interlaced mode not supported */
-    MODE_NO_DBLESCAN,	/* doublescan mode not supported */
-    MODE_NO_VSCAN,	/* multiscan mode not supported */
-    MODE_MEM,		/* insufficient video memory */
-    MODE_VIRTUAL_X,	/* mode width too large for specified virtual size */
-    MODE_VIRTUAL_Y,	/* mode height too large for specified virtual size */
-    MODE_MEM_VIRT,	/* insufficient video memory given virtual size */
-    MODE_NOCLOCK,	/* no fixed clock available */
-    MODE_CLOCK_HIGH,	/* clock required is too high */
-    MODE_CLOCK_LOW,	/* clock required is too low */
-    MODE_CLOCK_RANGE,	/* clock/mode isn't in a ClockRange */
-    MODE_BAD_HVALUE,	/* horizontal timing was out of range */
-    MODE_BAD_VVALUE,	/* vertical timing was out of range */
-    MODE_BAD_VSCAN,	/* VScan value out of range */
-    MODE_HSYNC_NARROW,	/* horizontal sync too narrow */
-    MODE_HSYNC_WIDE,	/* horizontal sync too wide */
-    MODE_HBLANK_NARROW,	/* horizontal blanking too narrow */
-    MODE_HBLANK_WIDE,	/* horizontal blanking too wide */
-    MODE_VSYNC_NARROW,	/* vertical sync too narrow */
-    MODE_VSYNC_WIDE,	/* vertical sync too wide */
-    MODE_VBLANK_NARROW,	/* vertical blanking too narrow */
-    MODE_VBLANK_WIDE,	/* vertical blanking too wide */
-    MODE_PANEL,         /* exceeds panel dimensions */
-    MODE_INTERLACE_WIDTH, /* width too large for interlaced mode */
-    MODE_ONE_WIDTH,     /* only one width is supported */
-    MODE_ONE_HEIGHT,    /* only one height is supported */
-    MODE_ONE_SIZE,      /* only one resolution is supported */
-    MODE_NO_REDUCED,    /* monitor doesn't accept reduced blanking */
-    MODE_NO_STEREO,	/* stereo modes not supported */
-    MODE_UNVERIFIED = -3, /* mode needs to reverified */
-    MODE_BAD = -2,	/* unspecified reason */
-    MODE_ERROR	= -1	/* error condition */
+	MODE_OK = 0,
+	MODE_HSYNC,
+	MODE_VSYNC,
+	MODE_H_ILLEGAL,
+	MODE_V_ILLEGAL,
+	MODE_BAD_WIDTH,
+	MODE_NOMODE,
+	MODE_NO_INTERLACE,
+	MODE_NO_DBLESCAN,
+	MODE_NO_VSCAN,
+	MODE_MEM,
+	MODE_VIRTUAL_X,
+	MODE_VIRTUAL_Y,
+	MODE_MEM_VIRT,
+	MODE_NOCLOCK,
+	MODE_CLOCK_HIGH,
+	MODE_CLOCK_LOW,
+	MODE_CLOCK_RANGE,
+	MODE_BAD_HVALUE,
+	MODE_BAD_VVALUE,
+	MODE_BAD_VSCAN,
+	MODE_HSYNC_NARROW,
+	MODE_HSYNC_WIDE,
+	MODE_HBLANK_NARROW,
+	MODE_HBLANK_WIDE,
+	MODE_VSYNC_NARROW,
+	MODE_VSYNC_WIDE,
+	MODE_VBLANK_NARROW,
+	MODE_VBLANK_WIDE,
+	MODE_PANEL,
+	MODE_INTERLACE_WIDTH,
+	MODE_ONE_WIDTH,
+	MODE_ONE_HEIGHT,
+	MODE_ONE_SIZE,
+	MODE_NO_REDUCED,
+	MODE_NO_STEREO,
+	MODE_STALE = -3,
+	MODE_BAD = -2,
+	MODE_ERROR = -1
 };
 
 #define DRM_MODE_TYPE_CLOCK_CRTC_C (DRM_MODE_TYPE_CLOCK_C | \
@@ -96,17 +141,125 @@
 
 #define DRM_MODE_FLAG_3D_MAX	DRM_MODE_FLAG_3D_SIDE_BY_SIDE_HALF
 
+/**
+ * struct drm_display_mode - DRM kernel-internal display mode structure
+ * @hdisplay: horizontal display size
+ * @hsync_start: horizontal sync start
+ * @hsync_end: horizontal sync end
+ * @htotal: horizontal total size
+ * @hskew: horizontal skew?!
+ * @vdisplay: vertical display size
+ * @vsync_start: vertical sync start
+ * @vsync_end: vertical sync end
+ * @vtotal: vertical total size
+ * @vscan: vertical scan?!
+ * @crtc_hdisplay: hardware mode horizontal display size
+ * @crtc_hblank_start: hardware mode horizontal blank start
+ * @crtc_hblank_end: hardware mode horizontal blank end
+ * @crtc_hsync_start: hardware mode horizontal sync start
+ * @crtc_hsync_end: hardware mode horizontal sync end
+ * @crtc_htotal: hardware mode horizontal total size
+ * @crtc_hskew: hardware mode horizontal skew?!
+ * @crtc_vdisplay: hardware mode vertical display size
+ * @crtc_vblank_start: hardware mode vertical blank start
+ * @crtc_vblank_end: hardware mode vertical blank end
+ * @crtc_vsync_start: hardware mode vertical sync start
+ * @crtc_vsync_end: hardware mode vertical sync end
+ * @crtc_vtotal: hardware mode vertical total size
+ *
+ * The horizontal and vertical timings are defined per the following diagram.
+ *
+ *
+ *               Active                 Front           Sync           Back
+ *              Region                 Porch                          Porch
+ *     <-----------------------><----------------><-------------><-------------->
+ *       //////////////////////|
+ *      ////////////////////// |
+ *     //////////////////////  |..................               ................
+ *                                                _______________
+ *     <----- [hv]display ----->
+ *     <------------- [hv]sync_start ------------>
+ *     <--------------------- [hv]sync_end --------------------->
+ *     <-------------------------------- [hv]total ----------------------------->*
+ *
+ * This structure contains two copies of timings. First are the plain timings,
+ * which specify the logical mode, as it would be for a progressive 1:1 scanout
+ * at the refresh rate userspace can observe through vblank timestamps. Then
+ * there's the hardware timings, which are corrected for interlacing,
+ * double-clocking and similar things. They are provided as a convenience, and
+ * can be appropriately computed using drm_mode_set_crtcinfo().
+ */
 struct drm_display_mode {
-	/* Header */
+	/**
+	 * @head:
+	 *
+	 * struct list_head for mode lists.
+	 */
 	struct list_head head;
+
+	/**
+	 * @base:
+	 *
+	 * A display mode is a normal modeset object, possibly including public
+	 * userspace id.
+	 *
+	 * FIXME:
+	 *
+	 * This can probably be removed since the entire concept of userspace
+	 * managing modes explicitly has never landed in upstream kernel mode
+	 * setting support.
+	 */
 	struct drm_mode_object base;
 
+	/**
+	 * @name:
+	 *
+	 * Human-readable name of the mode, filled out with drm_mode_set_name().
+	 */
 	char name[DRM_DISPLAY_MODE_LEN];
 
+	/**
+	 * @status:
+	 *
+	 * Status of the mode, used to filter out modes not supported by the
+	 * hardware. See enum &drm_mode_status.
+	 */
 	enum drm_mode_status status;
+
+	/**
+	 * @type:
+	 *
+	 * A bitmask of flags, mostly about the source of a mode. Possible flags
+	 * are:
+	 *
+	 *  - DRM_MODE_TYPE_BUILTIN: Meant for hard-coded modes, effectively
+	 *    unused.
+	 *  - DRM_MODE_TYPE_PREFERRED: Preferred mode, usually the native
+	 *    resolution of an LCD panel. There should only be one preferred
+	 *    mode per connector at any given time.
+	 *  - DRM_MODE_TYPE_DRIVER: Mode created by the driver, which is all of
+	 *    them really. Drivers must set this bit for all modes they create
+	 *    and expose to userspace.
+	 *
+	 * Plus a big list of flags which shouldn't be used at all, but are
+	 * still around since these flags are also used in the userspace ABI:
+	 *
+	 *  - DRM_MODE_TYPE_DEFAULT: Again a leftover, use
+	 *    DRM_MODE_TYPE_PREFERRED instead.
+	 *  - DRM_MODE_TYPE_CLOCK_C and DRM_MODE_TYPE_CRTC_C: Define leftovers
+	 *    which are stuck around for hysterical raisins only. No one has an
+	 *    idea what they were meant for. Don't use.
+	 *  - DRM_MODE_TYPE_USERDEF: Mode defined by userspace, again a vestige
+	 *    from older kms designs where userspace had to first add a custom
+	 *    mode to the kernel's mode list before it could use it. Don't use.
+	 */
 	unsigned int type;
 
-	/* Proposed mode values */
+	/**
+	 * @clock:
+	 *
+	 * Pixel clock in kHz.
+	 */
 	int clock;		/* in kHz */
 	int hdisplay;
 	int hsync_start;
@@ -118,14 +271,74 @@
 	int vsync_end;
 	int vtotal;
 	int vscan;
+	/**
+	 * @flags:
+	 *
+	 * Sync and timing flags:
+	 *
+	 *  - DRM_MODE_FLAG_PHSYNC: horizontal sync is active high.
+	 *  - DRM_MODE_FLAG_NHSYNC: horizontal sync is active low.
+	 *  - DRM_MODE_FLAG_PVSYNC: vertical sync is active high.
+	 *  - DRM_MODE_FLAG_NVSYNC: vertical sync is active low.
+	 *  - DRM_MODE_FLAG_INTERLACE: mode is interlaced.
+	 *  - DRM_MODE_FLAG_DBLSCAN: mode uses doublescan.
+	 *  - DRM_MODE_FLAG_CSYNC: mode uses composite sync.
+	 *  - DRM_MODE_FLAG_PCSYNC: composite sync is active high.
+	 *  - DRM_MODE_FLAG_NCSYNC: composite sync is active low.
+	 *  - DRM_MODE_FLAG_HSKEW: hskew provided (not used?).
+	 *  - DRM_MODE_FLAG_BCAST: not used?
+	 *  - DRM_MODE_FLAG_PIXMUX: not used?
+	 *  - DRM_MODE_FLAG_DBLCLK: double-clocked mode.
+	 *  - DRM_MODE_FLAG_CLKDIV2: half-clocked mode.
+	 *
+	 * Additionally there's flags to specify how 3D modes are packed:
+	 *
+	 *  - DRM_MODE_FLAG_3D_NONE: normal, non-3D mode.
+	 *  - DRM_MODE_FLAG_3D_FRAME_PACKING: 2 full frames for left and right.
+	 *  - DRM_MODE_FLAG_3D_FIELD_ALTERNATIVE: interleaved like fields.
+	 *  - DRM_MODE_FLAG_3D_LINE_ALTERNATIVE: interleaved lines.
+	 *  - DRM_MODE_FLAG_3D_SIDE_BY_SIDE_FULL: side-by-side full frames.
+	 *  - DRM_MODE_FLAG_3D_L_DEPTH: ?
+	 *  - DRM_MODE_FLAG_3D_L_DEPTH_GFX_GFX_DEPTH: ?
+	 *  - DRM_MODE_FLAG_3D_TOP_AND_BOTTOM: frame split into top and bottom
+	 *    parts.
+	 *  - DRM_MODE_FLAG_3D_SIDE_BY_SIDE_HALF: frame split into left and
+	 *    right parts.
+	 */
 	unsigned int flags;
 
-	/* Addressable image size (may be 0 for projectors, etc.) */
+	/**
+	 * @width_mm:
+	 *
+	 * Addressable size of the output in mm, projectors should set this to
+	 * 0.
+	 */
 	int width_mm;
+
+	/**
+	 * @height_mm:
+	 *
+	 * Addressable size of the output in mm, projectors should set this to
+	 * 0.
+	 */
 	int height_mm;
 
-	/* Actual mode we give to hw */
-	int crtc_clock;		/* in KHz */
+	/**
+	 * @crtc_clock:
+	 *
+	 * Actual pixel or dot clock in the hardware. This differs from the
+	 * logical @clock when e.g. using interlacing, double-clocking, stereo
+	 * modes or other fancy stuff that changes the timings and signals
+	 * actually sent over the wire.
+	 *
+	 * This is again in kHz.
+	 *
+	 * Note that with digital outputs like HDMI or DP there's usually a
+	 * massive confusion between the dot clock and the signal clock at the
+	 * bit encoding level. Especially when a 8b/10b encoding is used and the
+	 * difference is exactly a factor of 10.
+	 */
+	int crtc_clock;
 	int crtc_hdisplay;
 	int crtc_hblank_start;
 	int crtc_hblank_end;
@@ -140,12 +353,48 @@
 	int crtc_vsync_end;
 	int crtc_vtotal;
 
-	/* Driver private mode info */
+	/**
+	 * @private:
+	 *
+	 * Pointer for driver private data. This can only be used for mode
+	 * objects passed to drivers in modeset operations. It shouldn't be used
+	 * by atomic drivers since they can store any additional data by
+	 * subclassing state structures.
+	 */
 	int *private;
+
+	/**
+	 * @private_flags:
+	 *
+	 * Similar to @private, but just an integer.
+	 */
 	int private_flags;
 
-	int vrefresh;		/* in Hz */
-	int hsync;		/* in kHz */
+	/**
+	 * @vrefresh:
+	 *
+	 * Vertical refresh rate, for debug output in human readable form. Not
+	 * used in a functional way.
+	 *
+	 * This value is in Hz.
+	 */
+	int vrefresh;
+
+	/**
+	 * @hsync:
+	 *
+	 * Horizontal refresh rate, for debug output in human readable form. Not
+	 * used in a functional way.
+	 *
+	 * This value is in kHz.
+	 */
+	int hsync;
+
+	/**
+	 * @picture_aspect_ratio:
+	 *
+	 * Field for setting the HDMI picture aspect ratio of a mode.
+	 */
 	enum hdmi_picture_aspect picture_aspect_ratio;
 };
 
@@ -222,6 +471,8 @@
 					    const struct drm_display_mode *mode);
 bool drm_mode_equal(const struct drm_display_mode *mode1,
 		    const struct drm_display_mode *mode2);
+bool drm_mode_equal_no_clocks(const struct drm_display_mode *mode1,
+			      const struct drm_display_mode *mode2);
 bool drm_mode_equal_no_clocks_no_stereo(const struct drm_display_mode *mode1,
 					const struct drm_display_mode *mode2);
 
@@ -232,7 +483,7 @@
 void drm_mode_prune_invalid(struct drm_device *dev,
 			    struct list_head *mode_list, bool verbose);
 void drm_mode_sort(struct list_head *mode_list);
-void drm_mode_connector_list_update(struct drm_connector *connector, bool merge_type_bits);
+void drm_mode_connector_list_update(struct drm_connector *connector);
 
 /* parsing cmdline modes */
 bool
diff --git a/include/drm/drm_modeset_helper_vtables.h b/include/drm/drm_modeset_helper_vtables.h
new file mode 100644
index 0000000..29e0dc5
--- /dev/null
+++ b/include/drm/drm_modeset_helper_vtables.h
@@ -0,0 +1,890 @@
+/*
+ * Copyright © 2006 Keith Packard
+ * Copyright © 2007-2008 Dave Airlie
+ * Copyright © 2007-2008 Intel Corporation
+ *   Jesse Barnes <jesse.barnes@intel.com>
+ * Copyright © 2011-2013 Intel Corporation
+ * Copyright © 2015 Intel Corporation
+ *   Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __DRM_MODESET_HELPER_VTABLES_H__
+#define __DRM_MODESET_HELPER_VTABLES_H__
+
+#include <drm/drm_crtc.h>
+
+/**
+ * DOC: overview
+ *
+ * The DRM mode setting helper functions are common code for drivers to use if
+ * they wish.  Drivers are not forced to use this code in their
+ * implementations but it would be useful if the code they do use at least
+ * provides a consistent interface and operation to userspace. Therefore it is
+ * highly recommended to use the provided helpers as much as possible.
+ *
+ * Because there is only one pointer per modeset object to hold a vfunc table
+ * for helper libraries they are by necessity shared among the different
+ * helpers.
+ *
+ * To make this clear all the helper vtables are pulled together in this location here.
+ */
+
+enum mode_set_atomic;
+
+/**
+ * struct drm_crtc_helper_funcs - helper operations for CRTCs
+ *
+ * These hooks are used by the legacy CRTC helpers, the transitional plane
+ * helpers and the new atomic modesetting helpers.
+ */
+struct drm_crtc_helper_funcs {
+	/**
+	 * @dpms:
+	 *
+	 * Callback to control power levels on the CRTC.  If the mode passed in
+	 * is unsupported, the provider must use the next lowest power level.
+	 * This is used by the legacy CRTC helpers to implement DPMS
+	 * functionality in drm_helper_connector_dpms().
+	 *
+	 * This callback is also used to disable a CRTC by calling it with
+	 * DRM_MODE_DPMS_OFF if the @disable hook isn't used.
+	 *
+	 * This callback is used by the legacy CRTC helpers.  Atomic helpers
+	 * also support using this hook for enabling and disabling a CRTC to
+	 * facilitate transitions to atomic, but it is deprecated. Instead
+	 * @enable and @disable should be used.
+	 */
+	void (*dpms)(struct drm_crtc *crtc, int mode);
+
+	/**
+	 * @prepare:
+	 *
+	 * This callback should prepare the CRTC for a subsequent modeset, which
+	 * in practice means the driver should disable the CRTC if it is
+	 * running. Most drivers ended up implementing this by calling their
+	 * @dpms hook with DRM_MODE_DPMS_OFF.
+	 *
+	 * This callback is used by the legacy CRTC helpers.  Atomic helpers
+	 * also support using this hook for disabling a CRTC to facilitate
+	 * transitions to atomic, but it is deprecated. Instead @disable should
+	 * be used.
+	 */
+	void (*prepare)(struct drm_crtc *crtc);
+
+	/**
+	 * @commit:
+	 *
+	 * This callback should commit the new mode on the CRTC after a modeset,
+	 * which in practice means the driver should enable the CRTC.  Most
+	 * drivers ended up implementing this by calling their @dpms hook with
+	 * DRM_MODE_DPMS_ON.
+	 *
+	 * This callback is used by the legacy CRTC helpers.  Atomic helpers
+	 * also support using this hook for enabling a CRTC to facilitate
+	 * transitions to atomic, but it is deprecated. Instead @enable should
+	 * be used.
+	 */
+	void (*commit)(struct drm_crtc *crtc);
+
+	/**
+	 * @mode_fixup:
+	 *
+	 * This callback is used to validate a mode. The parameter mode is the
+	 * display mode that userspace requested, adjusted_mode is the mode the
+	 * encoders need to be fed with. Note that this is the inverse semantics
+	 * of the meaning for the &drm_encoder and &drm_bridge
+	 * ->mode_fixup() functions. If the CRTC cannot support the requested
+	 * conversion from mode to adjusted_mode it should reject the modeset.
+	 *
+	 * This function is used by both legacy CRTC helpers and atomic helpers.
+	 * With atomic helpers it is optional.
+	 *
+	 * NOTE:
+	 *
+	 * This function is called in the check phase of atomic modesets, which
+	 * can be aborted for any reason (including on userspace's request to
+	 * just check whether a configuration would be possible). Atomic drivers
+	 * MUST NOT touch any persistent state (hardware or software) or data
+	 * structures except the passed in adjusted_mode parameter.
+	 *
+	 * This is in contrast to the legacy CRTC helpers where this was
+	 * allowed.
+	 *
+	 * Atomic drivers which need to inspect and adjust more state should
+	 * instead use the @atomic_check callback.
+	 *
+	 * RETURNS:
+	 *
+	 * True if an acceptable configuration is possible, false if the modeset
+	 * operation should be rejected.
+	 */
+	bool (*mode_fixup)(struct drm_crtc *crtc,
+			   const struct drm_display_mode *mode,
+			   struct drm_display_mode *adjusted_mode);
+
+	/**
+	 * @mode_set:
+	 *
+	 * This callback is used by the legacy CRTC helpers to set a new mode,
+	 * position and framebuffer. Since it ties the primary plane to every
+	 * mode change it is incompatible with universal plane support. And
+	 * since it can't update other planes it's incompatible with atomic
+	 * modeset support.
+	 *
+	 * This callback is only used by CRTC helpers and deprecated.
+	 *
+	 * RETURNS:
+	 *
+	 * 0 on success or a negative error code on failure.
+	 */
+	int (*mode_set)(struct drm_crtc *crtc, struct drm_display_mode *mode,
+			struct drm_display_mode *adjusted_mode, int x, int y,
+			struct drm_framebuffer *old_fb);
+
+	/**
+	 * @mode_set_nofb:
+	 *
+	 * This callback is used to update the display mode of a CRTC without
+	 * changing anything of the primary plane configuration. This fits the
+	 * requirement of atomic and hence is used by the atomic helpers. It is
+	 * also used by the transitional plane helpers to implement a
+	 * @mode_set hook in drm_helper_crtc_mode_set().
+	 *
+	 * Note that the display pipe is completely off when this function is
+	 * called. Atomic drivers which need hardware to be running before they
+	 * program the new display mode (e.g. because they implement runtime PM)
+	 * should not use this hook. This is because the helper library calls
+	 * this hook only once per mode change and not every time the display
+	 * pipeline is suspended using either DPMS or the new "ACTIVE" property.
+	 * Which means register values set in this callback might get reset when
+	 * the CRTC is suspended, but not restored.  Such drivers should instead
+	 * move all their CRTC setup into the @enable callback.
+	 *
+	 * This callback is optional.
+	 */
+	void (*mode_set_nofb)(struct drm_crtc *crtc);
+
+	/**
+	 * @mode_set_base:
+	 *
+	 * This callback is used by the legacy CRTC helpers to set a new
+	 * framebuffer and scanout position. It is optional and used as an
+	 * optimized fast-path instead of a full mode set operation with all the
+	 * resulting flickering. Since it can't update other planes it's
+	 * incompatible with atomic modeset support.
+	 *
+	 * This callback is only used by the CRTC helpers and deprecated.
+	 *
+	 * RETURNS:
+	 *
+	 * 0 on success or a negative error code on failure.
+	 */
+	int (*mode_set_base)(struct drm_crtc *crtc, int x, int y,
+			     struct drm_framebuffer *old_fb);
+
+	/**
+	 * @mode_set_base_atomic:
+	 *
+	 * This callback is used by the fbdev helpers to set a new framebuffer
+	 * and scanout without sleeping, i.e. from an atomic calling context. It
+	 * is only used to implement kgdb support.
+	 *
+	 * This callback is optional and only needed for kgdb support in the fbdev
+	 * helpers.
+	 *
+	 * RETURNS:
+	 *
+	 * 0 on success or a negative error code on failure.
+	 */
+	int (*mode_set_base_atomic)(struct drm_crtc *crtc,
+				    struct drm_framebuffer *fb, int x, int y,
+				    enum mode_set_atomic);
+
+	/**
+	 * @load_lut:
+	 *
+	 * Load a LUT prepared with the @gamma_set functions from
+	 * &drm_fb_helper_funcs.
+	 *
+	 * This callback is optional and is only used by the fbdev emulation
+	 * helpers.
+	 *
+	 * FIXME:
+	 *
+	 * This callback is functionally redundant with the core gamma table
+	 * support and simply exists because the fbdev hasn't yet been
+	 * refactored to use the core gamma table interfaces.
+	 */
+	void (*load_lut)(struct drm_crtc *crtc);
+
+	/**
+	 * @disable:
+	 *
+	 * This callback should be used to disable the CRTC. With the atomic
+	 * drivers it is called after all encoders connected to this CRTC have
+	 * been shut off already using their own ->disable hook. If that
+	 * sequence is too simple drivers can just add their own hooks and call
+	 * it from this CRTC callback here by looping over all encoders
+	 * connected to it using for_each_encoder_on_crtc().
+	 *
+	 * This hook is used both by legacy CRTC helpers and atomic helpers.
+	 * Atomic drivers don't need to implement it if there's no need to
+	 * disable anything at the CRTC level. To ensure that runtime PM
+	 * handling (using either DPMS or the new "ACTIVE" property) works
+	 * @disable must be the inverse of @enable for atomic drivers.
+	 *
+	 * NOTE:
+	 *
+	 * With legacy CRTC helpers there's a big semantic difference between
+	 * @disable and other hooks (like @prepare or @dpms) used to shut down a
+	 * CRTC: @disable is only called when also logically disabling the
+	 * display pipeline and needs to release any resources acquired in
+	 * @mode_set (like shared PLLs, or again release pinned framebuffers).
+	 *
+	 * Therefore @disable must be the inverse of @mode_set plus @commit for
+	 * drivers still using legacy CRTC helpers, which is different from the
+	 * rules under atomic.
+	 */
+	void (*disable)(struct drm_crtc *crtc);
+
+	/**
+	 * @enable:
+	 *
+	 * This callback should be used to enable the CRTC. With the atomic
+	 * drivers it is called before all encoders connected to this CRTC are
+	 * enabled through the encoder's own ->enable hook.  If that sequence is
+	 * too simple drivers can just add their own hooks and call it from this
+	 * CRTC callback here by looping over all encoders connected to it using
+	 * for_each_encoder_on_crtc().
+	 *
+	 * This hook is used only by atomic helpers, for symmetry with @disable.
+	 * Atomic drivers don't need to implement it if there's no need to
+	 * enable anything at the CRTC level. To ensure that runtime PM handling
+	 * (using either DPMS or the new "ACTIVE" property) works
+	 * @enable must be the inverse of @disable for atomic drivers.
+	 */
+	void (*enable)(struct drm_crtc *crtc);
+
+	/**
+	 * @atomic_check:
+	 *
+	 * Drivers should check plane-update related CRTC constraints in this
+	 * hook. They can also check mode related limitations but need to be
+	 * aware of the calling order, since this hook is used by
+	 * drm_atomic_helper_check_planes() whereas the preparations needed to
+	 * check output routing and the display mode is done in
+	 * drm_atomic_helper_check_modeset(). Therefore drivers that want to
+	 * check output routing and display mode constraints in this callback
+	 * must ensure that drm_atomic_helper_check_modeset() has been called
+	 * beforehand. This is calling order used by the default helper
+	 * implementation in drm_atomic_helper_check().
+	 *
+	 * When using drm_atomic_helper_check_planes() CRTCs' ->atomic_check()
+	 * hooks are called after the ones for planes, which allows drivers to
+	 * assign shared resources requested by planes in the CRTC callback
+	 * here. For more complicated dependencies the driver can call the provided
+	 * check helpers multiple times until the computed state has a final
+	 * configuration and everything has been checked.
+	 *
+	 * This function is also allowed to inspect any other object's state and
+	 * can add more state objects to the atomic commit if needed. Care must
+	 * be taken though to ensure that state check&compute functions for
+	 * these added states are all called, and derived state in other objects
+	 * all updated. Again the recommendation is to just call check helpers
+	 * until a maximal configuration is reached.
+	 *
+	 * This callback is used by the atomic modeset helpers and by the
+	 * transitional plane helpers, but it is optional.
+	 *
+	 * NOTE:
+	 *
+	 * This function is called in the check phase of an atomic update. The
+	 * driver is not allowed to change anything outside of the free-standing
+	 * state objects passed-in or assembled in the overall &drm_atomic_state
+	 * update tracking structure.
+	 *
+	 * RETURNS:
+	 *
+	 * 0 on success, -EINVAL if the state or the transition can't be
+	 * supported, -ENOMEM on memory allocation failure and -EDEADLK if an
+	 * attempt to obtain another state object ran into a &drm_modeset_lock
+	 * deadlock.
+	 */
+	int (*atomic_check)(struct drm_crtc *crtc,
+			    struct drm_crtc_state *state);
+
+	/**
+	 * @atomic_begin:
+	 *
+	 * Drivers should prepare for an atomic update of multiple planes on
+	 * a CRTC in this hook. Depending upon hardware this might be vblank
+	 * evasion, blocking updates by setting bits or doing preparatory work
+	 * for e.g. manual update display.
+	 *
+	 * This hook is called before any plane commit functions are called.
+	 *
+	 * Note that the power state of the display pipe when this function is
+	 * called depends upon the exact helpers and calling sequence the driver
+	 * has picked. See drm_atomic_commit_planes() for a discussion of the
+	 * tradeoffs and variants of plane commit helpers.
+	 *
+	 * This callback is used by the atomic modeset helpers and by the
+	 * transitional plane helpers, but it is optional.
+	 */
+	void (*atomic_begin)(struct drm_crtc *crtc,
+			     struct drm_crtc_state *old_crtc_state);
+	/**
+	 * @atomic_flush:
+	 *
+	 * Drivers should finalize an atomic update of multiple planes on
+	 * a CRTC in this hook. Depending upon hardware this might include
+	 * checking that vblank evasion was successful, unblocking updates by
+	 * setting bits or setting the GO bit to flush out all updates.
+	 *
+	 * Simple hardware or hardware with special requirements can commit and
+	 * flush out all updates for all planes from this hook and forgo all the
+	 * other commit hooks for plane updates.
+	 *
+	 * This hook is called after any plane commit functions are called.
+	 *
+	 * Note that the power state of the display pipe when this function is
+	 * called depends upon the exact helpers and calling sequence the driver
+	 * has picked. See drm_atomic_commit_planes() for a discussion of the
+	 * tradeoffs and variants of plane commit helpers.
+	 *
+	 * This callback is used by the atomic modeset helpers and by the
+	 * transitional plane helpers, but it is optional.
+	 */
+	void (*atomic_flush)(struct drm_crtc *crtc,
+			     struct drm_crtc_state *old_crtc_state);
+};
+
+/**
+ * drm_crtc_helper_add - sets the helper vtable for a crtc
+ * @crtc: DRM CRTC
+ * @funcs: helper vtable to set for @crtc
+ */
+static inline void drm_crtc_helper_add(struct drm_crtc *crtc,
+				       const struct drm_crtc_helper_funcs *funcs)
+{
+	crtc->helper_private = funcs;
+}
+
+/**
+ * struct drm_encoder_helper_funcs - helper operations for encoders
+ *
+ * These hooks are used by the legacy CRTC helpers, the transitional plane
+ * helpers and the new atomic modesetting helpers.
+ */
+struct drm_encoder_helper_funcs {
+	/**
+	 * @dpms:
+	 *
+	 * Callback to control power levels on the encoder.  If the mode passed in
+	 * is unsupported, the provider must use the next lowest power level.
+	 * This is used by the legacy encoder helpers to implement DPMS
+	 * functionality in drm_helper_connector_dpms().
+	 *
+	 * This callback is also used to disable an encoder by calling it with
+	 * DRM_MODE_DPMS_OFF if the @disable hook isn't used.
+	 *
+	 * This callback is used by the legacy CRTC helpers.  Atomic helpers
+	 * also support using this hook for enabling and disabling an encoder to
+	 * facilitate transitions to atomic, but it is deprecated. Instead
+	 * @enable and @disable should be used.
+	 */
+	void (*dpms)(struct drm_encoder *encoder, int mode);
+
+	/**
+	 * @mode_fixup:
+	 *
+	 * This callback is used to validate and adjust a mode. The parameter
+	 * mode is the display mode that should be fed to the next element in
+	 * the display chain, either the final &drm_connector or a &drm_bridge.
+	 * The parameter adjusted_mode is the input mode the encoder requires. It
+	 * can be modified by this callback and does not need to match mode.
+	 *
+	 * This function is used by both legacy CRTC helpers and atomic helpers.
+	 * With atomic helpers it is optional.
+	 *
+	 * NOTE:
+	 *
+	 * This function is called in the check phase of atomic modesets, which
+	 * can be aborted for any reason (including on userspace's request to
+	 * just check whether a configuration would be possible). Atomic drivers
+	 * MUST NOT touch any persistent state (hardware or software) or data
+	 * structures except the passed in adjusted_mode parameter.
+	 *
+	 * This is in contrast to the legacy CRTC helpers where this was
+	 * allowed.
+	 *
+	 * Atomic drivers which need to inspect and adjust more state should
+	 * instead use the @atomic_check callback.
+	 *
+	 * RETURNS:
+	 *
+	 * True if an acceptable configuration is possible, false if the modeset
+	 * operation should be rejected.
+	 */
+	bool (*mode_fixup)(struct drm_encoder *encoder,
+			   const struct drm_display_mode *mode,
+			   struct drm_display_mode *adjusted_mode);
+
+	/**
+	 * @prepare:
+	 *
+	 * This callback should prepare the encoder for a subsequent modeset,
+	 * which in practice means the driver should disable the encoder if it
+	 * is running. Most drivers ended up implementing this by calling their
+	 * @dpms hook with DRM_MODE_DPMS_OFF.
+	 *
+	 * This callback is used by the legacy CRTC helpers.  Atomic helpers
+	 * also support using this hook for disabling an encoder to facilitate
+	 * transitions to atomic, but it is deprecated. Instead @disable should
+	 * be used.
+	 */
+	void (*prepare)(struct drm_encoder *encoder);
+
+	/**
+	 * @commit:
+	 *
+	 * This callback should commit the new mode on the encoder after a modeset,
+	 * which in practice means the driver should enable the encoder.  Most
+	 * drivers ended up implementing this by calling their @dpms hook with
+	 * DRM_MODE_DPMS_ON.
+	 *
+	 * This callback is used by the legacy CRTC helpers.  Atomic helpers
+	 * also support using this hook for enabling an encoder to facilitate
+	 * transitions to atomic, but it is deprecated. Instead @enable should
+	 * be used.
+	 */
+	void (*commit)(struct drm_encoder *encoder);
+
+	/**
+	 * @mode_set:
+	 *
+	 * This callback is used to update the display mode of an encoder.
+	 *
+	 * Note that the display pipe is completely off when this function is
+	 * called. Drivers which need hardware to be running before they program
+	 * the new display mode (because they implement runtime PM) should not
+	 * use this hook, because the helper library calls it only once and not
+	 * every time the display pipeline is suspend using either DPMS or the
+	 * new "ACTIVE" property. Such drivers should instead move all their
+	 * encoder setup into the ->enable() callback.
+	 *
+	 * This callback is used both by the legacy CRTC helpers and the atomic
+	 * modeset helpers. It is optional in the atomic helpers.
+	 */
+	void (*mode_set)(struct drm_encoder *encoder,
+			 struct drm_display_mode *mode,
+			 struct drm_display_mode *adjusted_mode);
+
+	/**
+	 * @get_crtc:
+	 *
+	 * This callback is used by the legacy CRTC helpers to work around
+	 * deficiencies in its own book-keeping.
+	 *
+	 * Do not use, use atomic helpers instead, which get the book keeping
+	 * right.
+	 *
+	 * FIXME:
+	 *
+	 * Currently only nouveau is using this, and as soon as nouveau is
+	 * atomic we can ditch this hook.
+	 */
+	struct drm_crtc *(*get_crtc)(struct drm_encoder *encoder);
+
+	/**
+	 * @detect:
+	 *
+	 * This callback can be used by drivers who want to do detection on the
+	 * encoder object instead of in connector functions.
+	 *
+	 * It is not used by any helper and therefore has purely driver-specific
+	 * semantics. New drivers shouldn't use this and instead just implement
+	 * their own private callbacks.
+	 *
+	 * FIXME:
+	 *
+	 * This should just be converted into a pile of driver vfuncs.
+	 * Currently radeon, amdgpu and nouveau are using it.
+	 */
+	enum drm_connector_status (*detect)(struct drm_encoder *encoder,
+					    struct drm_connector *connector);
+
+	/**
+	 * @disable:
+	 *
+	 * This callback should be used to disable the encoder. With the atomic
+	 * drivers it is called before this encoder's CRTC has been shut off
+	 * using the CRTC's own ->disable hook.  If that sequence is too simple
+	 * drivers can just add their own driver private encoder hooks and call
+	 * them from CRTC's callback by looping over all encoders connected to
+	 * it using for_each_encoder_on_crtc().
+	 *
+	 * This hook is used both by legacy CRTC helpers and atomic helpers.
+	 * Atomic drivers don't need to implement it if there's no need to
+	 * disable anything at the encoder level. To ensure that runtime PM
+	 * handling (using either DPMS or the new "ACTIVE" property) works
+	 * @disable must be the inverse of @enable for atomic drivers.
+	 *
+	 * NOTE:
+	 *
+	 * With legacy CRTC helpers there's a big semantic difference between
+	 * @disable and other hooks (like @prepare or @dpms) used to shut down a
+	 * encoder: @disable is only called when also logically disabling the
+	 * display pipeline and needs to release any resources acquired in
+	 * @mode_set (like shared PLLs, or again release pinned framebuffers).
+	 *
+	 * Therefore @disable must be the inverse of @mode_set plus @commit for
+	 * drivers still using legacy CRTC helpers, which is different from the
+	 * rules under atomic.
+	 */
+	void (*disable)(struct drm_encoder *encoder);
+
+	/**
+	 * @enable:
+	 *
+	 * This callback should be used to enable the encoder. With the atomic
+	 * drivers it is called after this encoder's CRTC has been enabled using
+	 * the CRTC's own ->enable hook.  If that sequence is too simple drivers
+	 * can just add their own driver private encoder hooks and call them
+	 * from CRTC's callback by looping over all encoders connected to it
+	 * using for_each_encoder_on_crtc().
+	 *
+	 * This hook is used only by atomic helpers, for symmetry with @disable.
+	 * Atomic drivers don't need to implement it if there's no need to
+	 * enable anything at the encoder level. To ensure that runtime PM handling
+	 * (using either DPMS or the new "ACTIVE" property) works
+	 * @enable must be the inverse of @disable for atomic drivers.
+	 */
+	void (*enable)(struct drm_encoder *encoder);
+
+	/**
+	 * @atomic_check:
+	 *
+	 * This callback is used to validate encoder state for atomic drivers.
+	 * Since the encoder is the object connecting the CRTC and connector it
+	 * gets passed both states, to be able to validate interactions and
+	 * update the CRTC to match what the encoder needs for the requested
+	 * connector.
+	 *
+	 * This function is used by the atomic helpers, but it is optional.
+	 *
+	 * NOTE:
+	 *
+	 * This function is called in the check phase of an atomic update. The
+	 * driver is not allowed to change anything outside of the free-standing
+	 * state objects passed-in or assembled in the overall &drm_atomic_state
+	 * update tracking structure.
+	 *
+	 * RETURNS:
+	 *
+	 * 0 on success, -EINVAL if the state or the transition can't be
+	 * supported, -ENOMEM on memory allocation failure and -EDEADLK if an
+	 * attempt to obtain another state object ran into a &drm_modeset_lock
+	 * deadlock.
+	 */
+	int (*atomic_check)(struct drm_encoder *encoder,
+			    struct drm_crtc_state *crtc_state,
+			    struct drm_connector_state *conn_state);
+};
+
+/**
+ * drm_encoder_helper_add - sets the helper vtable for an encoder
+ * @encoder: DRM encoder
+ * @funcs: helper vtable to set for @encoder
+ */
+static inline void drm_encoder_helper_add(struct drm_encoder *encoder,
+					  const struct drm_encoder_helper_funcs *funcs)
+{
+	encoder->helper_private = funcs;
+}
+
+/**
+ * struct drm_connector_helper_funcs - helper operations for connectors
+ *
+ * These functions are used by the atomic and legacy modeset helpers and by the
+ * probe helpers.
+ */
+struct drm_connector_helper_funcs {
+	/**
+	 * @get_modes:
+	 *
+	 * This function should fill in all modes currently valid for the sink
+	 * into the connector->probed_modes list. It should also update the
+	 * EDID property by calling drm_mode_connector_update_edid_property().
+	 *
+	 * The usual way to implement this is to cache the EDID retrieved in the
+	 * probe callback somewhere in the driver-private connector structure.
+	 * In this function drivers then parse the modes in the EDID and add
+	 * them by calling drm_add_edid_modes(). But connectors that driver a
+	 * fixed panel can also manually add specific modes using
+	 * drm_mode_probed_add(). Finally drivers that support audio probably
+	 * want to update the ELD data, too, using drm_edid_to_eld().
+	 *
+	 * This function is only called after the ->detect() hook has indicated
+	 * that a sink is connected and when the EDID isn't overridden through
+	 * sysfs or the kernel commandline.
+	 *
+	 * This callback is used by the probe helpers in e.g.
+	 * drm_helper_probe_single_connector_modes().
+	 *
+	 * RETURNS:
+	 *
+	 * The number of modes added by calling drm_mode_probed_add().
+	 */
+	int (*get_modes)(struct drm_connector *connector);
+
+	/**
+	 * @mode_valid:
+	 *
+	 * Callback to validate a mode for a connector, irrespective of the
+	 * specific display configuration.
+	 *
+	 * This callback is used by the probe helpers to filter the mode list
+	 * (which is usually derived from the EDID data block from the sink).
+	 * See e.g. drm_helper_probe_single_connector_modes().
+	 *
+	 * NOTE:
+	 *
+	 * This only filters the mode list supplied to userspace in the
+	 * GETCONNECOTR IOCTL. Userspace is free to create modes of its own and
+	 * ask the kernel to use them. It this case the atomic helpers or legacy
+	 * CRTC helpers will not call this function. Drivers therefore must
+	 * still fully validate any mode passed in in a modeset request.
+	 *
+	 * RETURNS:
+	 *
+	 * Either MODE_OK or one of the failure reasons in enum
+	 * &drm_mode_status.
+	 */
+	enum drm_mode_status (*mode_valid)(struct drm_connector *connector,
+					   struct drm_display_mode *mode);
+	/**
+	 * @best_encoder:
+	 *
+	 * This function should select the best encoder for the given connector.
+	 *
+	 * This function is used by both the atomic helpers (in the
+	 * drm_atomic_helper_check_modeset() function) and in the legacy CRTC
+	 * helpers.
+	 *
+	 * NOTE:
+	 *
+	 * In atomic drivers this function is called in the check phase of an
+	 * atomic update. The driver is not allowed to change or inspect
+	 * anything outside of arguments passed-in. Atomic drivers which need to
+	 * inspect dynamic configuration state should instead use
+	 * @atomic_best_encoder.
+	 *
+	 * RETURNS:
+	 *
+	 * Encoder that should be used for the given connector and connector
+	 * state, or NULL if no suitable encoder exists. Note that the helpers
+	 * will ensure that encoders aren't used twice, drivers should not check
+	 * for this.
+	 */
+	struct drm_encoder *(*best_encoder)(struct drm_connector *connector);
+
+	/**
+	 * @atomic_best_encoder:
+	 *
+	 * This is the atomic version of @best_encoder for atomic drivers which
+	 * need to select the best encoder depending upon the desired
+	 * configuration and can't select it statically.
+	 *
+	 * This function is used by drm_atomic_helper_check_modeset() and either
+	 * this or @best_encoder is required.
+	 *
+	 * NOTE:
+	 *
+	 * This function is called in the check phase of an atomic update. The
+	 * driver is not allowed to change anything outside of the free-standing
+	 * state objects passed-in or assembled in the overall &drm_atomic_state
+	 * update tracking structure.
+	 *
+	 * RETURNS:
+	 *
+	 * Encoder that should be used for the given connector and connector
+	 * state, or NULL if no suitable encoder exists. Note that the helpers
+	 * will ensure that encoders aren't used twice, drivers should not check
+	 * for this.
+	 */
+	struct drm_encoder *(*atomic_best_encoder)(struct drm_connector *connector,
+						   struct drm_connector_state *connector_state);
+};
+
+/**
+ * drm_connector_helper_add - sets the helper vtable for a connector
+ * @connector: DRM connector
+ * @funcs: helper vtable to set for @connector
+ */
+static inline void drm_connector_helper_add(struct drm_connector *connector,
+					    const struct drm_connector_helper_funcs *funcs)
+{
+	connector->helper_private = funcs;
+}
+
+/**
+ * struct drm_plane_helper_funcs - helper operations for planes
+ *
+ * These functions are used by the atomic helpers and by the transitional plane
+ * helpers.
+ */
+struct drm_plane_helper_funcs {
+	/**
+	 * @prepare_fb:
+	 *
+	 * This hook is to prepare a framebuffer for scanout by e.g. pinning
+	 * it's backing storage or relocating it into a contiguous block of
+	 * VRAM. Other possible preparatory work includes flushing caches.
+	 *
+	 * This function must not block for outstanding rendering, since it is
+	 * called in the context of the atomic IOCTL even for async commits to
+	 * be able to return any errors to userspace. Instead the recommended
+	 * way is to fill out the fence member of the passed-in
+	 * &drm_plane_state. If the driver doesn't support native fences then
+	 * equivalent functionality should be implemented through private
+	 * members in the plane structure.
+	 *
+	 * The helpers will call @cleanup_fb with matching arguments for every
+	 * successful call to this hook.
+	 *
+	 * This callback is used by the atomic modeset helpers and by the
+	 * transitional plane helpers, but it is optional.
+	 *
+	 * RETURNS:
+	 *
+	 * 0 on success or one of the following negative error codes allowed by
+	 * the atomic_commit hook in &drm_mode_config_funcs. When using helpers
+	 * this callback is the only one which can fail an atomic commit,
+	 * everything else must complete successfully.
+	 */
+	int (*prepare_fb)(struct drm_plane *plane,
+			  const struct drm_plane_state *new_state);
+	/**
+	 * @cleanup_fb:
+	 *
+	 * This hook is called to clean up any resources allocated for the given
+	 * framebuffer and plane configuration in @prepare_fb.
+	 *
+	 * This callback is used by the atomic modeset helpers and by the
+	 * transitional plane helpers, but it is optional.
+	 */
+	void (*cleanup_fb)(struct drm_plane *plane,
+			   const struct drm_plane_state *old_state);
+
+	/**
+	 * @atomic_check:
+	 *
+	 * Drivers should check plane specific constraints in this hook.
+	 *
+	 * When using drm_atomic_helper_check_planes() plane's ->atomic_check()
+	 * hooks are called before the ones for CRTCs, which allows drivers to
+	 * request shared resources that the CRTC controls here. For more
+	 * complicated dependencies the driver can call the provided check helpers
+	 * multiple times until the computed state has a final configuration and
+	 * everything has been checked.
+	 *
+	 * This function is also allowed to inspect any other object's state and
+	 * can add more state objects to the atomic commit if needed. Care must
+	 * be taken though to ensure that state check&compute functions for
+	 * these added states are all called, and derived state in other objects
+	 * all updated. Again the recommendation is to just call check helpers
+	 * until a maximal configuration is reached.
+	 *
+	 * This callback is used by the atomic modeset helpers and by the
+	 * transitional plane helpers, but it is optional.
+	 *
+	 * NOTE:
+	 *
+	 * This function is called in the check phase of an atomic update. The
+	 * driver is not allowed to change anything outside of the free-standing
+	 * state objects passed-in or assembled in the overall &drm_atomic_state
+	 * update tracking structure.
+	 *
+	 * RETURNS:
+	 *
+	 * 0 on success, -EINVAL if the state or the transition can't be
+	 * supported, -ENOMEM on memory allocation failure and -EDEADLK if an
+	 * attempt to obtain another state object ran into a &drm_modeset_lock
+	 * deadlock.
+	 */
+	int (*atomic_check)(struct drm_plane *plane,
+			    struct drm_plane_state *state);
+
+	/**
+	 * @atomic_update:
+	 *
+	 * Drivers should use this function to update the plane state.  This
+	 * hook is called in-between the ->atomic_begin() and
+	 * ->atomic_flush() of &drm_crtc_helper_funcs.
+	 *
+	 * Note that the power state of the display pipe when this function is
+	 * called depends upon the exact helpers and calling sequence the driver
+	 * has picked. See drm_atomic_commit_planes() for a discussion of the
+	 * tradeoffs and variants of plane commit helpers.
+	 *
+	 * This callback is used by the atomic modeset helpers and by the
+	 * transitional plane helpers, but it is optional.
+	 */
+	void (*atomic_update)(struct drm_plane *plane,
+			      struct drm_plane_state *old_state);
+	/**
+	 * @atomic_disable:
+	 *
+	 * Drivers should use this function to unconditionally disable a plane.
+	 * This hook is called in-between the ->atomic_begin() and
+	 * ->atomic_flush() of &drm_crtc_helper_funcs. It is an alternative to
+	 * @atomic_update, which will be called for disabling planes, too, if
+	 * the @atomic_disable hook isn't implemented.
+	 *
+	 * This hook is also useful to disable planes in preparation of a modeset,
+	 * by calling drm_atomic_helper_disable_planes_on_crtc() from the
+	 * ->disable() hook in &drm_crtc_helper_funcs.
+	 *
+	 * Note that the power state of the display pipe when this function is
+	 * called depends upon the exact helpers and calling sequence the driver
+	 * has picked. See drm_atomic_commit_planes() for a discussion of the
+	 * tradeoffs and variants of plane commit helpers.
+	 *
+	 * This callback is used by the atomic modeset helpers and by the
+	 * transitional plane helpers, but it is optional.
+	 */
+	void (*atomic_disable)(struct drm_plane *plane,
+			       struct drm_plane_state *old_state);
+};
+
+/**
+ * drm_plane_helper_add - sets the helper vtable for a plane
+ * @plane: DRM plane
+ * @funcs: helper vtable to set for @plane
+ */
+static inline void drm_plane_helper_add(struct drm_plane *plane,
+					const struct drm_plane_helper_funcs *funcs)
+{
+	plane->helper_private = funcs;
+}
+
+#endif
diff --git a/include/drm/drm_modeset_lock.h b/include/drm/drm_modeset_lock.h
index 94938d8..c5576fb 100644
--- a/include/drm/drm_modeset_lock.h
+++ b/include/drm/drm_modeset_lock.h
@@ -138,7 +138,7 @@
 struct drm_modeset_acquire_ctx *
 drm_modeset_legacy_acquire_ctx(struct drm_crtc *crtc);
 
-int drm_modeset_lock_all_crtcs(struct drm_device *dev,
-		struct drm_modeset_acquire_ctx *ctx);
+int drm_modeset_lock_all_ctx(struct drm_device *dev,
+			     struct drm_modeset_acquire_ctx *ctx);
 
 #endif /* DRM_MODESET_LOCK_H_ */
diff --git a/include/drm/drm_plane_helper.h b/include/drm/drm_plane_helper.h
index 5a7f9d4..4421f3f 100644
--- a/include/drm/drm_plane_helper.h
+++ b/include/drm/drm_plane_helper.h
@@ -26,6 +26,7 @@
 
 #include <drm/drm_rect.h>
 #include <drm/drm_crtc.h>
+#include <drm/drm_modeset_helper_vtables.h>
 
 /*
  * Drivers that don't allow primary plane scaling may pass this macro in place
@@ -36,46 +37,9 @@
  */
 #define DRM_PLANE_HELPER_NO_SCALING (1<<16)
 
-/**
- * DOC: plane helpers
- *
- * Helper functions to assist with creation and handling of CRTC primary
- * planes.
- */
-
 int drm_crtc_init(struct drm_device *dev, struct drm_crtc *crtc,
 		  const struct drm_crtc_funcs *funcs);
 
-/**
- * drm_plane_helper_funcs - helper operations for CRTCs
- * @prepare_fb: prepare a framebuffer for use by the plane
- * @cleanup_fb: cleanup a framebuffer when it's no longer used by the plane
- * @atomic_check: check that a given atomic state is valid and can be applied
- * @atomic_update: apply an atomic state to the plane (mandatory)
- * @atomic_disable: disable the plane
- *
- * The helper operations are called by the mid-layer CRTC helper.
- */
-struct drm_plane_helper_funcs {
-	int (*prepare_fb)(struct drm_plane *plane,
-			  const struct drm_plane_state *new_state);
-	void (*cleanup_fb)(struct drm_plane *plane,
-			   const struct drm_plane_state *old_state);
-
-	int (*atomic_check)(struct drm_plane *plane,
-			    struct drm_plane_state *state);
-	void (*atomic_update)(struct drm_plane *plane,
-			      struct drm_plane_state *old_state);
-	void (*atomic_disable)(struct drm_plane *plane,
-			       struct drm_plane_state *old_state);
-};
-
-static inline void drm_plane_helper_add(struct drm_plane *plane,
-					const struct drm_plane_helper_funcs *funcs)
-{
-	plane->helper_private = funcs;
-}
-
 int drm_plane_helper_check_update(struct drm_plane *plane,
 				  struct drm_crtc *crtc,
 				  struct drm_framebuffer *fb,
diff --git a/include/drm/drm_rect.h b/include/drm/drm_rect.h
index 26bb55e..83bb156 100644
--- a/include/drm/drm_rect.h
+++ b/include/drm/drm_rect.h
@@ -162,7 +162,8 @@
 int drm_rect_calc_vscale_relaxed(struct drm_rect *src,
 				 struct drm_rect *dst,
 				 int min_vscale, int max_vscale);
-void drm_rect_debug_print(const struct drm_rect *r, bool fixed_point);
+void drm_rect_debug_print(const char *prefix,
+			  const struct drm_rect *r, bool fixed_point);
 void drm_rect_rotate(struct drm_rect *r,
 		     int width, int height,
 		     unsigned int rotation);
diff --git a/include/drm/i915_component.h b/include/drm/i915_component.h
index 30d89e0..fab1385 100644
--- a/include/drm/i915_component.h
+++ b/include/drm/i915_component.h
@@ -31,47 +31,80 @@
 #define MAX_PORTS 5
 
 /**
- * struct i915_audio_component_ops - callbacks defined in gfx driver
- * @owner: the module owner
- * @get_power: get the POWER_DOMAIN_AUDIO power well
- * @put_power: put the POWER_DOMAIN_AUDIO power well
- * @codec_wake_override: Enable/Disable generating the codec wake signal
- * @get_cdclk_freq: get the Core Display Clock in KHz
- * @sync_audio_rate: set n/cts based on the sample rate
+ * struct i915_audio_component_ops - Ops implemented by i915 driver, called by hda driver
  */
 struct i915_audio_component_ops {
+	/**
+	 * @owner: i915 module
+	 */
 	struct module *owner;
+	/**
+	 * @get_power: get the POWER_DOMAIN_AUDIO power well
+	 *
+	 * Request the power well to be turned on.
+	 */
 	void (*get_power)(struct device *);
+	/**
+	 * @put_power: put the POWER_DOMAIN_AUDIO power well
+	 *
+	 * Allow the power well to be turned off.
+	 */
 	void (*put_power)(struct device *);
+	/**
+	 * @codec_wake_override: Enable/disable codec wake signal
+	 */
 	void (*codec_wake_override)(struct device *, bool enable);
+	/**
+	 * @get_cdclk_freq: Get the Core Display Clock in kHz
+	 */
 	int (*get_cdclk_freq)(struct device *);
+	/**
+	 * @sync_audio_rate: set n/cts based on the sample rate
+	 *
+	 * Called from audio driver. After audio driver sets the
+	 * sample rate, it will call this function to set n/cts
+	 */
 	int (*sync_audio_rate)(struct device *, int port, int rate);
 };
 
+/**
+ * struct i915_audio_component_audio_ops - Ops implemented by hda driver, called by i915 driver
+ */
 struct i915_audio_component_audio_ops {
+	/**
+	 * @audio_ptr: Pointer to be used in call to pin_eld_notify
+	 */
 	void *audio_ptr;
 	/**
-	 * Call from i915 driver, notifying the HDA driver that
-	 * pin sense and/or ELD information has changed.
-	 * @audio_ptr:		HDA driver object
-	 * @port:	Which port has changed (PORTA / PORTB / PORTC etc)
+	 * @pin_eld_notify: Notify the HDA driver that pin sense and/or ELD information has changed
+	 *
+	 * Called when the i915 driver has set up audio pipeline or has just
+	 * begun to tear it down. This allows the HDA driver to update its
+	 * status accordingly (even when the HDA controller is in power save
+	 * mode).
 	 */
 	void (*pin_eld_notify)(void *audio_ptr, int port);
 };
 
 /**
- * struct i915_audio_component - used for audio video interaction
- * @dev: the device from gfx driver
- * @aud_sample_rate: the array of audio sample rate per port
- * @ops: callback for audio driver calling
- * @audio_ops: Call from i915 driver
+ * struct i915_audio_component - Used for direct communication between i915 and hda drivers
  */
 struct i915_audio_component {
+	/**
+	 * @dev: i915 device, used as parameter for ops
+	 */
 	struct device *dev;
+	/**
+	 * @aud_sample_rate: the array of audio sample rate per port
+	 */
 	int aud_sample_rate[MAX_PORTS];
-
+	/**
+	 * @ops: Ops implemented by i915 driver, called by hda driver
+	 */
 	const struct i915_audio_component_ops *ops;
-
+	/**
+	 * @audio_ops: Ops implemented by hda driver, called by i915 driver
+	 */
 	const struct i915_audio_component_audio_ops *audio_ops;
 };
 
diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index 17c4456..f970209 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -279,16 +279,59 @@
 #define INTEL_SKL_GT3_IDS(info) \
 	INTEL_VGA_DEVICE(0x1926, info), /* ULT GT3 */ \
 	INTEL_VGA_DEVICE(0x192B, info), /* Halo GT3 */ \
-	INTEL_VGA_DEVICE(0x192A, info) /* SRV GT3 */ \
+	INTEL_VGA_DEVICE(0x192A, info)  /* SRV GT3 */
 
-#define INTEL_SKL_IDS(info) \
+#define INTEL_SKL_GT4_IDS(info) \
+	INTEL_VGA_DEVICE(0x1932, info), /* DT GT4 */ \
+	INTEL_VGA_DEVICE(0x193B, info), /* Halo GT4 */ \
+	INTEL_VGA_DEVICE(0x193D, info), /* WKS GT4 */ \
+	INTEL_VGA_DEVICE(0x193A, info)  /* SRV GT4 */
+
+#define INTEL_SKL_IDS(info)	 \
 	INTEL_SKL_GT1_IDS(info), \
 	INTEL_SKL_GT2_IDS(info), \
-	INTEL_SKL_GT3_IDS(info)
+	INTEL_SKL_GT3_IDS(info), \
+	INTEL_SKL_GT4_IDS(info)
 
 #define INTEL_BXT_IDS(info) \
 	INTEL_VGA_DEVICE(0x0A84, info), \
 	INTEL_VGA_DEVICE(0x1A84, info), \
 	INTEL_VGA_DEVICE(0x5A84, info)
 
+#define INTEL_KBL_GT1_IDS(info)	\
+	INTEL_VGA_DEVICE(0x5913, info), /* ULT GT1.5 */ \
+	INTEL_VGA_DEVICE(0x5915, info), /* ULX GT1.5 */ \
+	INTEL_VGA_DEVICE(0x5917, info), /* DT  GT1.5 */ \
+	INTEL_VGA_DEVICE(0x5906, info), /* ULT GT1 */ \
+	INTEL_VGA_DEVICE(0x590E, info), /* ULX GT1 */ \
+	INTEL_VGA_DEVICE(0x5902, info), /* DT  GT1 */ \
+	INTEL_VGA_DEVICE(0x590B, info), /* Halo GT1 */ \
+	INTEL_VGA_DEVICE(0x590A, info) /* SRV GT1 */
+
+#define INTEL_KBL_GT2_IDS(info)	\
+	INTEL_VGA_DEVICE(0x5916, info), /* ULT GT2 */ \
+	INTEL_VGA_DEVICE(0x5921, info), /* ULT GT2F */ \
+	INTEL_VGA_DEVICE(0x591E, info), /* ULX GT2 */ \
+	INTEL_VGA_DEVICE(0x5912, info), /* DT  GT2 */ \
+	INTEL_VGA_DEVICE(0x591B, info), /* Halo GT2 */ \
+	INTEL_VGA_DEVICE(0x591A, info), /* SRV GT2 */ \
+	INTEL_VGA_DEVICE(0x591D, info) /* WKS GT2 */
+
+#define INTEL_KBL_GT3_IDS(info) \
+	INTEL_VGA_DEVICE(0x5926, info), /* ULT GT3 */ \
+	INTEL_VGA_DEVICE(0x592B, info), /* Halo GT3 */ \
+	INTEL_VGA_DEVICE(0x592A, info) /* SRV GT3 */
+
+#define INTEL_KBL_GT4_IDS(info) \
+	INTEL_VGA_DEVICE(0x5932, info), /* DT  GT4 */ \
+	INTEL_VGA_DEVICE(0x593B, info), /* Halo GT4 */ \
+	INTEL_VGA_DEVICE(0x593A, info), /* SRV GT4 */ \
+	INTEL_VGA_DEVICE(0x593D, info)  /* WKS GT4 */
+
+#define INTEL_KBL_IDS(info) \
+	INTEL_KBL_GT1_IDS(info), \
+	INTEL_KBL_GT2_IDS(info), \
+	INTEL_KBL_GT3_IDS(info), \
+	INTEL_KBL_GT4_IDS(info)
+
 #endif /* _I915_PCIIDS_H */
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 813042c..3d4bf08 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -826,10 +826,10 @@
  * reserved, the validation sequence is checked against the validation
  * sequence of the process currently reserving the buffer,
  * and if the current validation sequence is greater than that of the process
- * holding the reservation, the function returns -EAGAIN. Otherwise it sleeps
+ * holding the reservation, the function returns -EDEADLK. Otherwise it sleeps
  * waiting for the buffer to become unreserved, after which it retries
  * reserving.
- * The caller should, when receiving an -EAGAIN error
+ * The caller should, when receiving an -EDEADLK error
  * release all its buffer reservations, wait for @bo to become unreserved, and
  * then rerun the validation with the same validation sequence. This procedure
  * will always guarantee that the process with the lowest validation sequence
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 9c747cb..d2f4147 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -342,10 +342,10 @@
 			       struct irq_phys_map *map, bool level);
 void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
-int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
 struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
 					   int virt_irq, int irq);
 int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map);
+bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, struct irq_phys_map *map);
 
 #define irqchip_in_kernel(k)	(!!((k)->arch.vgic.in_kernel))
 #define vgic_initialized(k)	(!!((k)->arch.vgic.nr_cpus))
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 0548339..1991aea 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -870,8 +870,8 @@
 }
 
 static inline int acpi_node_get_property_reference(struct fwnode_handle *fwnode,
-				const char *name, const char *cells_name,
-				size_t index, struct acpi_reference_args *args)
+				const char *name, size_t index,
+				struct acpi_reference_args *args)
 {
 	return -ENXIO;
 }
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 3fe27f8..0169ba2 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -254,6 +254,7 @@
 	unsigned long		virt_boundary_mask;
 
 	unsigned int		max_hw_sectors;
+	unsigned int		max_dev_sectors;
 	unsigned int		chunk_sectors;
 	unsigned int		max_sectors;
 	unsigned int		max_segment_size;
@@ -773,7 +774,6 @@
 extern void blk_requeue_request(struct request_queue *, struct request *);
 extern void blk_add_request_payload(struct request *rq, struct page *page,
 		unsigned int len);
-extern int blk_rq_check_limits(struct request_queue *q, struct request *rq);
 extern int blk_lld_busy(struct request_queue *q);
 extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
 			     struct bio_set *bs, gfp_t gfp_mask,
@@ -794,6 +794,8 @@
 extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t,
 			 struct scsi_ioctl_command __user *);
 
+extern int blk_queue_enter(struct request_queue *q, gfp_t gfp);
+extern void blk_queue_exit(struct request_queue *q);
 extern void blk_start_queue(struct request_queue *q);
 extern void blk_stop_queue(struct request_queue *q);
 extern void blk_sync_queue(struct request_queue *q);
@@ -958,7 +960,6 @@
 extern void blk_cleanup_queue(struct request_queue *);
 extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
 extern void blk_queue_bounce_limit(struct request_queue *, u64);
-extern void blk_limits_max_hw_sectors(struct queue_limits *, unsigned int);
 extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
 extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int);
 extern void blk_queue_max_segments(struct request_queue *, unsigned short);
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index de464e6..83d1926 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -40,6 +40,7 @@
 	struct user_struct *user;
 	const struct bpf_map_ops *ops;
 	struct work_struct work;
+	atomic_t usercnt;
 };
 
 struct bpf_map_type_list {
@@ -167,8 +168,10 @@
 void bpf_prog_put(struct bpf_prog *prog);
 void bpf_prog_put_rcu(struct bpf_prog *prog);
 
-struct bpf_map *bpf_map_get(u32 ufd);
+struct bpf_map *bpf_map_get_with_uref(u32 ufd);
 struct bpf_map *__bpf_map_get(struct fd f);
+void bpf_map_inc(struct bpf_map *map, bool uref);
+void bpf_map_put_with_uref(struct bpf_map *map);
 void bpf_map_put(struct bpf_map *map);
 
 extern int sysctl_unprivileged_bpf_disabled;
diff --git a/include/linux/configfs.h b/include/linux/configfs.h
index a8a335b..758a029 100644
--- a/include/linux/configfs.h
+++ b/include/linux/configfs.h
@@ -197,6 +197,16 @@
 int configfs_register_subsystem(struct configfs_subsystem *subsys);
 void configfs_unregister_subsystem(struct configfs_subsystem *subsys);
 
+int configfs_register_group(struct config_group *parent_group,
+			    struct config_group *group);
+void configfs_unregister_group(struct config_group *group);
+
+struct config_group *
+configfs_register_default_group(struct config_group *parent_group,
+				const char *name,
+				struct config_item_type *item_type);
+void configfs_unregister_default_group(struct config_group *group);
+
 /* These functions can sleep and can alloc with GFP_KERNEL */
 /* WARNING: These cannot be called underneath configfs callbacks!! */
 int configfs_depend_item(struct configfs_subsystem *subsys, struct config_item *target);
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index ef4c5b1..177c768 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -77,6 +77,7 @@
 	unsigned int		suspend_freq; /* freq to set during suspend */
 
 	unsigned int		policy; /* see above */
+	unsigned int		last_policy; /* policy before unplug */
 	struct cpufreq_governor	*governor; /* see below */
 	void			*governor_data;
 	bool			governor_enabled; /* governor start/stop flag */
diff --git a/include/linux/dns_resolver.h b/include/linux/dns_resolver.h
index cc92268..6ac3cad 100644
--- a/include/linux/dns_resolver.h
+++ b/include/linux/dns_resolver.h
@@ -27,7 +27,7 @@
 #ifdef __KERNEL__
 
 extern int dns_query(const char *type, const char *name, size_t namelen,
-		     const char *options, char **_result, time_t *_expiry);
+		     const char *options, char **_result, time64_t *_expiry);
 
 #endif /* KERNEL */
 
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 6523109..8942af0 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -271,7 +271,7 @@
 
 static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
 {
-	return gfp_flags & __GFP_DIRECT_RECLAIM;
+	return (bool __force)(gfp_flags & __GFP_DIRECT_RECLAIM);
 }
 
 #ifdef CONFIG_HIGHMEM
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 0ef2a97..402753b 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -227,7 +227,7 @@
 	struct ipv6_ac_socklist	*ipv6_ac_list;
 	struct ipv6_fl_socklist __rcu *ipv6_fl_list;
 
-	struct ipv6_txoptions	*opt;
+	struct ipv6_txoptions __rcu	*opt;
 	struct sk_buff		*pktoptions;
 	struct sk_buff		*rxpmtu;
 	struct inet6_cork	cork;
diff --git a/include/linux/kref.h b/include/linux/kref.h
index 484604d..e15828f 100644
--- a/include/linux/kref.h
+++ b/include/linux/kref.h
@@ -19,7 +19,6 @@
 #include <linux/atomic.h>
 #include <linux/kernel.h>
 #include <linux/mutex.h>
-#include <linux/spinlock.h>
 
 struct kref {
 	atomic_t refcount;
@@ -99,38 +98,6 @@
 	return kref_sub(kref, 1, release);
 }
 
-/**
- * kref_put_spinlock_irqsave - decrement refcount for object.
- * @kref: object.
- * @release: pointer to the function that will clean up the object when the
- *	     last reference to the object is released.
- *	     This pointer is required, and it is not acceptable to pass kfree
- *	     in as this function.
- * @lock: lock to take in release case
- *
- * Behaves identical to kref_put with one exception.  If the reference count
- * drops to zero, the lock will be taken atomically wrt dropping the reference
- * count.  The release function has to call spin_unlock() without _irqrestore.
- */
-static inline int kref_put_spinlock_irqsave(struct kref *kref,
-		void (*release)(struct kref *kref),
-		spinlock_t *lock)
-{
-	unsigned long flags;
-
-	WARN_ON(release == NULL);
-	if (atomic_add_unless(&kref->refcount, -1, 1))
-		return 0;
-	spin_lock_irqsave(lock, flags);
-	if (atomic_dec_and_test(&kref->refcount)) {
-		release(kref);
-		local_irq_restore(flags);
-		return 1;
-	}
-	spin_unlock_irqrestore(lock, flags);
-	return 0;
-}
-
 static inline int kref_put_mutex(struct kref *kref,
 				 void (*release)(struct kref *kref),
 				 struct mutex *lock)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5706a21..c923350 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -460,6 +460,17 @@
 	     (vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \
 	     idx++)
 
+static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
+{
+	struct kvm_vcpu *vcpu;
+	int i;
+
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		if (vcpu->vcpu_id == id)
+			return vcpu;
+	return NULL;
+}
+
 #define kvm_for_each_memslot(memslot, slots)	\
 	for (memslot = &slots->memslots[0];	\
 	      memslot < slots->memslots + KVM_MEM_SLOTS_NUM && memslot->npages;\
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 69c9057..c6916ae 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -58,7 +58,6 @@
 struct nvm_id_group {
 	u8	mtype;
 	u8	fmtype;
-	u16	res16;
 	u8	num_ch;
 	u8	num_lun;
 	u8	num_pln;
@@ -74,9 +73,9 @@
 	u32	tbet;
 	u32	tbem;
 	u32	mpos;
+	u32	mccap;
 	u16	cpar;
-	u8	res[913];
-} __packed;
+};
 
 struct nvm_addr_format {
 	u8	ch_offset;
@@ -91,19 +90,15 @@
 	u8	pg_len;
 	u8	sect_offset;
 	u8	sect_len;
-	u8	res[4];
 };
 
 struct nvm_id {
 	u8	ver_id;
 	u8	vmnt;
 	u8	cgrps;
-	u8	res[5];
 	u32	cap;
 	u32	dom;
 	struct nvm_addr_format ppaf;
-	u8	ppat;
-	u8	resv[224];
 	struct nvm_id_group groups[4];
 } __packed;
 
@@ -123,39 +118,28 @@
 #define NVM_VERSION_MINOR 0
 #define NVM_VERSION_PATCH 0
 
-#define NVM_SEC_BITS (8)
-#define NVM_PL_BITS  (6)
-#define NVM_PG_BITS  (16)
 #define NVM_BLK_BITS (16)
-#define NVM_LUN_BITS (10)
+#define NVM_PG_BITS  (16)
+#define NVM_SEC_BITS (8)
+#define NVM_PL_BITS  (8)
+#define NVM_LUN_BITS (8)
 #define NVM_CH_BITS  (8)
 
 struct ppa_addr {
+	/* Generic structure for all addresses */
 	union {
-		/* Channel-based PPA format in nand 4x2x2x2x8x10 */
 		struct {
-			u64 ch		: 4;
-			u64 sec		: 2; /* 4 sectors per page */
-			u64 pl		: 2; /* 4 planes per LUN */
-			u64 lun		: 2; /* 4 LUNs per channel */
-			u64 pg		: 8; /* 256 pages per block */
-			u64 blk		: 10;/* 1024 blocks per plane */
-			u64 resved		: 36;
-		} chnl;
-
-		/* Generic structure for all addresses */
-		struct {
+			u64 blk		: NVM_BLK_BITS;
+			u64 pg		: NVM_PG_BITS;
 			u64 sec		: NVM_SEC_BITS;
 			u64 pl		: NVM_PL_BITS;
-			u64 pg		: NVM_PG_BITS;
-			u64 blk		: NVM_BLK_BITS;
 			u64 lun		: NVM_LUN_BITS;
 			u64 ch		: NVM_CH_BITS;
 		} g;
 
 		u64 ppa;
 	};
-} __packed;
+};
 
 struct nvm_rq {
 	struct nvm_tgt_instance *ins;
@@ -191,11 +175,11 @@
 struct nvm_block;
 
 typedef int (nvm_l2p_update_fn)(u64, u32, __le64 *, void *);
-typedef int (nvm_bb_update_fn)(u32, void *, unsigned int, void *);
+typedef int (nvm_bb_update_fn)(struct ppa_addr, int, u8 *, void *);
 typedef int (nvm_id_fn)(struct request_queue *, struct nvm_id *);
 typedef int (nvm_get_l2p_tbl_fn)(struct request_queue *, u64, u32,
 				nvm_l2p_update_fn *, void *);
-typedef int (nvm_op_bb_tbl_fn)(struct request_queue *, int, unsigned int,
+typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, int,
 				nvm_bb_update_fn *, void *);
 typedef int (nvm_op_set_bb_fn)(struct request_queue *, struct nvm_rq *, int);
 typedef int (nvm_submit_io_fn)(struct request_queue *, struct nvm_rq *);
@@ -210,7 +194,7 @@
 	nvm_id_fn		*identity;
 	nvm_get_l2p_tbl_fn	*get_l2p_tbl;
 	nvm_op_bb_tbl_fn	*get_bb_tbl;
-	nvm_op_set_bb_fn	*set_bb;
+	nvm_op_set_bb_fn	*set_bb_tbl;
 
 	nvm_submit_io_fn	*submit_io;
 	nvm_erase_blk_fn	*erase_block;
@@ -220,7 +204,7 @@
 	nvm_dev_dma_alloc_fn	*dev_dma_alloc;
 	nvm_dev_dma_free_fn	*dev_dma_free;
 
-	uint8_t			max_phys_sect;
+	unsigned int		max_phys_sect;
 };
 
 struct nvm_lun {
@@ -229,7 +213,9 @@
 	int lun_id;
 	int chnl_id;
 
+	unsigned int nr_inuse_blocks;	/* Number of used blocks */
 	unsigned int nr_free_blocks;	/* Number of unused blocks */
+	unsigned int nr_bad_blocks;	/* Number of bad blocks */
 	struct nvm_block *blocks;
 
 	spinlock_t lock;
@@ -263,8 +249,7 @@
 	int blks_per_lun;
 	int sec_size;
 	int oob_size;
-	int addr_mode;
-	struct nvm_addr_format addr_format;
+	struct nvm_addr_format ppaf;
 
 	/* Calculated/Cached values. These do not reflect the actual usable
 	 * blocks at run-time.
@@ -290,118 +275,45 @@
 	char name[DISK_NAME_LEN];
 };
 
-/* fallback conversion */
-static struct ppa_addr __generic_to_linear_addr(struct nvm_dev *dev,
-							struct ppa_addr r)
+static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev,
+						struct ppa_addr r)
 {
 	struct ppa_addr l;
 
-	l.ppa = r.g.sec +
-		r.g.pg  * dev->sec_per_pg +
-		r.g.blk * (dev->pgs_per_blk *
-				dev->sec_per_pg) +
-		r.g.lun * (dev->blks_per_lun *
-				dev->pgs_per_blk *
-				dev->sec_per_pg) +
-		r.g.ch * (dev->blks_per_lun *
-				dev->pgs_per_blk *
-				dev->luns_per_chnl *
-				dev->sec_per_pg);
+	l.ppa = ((u64)r.g.blk) << dev->ppaf.blk_offset;
+	l.ppa |= ((u64)r.g.pg) << dev->ppaf.pg_offset;
+	l.ppa |= ((u64)r.g.sec) << dev->ppaf.sect_offset;
+	l.ppa |= ((u64)r.g.pl) << dev->ppaf.pln_offset;
+	l.ppa |= ((u64)r.g.lun) << dev->ppaf.lun_offset;
+	l.ppa |= ((u64)r.g.ch) << dev->ppaf.ch_offset;
 
 	return l;
 }
 
-/* fallback conversion */
-static struct ppa_addr __linear_to_generic_addr(struct nvm_dev *dev,
-							struct ppa_addr r)
-{
-	struct ppa_addr l;
-	int secs, pgs, blks, luns;
-	sector_t ppa = r.ppa;
-
-	l.ppa = 0;
-
-	div_u64_rem(ppa, dev->sec_per_pg, &secs);
-	l.g.sec = secs;
-
-	sector_div(ppa, dev->sec_per_pg);
-	div_u64_rem(ppa, dev->sec_per_blk, &pgs);
-	l.g.pg = pgs;
-
-	sector_div(ppa, dev->pgs_per_blk);
-	div_u64_rem(ppa, dev->blks_per_lun, &blks);
-	l.g.blk = blks;
-
-	sector_div(ppa, dev->blks_per_lun);
-	div_u64_rem(ppa, dev->luns_per_chnl, &luns);
-	l.g.lun = luns;
-
-	sector_div(ppa, dev->luns_per_chnl);
-	l.g.ch = ppa;
-
-	return l;
-}
-
-static struct ppa_addr __generic_to_chnl_addr(struct ppa_addr r)
+static inline struct ppa_addr dev_to_generic_addr(struct nvm_dev *dev,
+						struct ppa_addr r)
 {
 	struct ppa_addr l;
 
-	l.ppa = 0;
-
-	l.chnl.sec = r.g.sec;
-	l.chnl.pl = r.g.pl;
-	l.chnl.pg = r.g.pg;
-	l.chnl.blk = r.g.blk;
-	l.chnl.lun = r.g.lun;
-	l.chnl.ch = r.g.ch;
+	/*
+	 * (r.ppa << X offset) & X len bitmask. X eq. blk, pg, etc.
+	 */
+	l.g.blk = (r.ppa >> dev->ppaf.blk_offset) &
+					(((1 << dev->ppaf.blk_len) - 1));
+	l.g.pg |= (r.ppa >> dev->ppaf.pg_offset) &
+					(((1 << dev->ppaf.pg_len) - 1));
+	l.g.sec |= (r.ppa >> dev->ppaf.sect_offset) &
+					(((1 << dev->ppaf.sect_len) - 1));
+	l.g.pl |= (r.ppa >> dev->ppaf.pln_offset) &
+					(((1 << dev->ppaf.pln_len) - 1));
+	l.g.lun |= (r.ppa >> dev->ppaf.lun_offset) &
+					(((1 << dev->ppaf.lun_len) - 1));
+	l.g.ch |= (r.ppa >> dev->ppaf.ch_offset) &
+					(((1 << dev->ppaf.ch_len) - 1));
 
 	return l;
 }
 
-static struct ppa_addr __chnl_to_generic_addr(struct ppa_addr r)
-{
-	struct ppa_addr l;
-
-	l.ppa = 0;
-
-	l.g.sec = r.chnl.sec;
-	l.g.pl = r.chnl.pl;
-	l.g.pg = r.chnl.pg;
-	l.g.blk = r.chnl.blk;
-	l.g.lun = r.chnl.lun;
-	l.g.ch = r.chnl.ch;
-
-	return l;
-}
-
-static inline struct ppa_addr addr_to_generic_mode(struct nvm_dev *dev,
-						struct ppa_addr gppa)
-{
-	switch (dev->addr_mode) {
-	case NVM_ADDRMODE_LINEAR:
-		return __linear_to_generic_addr(dev, gppa);
-	case NVM_ADDRMODE_CHANNEL:
-		return __chnl_to_generic_addr(gppa);
-	default:
-		BUG();
-	}
-	return gppa;
-}
-
-static inline struct ppa_addr generic_to_addr_mode(struct nvm_dev *dev,
-						struct ppa_addr gppa)
-{
-	switch (dev->addr_mode) {
-	case NVM_ADDRMODE_LINEAR:
-		return __generic_to_linear_addr(dev, gppa);
-	case NVM_ADDRMODE_CHANNEL:
-		return __generic_to_chnl_addr(gppa);
-	default:
-		BUG();
-	}
-	return gppa;
-}
-
 static inline int ppa_empty(struct ppa_addr ppa_addr)
 {
 	return (ppa_addr.ppa == ADDR_EMPTY);
@@ -468,7 +380,7 @@
 typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *,
 								unsigned long);
 typedef struct nvm_lun *(nvmm_get_lun_fn)(struct nvm_dev *, int);
-typedef void (nvmm_free_blocks_print_fn)(struct nvm_dev *);
+typedef void (nvmm_lun_info_print_fn)(struct nvm_dev *);
 
 struct nvmm_type {
 	const char *name;
@@ -492,7 +404,7 @@
 	nvmm_get_lun_fn *get_lun;
 
 	/* Statistics */
-	nvmm_free_blocks_print_fn *free_blocks_print;
+	nvmm_lun_info_print_fn *lun_info_print;
 	struct list_head list;
 };
 
diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h
index e6982ac..a57f0df 100644
--- a/include/linux/marvell_phy.h
+++ b/include/linux/marvell_phy.h
@@ -16,6 +16,7 @@
 #define MARVELL_PHY_ID_88E1318S		0x01410e90
 #define MARVELL_PHY_ID_88E1116R		0x01410e40
 #define MARVELL_PHY_ID_88E1510		0x01410dd0
+#define MARVELL_PHY_ID_88E1540		0x01410eb0
 #define MARVELL_PHY_ID_88E3016		0x01410e60
 
 /* struct phy_device dev_flags definitions */
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index dd20974..1565324 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -453,26 +453,28 @@
 	u8         lro_cap[0x1];
 	u8         lro_psh_flag[0x1];
 	u8         lro_time_stamp[0x1];
-	u8         reserved_0[0x6];
+	u8         reserved_0[0x3];
+	u8         self_lb_en_modifiable[0x1];
+	u8         reserved_1[0x2];
 	u8         max_lso_cap[0x5];
-	u8         reserved_1[0x4];
+	u8         reserved_2[0x4];
 	u8         rss_ind_tbl_cap[0x4];
-	u8         reserved_2[0x3];
+	u8         reserved_3[0x3];
 	u8         tunnel_lso_const_out_ip_id[0x1];
-	u8         reserved_3[0x2];
+	u8         reserved_4[0x2];
 	u8         tunnel_statless_gre[0x1];
 	u8         tunnel_stateless_vxlan[0x1];
 
-	u8         reserved_4[0x20];
+	u8         reserved_5[0x20];
 
-	u8         reserved_5[0x10];
+	u8         reserved_6[0x10];
 	u8         lro_min_mss_size[0x10];
 
-	u8         reserved_6[0x120];
+	u8         reserved_7[0x120];
 
 	u8         lro_timer_supported_periods[4][0x20];
 
-	u8         reserved_7[0x600];
+	u8         reserved_8[0x600];
 };
 
 struct mlx5_ifc_roce_cap_bits {
@@ -4051,9 +4053,11 @@
 };
 
 struct mlx5_ifc_modify_tir_bitmask_bits {
-	u8	   reserved[0x20];
+	u8	   reserved_0[0x20];
 
-	u8         reserved1[0x1f];
+	u8         reserved_1[0x1b];
+	u8         self_lb_en[0x1];
+	u8         reserved_2[0x3];
 	u8         lro[0x1];
 };
 
diff --git a/include/linux/net.h b/include/linux/net.h
index 70ac5e2..0b4ac7d 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -34,8 +34,12 @@
 struct file;
 struct net;
 
-#define SOCK_ASYNC_NOSPACE	0
-#define SOCK_ASYNC_WAITDATA	1
+/* Historically, SOCKWQ_ASYNC_NOSPACE & SOCKWQ_ASYNC_WAITDATA were located
+ * in sock->flags, but moved into sk->sk_wq->flags to be RCU protected.
+ * Eventually all flags will be in sk->sk_wq_flags.
+ */
+#define SOCKWQ_ASYNC_NOSPACE	0
+#define SOCKWQ_ASYNC_WAITDATA	1
 #define SOCK_NOSPACE		2
 #define SOCK_PASSCRED		3
 #define SOCK_PASSSEC		4
@@ -89,6 +93,7 @@
 	/* Note: wait MUST be first field of socket_wq */
 	wait_queue_head_t	wait;
 	struct fasync_struct	*fasync_list;
+	unsigned long		flags; /* %SOCKWQ_ASYNC_NOSPACE, etc */
 	struct rcu_head		rcu;
 } ____cacheline_aligned_in_smp;
 
@@ -96,7 +101,7 @@
  *  struct socket - general BSD socket
  *  @state: socket state (%SS_CONNECTED, etc)
  *  @type: socket type (%SOCK_STREAM, etc)
- *  @flags: socket flags (%SOCK_ASYNC_NOSPACE, etc)
+ *  @flags: socket flags (%SOCK_NOSPACE, etc)
  *  @ops: protocol specific socket operations
  *  @file: File back pointer for gc
  *  @sk: internal networking protocol agnostic socket representation
@@ -202,7 +207,7 @@
 	SOCK_WAKE_URG,
 };
 
-int sock_wake_async(struct socket *sk, int how, int band);
+int sock_wake_async(struct socket_wq *sk_wq, int how, int band);
 int sock_register(const struct net_proto_family *fam);
 void sock_unregister(int family);
 int __sock_create(struct net *net, int family, int type, int proto,
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d208914..3b5d134 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1398,7 +1398,8 @@
  *	@dma:		DMA channel
  *	@mtu:		Interface MTU value
  *	@type:		Interface hardware type
- *	@hard_header_len: Hardware header length
+ *	@hard_header_len: Hardware header length, which means that this is the
+ *			  minimum size of a packet.
  *
  *	@needed_headroom: Extra headroom the hardware may need, but not in all
  *			  cases can this be guaranteed
@@ -2068,20 +2069,23 @@
 	struct u64_stats_sync   syncp;
 };
 
-#define netdev_alloc_pcpu_stats(type)				\
-({								\
-	typeof(type) __percpu *pcpu_stats = alloc_percpu(type); \
-	if (pcpu_stats)	{					\
-		int __cpu;					\
-		for_each_possible_cpu(__cpu) {			\
-			typeof(type) *stat;			\
-			stat = per_cpu_ptr(pcpu_stats, __cpu);	\
-			u64_stats_init(&stat->syncp);		\
-		}						\
-	}							\
-	pcpu_stats;						\
+#define __netdev_alloc_pcpu_stats(type, gfp)				\
+({									\
+	typeof(type) __percpu *pcpu_stats = alloc_percpu_gfp(type, gfp);\
+	if (pcpu_stats)	{						\
+		int __cpu;						\
+		for_each_possible_cpu(__cpu) {				\
+			typeof(type) *stat;				\
+			stat = per_cpu_ptr(pcpu_stats, __cpu);		\
+			u64_stats_init(&stat->syncp);			\
+		}							\
+	}								\
+	pcpu_stats;							\
 })
 
+#define netdev_alloc_pcpu_stats(type)					\
+	__netdev_alloc_pcpu_stats(type, GFP_KERNEL);
+
 #include <linux/notifier.h>
 
 /* netdevice notifier chain. Please remember to update the rtnetlink
@@ -3854,6 +3858,11 @@
 	return dev->priv_flags & IFF_EBRIDGE;
 }
 
+static inline bool netif_is_bridge_port(const struct net_device *dev)
+{
+	return dev->priv_flags & IFF_BRIDGE_PORT;
+}
+
 static inline bool netif_is_ovs_master(const struct net_device *dev)
 {
 	return dev->priv_flags & IFF_OPENVSWITCH;
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 48bb01e..0e1f433 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -421,7 +421,7 @@
 extern int ip_set_get_ipaddr4(struct nlattr *nla,  __be32 *ipaddr);
 extern int ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr);
 extern size_t ip_set_elem_len(struct ip_set *set, struct nlattr *tb[],
-			      size_t len);
+			      size_t len, size_t align);
 extern int ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
 				 struct ip_set_ext *ext);
 
diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h
index 187feab..5fcd375 100644
--- a/include/linux/netfilter_ingress.h
+++ b/include/linux/netfilter_ingress.h
@@ -5,10 +5,13 @@
 #include <linux/netdevice.h>
 
 #ifdef CONFIG_NETFILTER_INGRESS
-static inline int nf_hook_ingress_active(struct sk_buff *skb)
+static inline bool nf_hook_ingress_active(const struct sk_buff *skb)
 {
-	return nf_hook_list_active(&skb->dev->nf_hooks_ingress,
-				   NFPROTO_NETDEV, NF_NETDEV_INGRESS);
+#ifdef HAVE_JUMP_LABEL
+	if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS]))
+		return false;
+#endif
+	return !list_empty(&skb->dev->nf_hooks_ingress);
 }
 
 static inline int nf_hook_ingress(struct sk_buff *skb)
@@ -16,8 +19,8 @@
 	struct nf_hook_state state;
 
 	nf_hook_state_init(&state, &skb->dev->nf_hooks_ingress,
-			   NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV, NULL,
-			   skb->dev, NULL, dev_net(skb->dev), NULL);
+			   NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV,
+			   skb->dev, NULL, NULL, dev_net(skb->dev), NULL);
 	return nf_hook_slow(skb, &state);
 }
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 570d630..11bbae4 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -251,6 +251,7 @@
 	struct nfs4_layoutget_res res;
 	struct rpc_cred *cred;
 	gfp_t gfp_flags;
+	long timeout;
 };
 
 struct nfs4_getdeviceinfo_args {
diff --git a/include/linux/of_dma.h b/include/linux/of_dma.h
index 36112cd..b90d8ec 100644
--- a/include/linux/of_dma.h
+++ b/include/linux/of_dma.h
@@ -80,7 +80,7 @@
 static inline struct dma_chan *of_dma_request_slave_channel(struct device_node *np,
 						     const char *name)
 {
-	return NULL;
+	return ERR_PTR(-ENODEV);
 }
 
 static inline struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec,
diff --git a/include/linux/pci.h b/include/linux/pci.h
index e828e7b..6ae25aa 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -412,9 +412,18 @@
 	void (*release_fn)(struct pci_host_bridge *);
 	void *release_data;
 	unsigned int ignore_reset_delay:1;	/* for entire hierarchy */
+	/* Resource alignment requirements */
+	resource_size_t (*align_resource)(struct pci_dev *dev,
+			const struct resource *res,
+			resource_size_t start,
+			resource_size_t size,
+			resource_size_t align);
 };
 
 #define	to_pci_host_bridge(n) container_of(n, struct pci_host_bridge, dev)
+
+struct pci_host_bridge *pci_find_host_bridge(struct pci_bus *bus);
+
 void pci_set_host_bridge_release(struct pci_host_bridge *bridge,
 		     void (*release_fn)(struct pci_host_bridge *),
 		     void *release_data);
diff --git a/include/linux/scpi_protocol.h b/include/linux/scpi_protocol.h
index 80af3cd..72ce932 100644
--- a/include/linux/scpi_protocol.h
+++ b/include/linux/scpi_protocol.h
@@ -71,7 +71,7 @@
 	int (*sensor_get_value)(u16, u32 *);
 };
 
-#if IS_ENABLED(CONFIG_ARM_SCPI_PROTOCOL)
+#if IS_REACHABLE(CONFIG_ARM_SCPI_PROTOCOL)
 struct scpi_ops *get_scpi_ops(void);
 #else
 static inline struct scpi_ops *get_scpi_ops(void) { return NULL; }
diff --git a/include/linux/signal.h b/include/linux/signal.h
index ab1e039..92557bb 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -239,7 +239,6 @@
 extern void set_current_blocked(sigset_t *);
 extern void __set_current_blocked(const sigset_t *);
 extern int show_unhandled_signals;
-extern int sigsuspend(sigset_t *);
 
 struct sigaction {
 #ifndef __ARCH_HAS_IRIX_SIGACTION
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 7c82e3b..2037a86 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -158,6 +158,24 @@
 #endif
 
 /*
+ * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment.
+ * Intended for arches that get misalignment faults even for 64 bit integer
+ * aligned buffers.
+ */
+#ifndef ARCH_SLAB_MINALIGN
+#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
+#endif
+
+/*
+ * kmalloc and friends return ARCH_KMALLOC_MINALIGN aligned
+ * pointers. kmem_cache_alloc and friends return ARCH_SLAB_MINALIGN
+ * aligned pointers.
+ */
+#define __assume_kmalloc_alignment __assume_aligned(ARCH_KMALLOC_MINALIGN)
+#define __assume_slab_alignment __assume_aligned(ARCH_SLAB_MINALIGN)
+#define __assume_page_alignment __assume_aligned(PAGE_SIZE)
+
+/*
  * Kmalloc array related definitions
  */
 
@@ -286,8 +304,8 @@
 }
 #endif /* !CONFIG_SLOB */
 
-void *__kmalloc(size_t size, gfp_t flags);
-void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags);
+void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment;
+void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags) __assume_slab_alignment;
 void kmem_cache_free(struct kmem_cache *, void *);
 
 /*
@@ -298,11 +316,11 @@
  * Note that interrupts must be enabled when calling these functions.
  */
 void kmem_cache_free_bulk(struct kmem_cache *, size_t, void **);
-bool kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **);
+int kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **);
 
 #ifdef CONFIG_NUMA
-void *__kmalloc_node(size_t size, gfp_t flags, int node);
-void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
+void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment;
+void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node) __assume_slab_alignment;
 #else
 static __always_inline void *__kmalloc_node(size_t size, gfp_t flags, int node)
 {
@@ -316,12 +334,12 @@
 #endif
 
 #ifdef CONFIG_TRACING
-extern void *kmem_cache_alloc_trace(struct kmem_cache *, gfp_t, size_t);
+extern void *kmem_cache_alloc_trace(struct kmem_cache *, gfp_t, size_t) __assume_slab_alignment;
 
 #ifdef CONFIG_NUMA
 extern void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
 					   gfp_t gfpflags,
-					   int node, size_t size);
+					   int node, size_t size) __assume_slab_alignment;
 #else
 static __always_inline void *
 kmem_cache_alloc_node_trace(struct kmem_cache *s,
@@ -354,10 +372,10 @@
 }
 #endif /* CONFIG_TRACING */
 
-extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order);
+extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment;
 
 #ifdef CONFIG_TRACING
-extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order);
+extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment;
 #else
 static __always_inline void *
 kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
@@ -482,15 +500,6 @@
 	return __kmalloc_node(size, flags, node);
 }
 
-/*
- * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment.
- * Intended for arches that get misalignment faults even for 64 bit integer
- * aligned buffers.
- */
-#ifndef ARCH_SLAB_MINALIGN
-#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
-#endif
-
 struct memcg_cache_array {
 	struct rcu_head rcu;
 	struct kmem_cache *entries[0];
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index a156b82..c2b66a2 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -524,7 +524,7 @@
 asmlinkage long sys_lchown(const char __user *filename,
 				uid_t user, gid_t group);
 asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group);
-#ifdef CONFIG_UID16
+#ifdef CONFIG_HAVE_UID16
 asmlinkage long sys_chown16(const char __user *filename,
 				old_uid_t user, old_gid_t group);
 asmlinkage long sys_lchown16(const char __user *filename,
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 4014a59..613c29b 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -438,7 +438,8 @@
 static inline int thermal_zone_bind_cooling_device(
 	struct thermal_zone_device *tz, int trip,
 	struct thermal_cooling_device *cdev,
-	unsigned long upper, unsigned long lower)
+	unsigned long upper, unsigned long lower,
+	unsigned int weight)
 { return -ENODEV; }
 static inline int thermal_zone_unbind_cooling_device(
 	struct thermal_zone_device *tz, int trip,
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 5b04b0a..5e31f1b 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -607,7 +607,7 @@
 
 /* tty_audit.c */
 #ifdef CONFIG_AUDIT
-extern void tty_audit_add_data(struct tty_struct *tty, unsigned char *data,
+extern void tty_audit_add_data(struct tty_struct *tty, const void *data,
 			       size_t size, unsigned icanon);
 extern void tty_audit_exit(void);
 extern void tty_audit_fork(struct signal_struct *sig);
@@ -615,8 +615,8 @@
 extern void tty_audit_push(struct tty_struct *tty);
 extern int tty_audit_push_current(void);
 #else
-static inline void tty_audit_add_data(struct tty_struct *tty,
-		unsigned char *data, size_t size, unsigned icanon)
+static inline void tty_audit_add_data(struct tty_struct *tty, const void *data,
+				      size_t size, unsigned icanon)
 {
 }
 static inline void tty_audit_tiocsti(struct tty_struct *tty, char ch)
diff --git a/include/linux/types.h b/include/linux/types.h
index 70d8500..70dd3df 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -35,7 +35,7 @@
 
 typedef unsigned long		uintptr_t;
 
-#ifdef CONFIG_UID16
+#ifdef CONFIG_HAVE_UID16
 /* This is defined by include/asm-{arch}/posix_types.h */
 typedef __kernel_old_uid_t	old_uid_t;
 typedef __kernel_old_gid_t	old_gid_t;
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index b36d837..2a91a05 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -62,6 +62,7 @@
 #define UNIX_GC_CANDIDATE	0
 #define UNIX_GC_MAYBE_CYCLE	1
 	struct socket_wq	peer_wq;
+	wait_queue_t		peer_wake;
 };
 
 static inline struct unix_sock *unix_sk(const struct sock *sk)
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index aaf9700..fb961a5 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -167,7 +167,8 @@
 
 static inline u32 rt6_get_cookie(const struct rt6_info *rt)
 {
-	if (rt->rt6i_flags & RTF_PCPU || unlikely(rt->dst.flags & DST_NOCACHE))
+	if (rt->rt6i_flags & RTF_PCPU ||
+	    (unlikely(rt->dst.flags & DST_NOCACHE) && rt->dst.from))
 		rt = (struct rt6_info *)(rt->dst.from);
 
 	return rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 2bfb2ad..877f682 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -133,27 +133,18 @@
 /*
  *	Store a destination cache entry in a socket
  */
-static inline void __ip6_dst_store(struct sock *sk, struct dst_entry *dst,
-				   const struct in6_addr *daddr,
-				   const struct in6_addr *saddr)
+static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst,
+				 const struct in6_addr *daddr,
+				 const struct in6_addr *saddr)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct rt6_info *rt = (struct rt6_info *) dst;
 
+	np->dst_cookie = rt6_get_cookie((struct rt6_info *)dst);
 	sk_setup_caps(sk, dst);
 	np->daddr_cache = daddr;
 #ifdef CONFIG_IPV6_SUBTREES
 	np->saddr_cache = saddr;
 #endif
-	np->dst_cookie = rt6_get_cookie(rt);
-}
-
-static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst,
-				 struct in6_addr *daddr, struct in6_addr *saddr)
-{
-	spin_lock(&sk->sk_dst_lock);
-	__ip6_dst_store(sk, dst, daddr, saddr);
-	spin_unlock(&sk->sk_dst_lock);
 }
 
 static inline bool ipv6_unicast_destination(const struct sk_buff *skb)
diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index aaee6fa..ff788b6 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -90,11 +90,12 @@
 	err = ip6_local_out(dev_net(skb_dst(skb)->dev), sk, skb);
 
 	if (net_xmit_eval(err) == 0) {
-		struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
+		struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats);
 		u64_stats_update_begin(&tstats->syncp);
 		tstats->tx_bytes += pkt_len;
 		tstats->tx_packets++;
 		u64_stats_update_end(&tstats->syncp);
+		put_cpu_ptr(tstats);
 	} else {
 		stats->tx_errors++;
 		stats->tx_aborted_errors++;
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index f6dafec..62a750a 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -287,12 +287,13 @@
 				       struct pcpu_sw_netstats __percpu *stats)
 {
 	if (err > 0) {
-		struct pcpu_sw_netstats *tstats = this_cpu_ptr(stats);
+		struct pcpu_sw_netstats *tstats = get_cpu_ptr(stats);
 
 		u64_stats_update_begin(&tstats->syncp);
 		tstats->tx_bytes += err;
 		tstats->tx_packets++;
 		u64_stats_update_end(&tstats->syncp);
+		put_cpu_ptr(tstats);
 	} else if (err < 0) {
 		err_stats->tx_errors++;
 		err_stats->tx_aborted_errors++;
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index e1a10b0..9a5c9f0 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -205,6 +205,7 @@
  */
 
 struct ipv6_txoptions {
+	atomic_t		refcnt;
 	/* Length of this structure */
 	int			tot_len;
 
@@ -217,7 +218,7 @@
 	struct ipv6_opt_hdr	*dst0opt;
 	struct ipv6_rt_hdr	*srcrt;	/* Routing Header */
 	struct ipv6_opt_hdr	*dst1opt;
-
+	struct rcu_head		rcu;
 	/* Option buffer, as read by IPV6_PKTOPTIONS, starts here. */
 };
 
@@ -252,6 +253,24 @@
 	struct rcu_head			rcu;
 };
 
+static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np)
+{
+	struct ipv6_txoptions *opt;
+
+	rcu_read_lock();
+	opt = rcu_dereference(np->opt);
+	if (opt && !atomic_inc_not_zero(&opt->refcnt))
+		opt = NULL;
+	rcu_read_unlock();
+	return opt;
+}
+
+static inline void txopt_put(struct ipv6_txoptions *opt)
+{
+	if (opt && atomic_dec_and_test(&opt->refcnt))
+		kfree_rcu(opt, rcu);
+}
+
 struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label);
 struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space,
 					 struct ip6_flowlabel *fl,
@@ -490,6 +509,7 @@
 	u32 user;
 	const struct in6_addr *src;
 	const struct in6_addr *dst;
+	int iif;
 	u8 ecn;
 };
 
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 82045fc..760bc4d 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -2003,8 +2003,10 @@
  *	it shouldn't be set.
  *
  * @max_tx_aggregation_subframes: maximum number of subframes in an
- *	aggregate an HT driver will transmit, used by the peer as a
- *	hint to size its reorder buffer.
+ *	aggregate an HT driver will transmit. Though ADDBA will advertise
+ *	a constant value of 64 as some older APs can crash if the window
+ *	size is smaller (an example is LinkSys WRT120N with FW v1.0.07
+ *	build 002 Jun 18 2012).
  *
  * @offchannel_tx_hw_queue: HW queue ID to use for offchannel TX
  *	(if %IEEE80211_HW_QUEUE_CONTROL is set)
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index bf39374..2d8edaa 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -181,8 +181,7 @@
 int ndisc_rcv(struct sk_buff *skb);
 
 void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
-		   const struct in6_addr *daddr, const struct in6_addr *saddr,
-		   struct sk_buff *oskb);
+		   const struct in6_addr *daddr, const struct in6_addr *saddr);
 
 void ndisc_send_rs(struct net_device *dev,
 		   const struct in6_addr *saddr, const struct in6_addr *daddr);
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index c9149cc..4bd7508 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -618,6 +618,8 @@
 	void				(*eval)(const struct nft_expr *expr,
 						struct nft_regs *regs,
 						const struct nft_pktinfo *pkt);
+	int				(*clone)(struct nft_expr *dst,
+						 const struct nft_expr *src);
 	unsigned int			size;
 
 	int				(*init)(const struct nft_ctx *ctx,
@@ -660,10 +662,20 @@
 int nft_expr_dump(struct sk_buff *skb, unsigned int attr,
 		  const struct nft_expr *expr);
 
-static inline void nft_expr_clone(struct nft_expr *dst, struct nft_expr *src)
+static inline int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src)
 {
+	int err;
+
 	__module_get(src->ops->type->owner);
-	memcpy(dst, src, src->ops->size);
+	if (src->ops->clone) {
+		dst->ops = src->ops;
+		err = src->ops->clone(dst, src);
+		if (err < 0)
+			return err;
+	} else {
+		memcpy(dst, src, src->ops->size);
+	}
+	return 0;
 }
 
 /**
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 4c79ce8..b2a8e63 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -61,6 +61,9 @@
 				      */
 #define TCQ_F_WARN_NONWC	(1 << 16)
 #define TCQ_F_CPUSTATS		0x20 /* run using percpu statistics */
+#define TCQ_F_NOPARENT		0x40 /* root of its hierarchy :
+				      * qdisc_tree_decrease_qlen() should stop.
+				      */
 	u32			limit;
 	const struct Qdisc_ops	*ops;
 	struct qdisc_size_table	__rcu *stab;
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 495c87e..7bbb710 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -775,10 +775,10 @@
 		hb_sent:1,
 
 		/* Is the Path MTU update pending on this tranport */
-		pmtu_pending:1;
+		pmtu_pending:1,
 
-	/* Has this transport moved the ctsn since we last sacked */
-	__u32 sack_generation;
+		/* Has this transport moved the ctsn since we last sacked */
+		sack_generation:1;
 	u32 dst_cookie;
 
 	struct flowi fl;
@@ -1482,19 +1482,19 @@
 			prsctp_capable:1,   /* Can peer do PR-SCTP? */
 			auth_capable:1;     /* Is peer doing SCTP-AUTH? */
 
-		/* Ack State   : This flag indicates if the next received
+		/* sack_needed : This flag indicates if the next received
 		 *             : packet is to be responded to with a
-		 *             : SACK. This is initializedto 0.  When a packet
-		 *             : is received it is incremented. If this value
+		 *             : SACK. This is initialized to 0.  When a packet
+		 *             : is received sack_cnt is incremented. If this value
 		 *             : reaches 2 or more, a SACK is sent and the
 		 *             : value is reset to 0. Note: This is used only
 		 *             : when no DATA chunks are received out of
 		 *             : order.  When DATA chunks are out of order,
 		 *             : SACK's are not delayed (see Section 6).
 		 */
-		__u8    sack_needed;     /* Do we need to sack the peer? */
+		__u8    sack_needed:1,     /* Do we need to sack the peer? */
+			sack_generation:1;
 		__u32	sack_cnt;
-		__u32	sack_generation;
 
 		__u32   adaptation_ind;	 /* Adaptation Code point. */
 
diff --git a/include/net/sock.h b/include/net/sock.h
index bbf7c2c..52d27ee 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -254,7 +254,6 @@
   *	@sk_wq: sock wait queue and async head
   *	@sk_rx_dst: receive input route used by early demux
   *	@sk_dst_cache: destination cache
-  *	@sk_dst_lock: destination cache lock
   *	@sk_policy: flow policy
   *	@sk_receive_queue: incoming packets
   *	@sk_wmem_alloc: transmit queue bytes committed
@@ -384,14 +383,16 @@
 	int			sk_rcvbuf;
 
 	struct sk_filter __rcu	*sk_filter;
-	struct socket_wq __rcu	*sk_wq;
-
+	union {
+		struct socket_wq __rcu	*sk_wq;
+		struct socket_wq	*sk_wq_raw;
+	};
 #ifdef CONFIG_XFRM
 	struct xfrm_policy	*sk_policy[2];
 #endif
 	struct dst_entry	*sk_rx_dst;
 	struct dst_entry __rcu	*sk_dst_cache;
-	spinlock_t		sk_dst_lock;
+	/* Note: 32bit hole on 64bit arches */
 	atomic_t		sk_wmem_alloc;
 	atomic_t		sk_omem_alloc;
 	int			sk_sndbuf;
@@ -2005,10 +2006,27 @@
 	return amt;
 }
 
-static inline void sk_wake_async(struct sock *sk, int how, int band)
+/* Note:
+ *  We use sk->sk_wq_raw, from contexts knowing this
+ *  pointer is not NULL and cannot disappear/change.
+ */
+static inline void sk_set_bit(int nr, struct sock *sk)
 {
-	if (sock_flag(sk, SOCK_FASYNC))
-		sock_wake_async(sk->sk_socket, how, band);
+	set_bit(nr, &sk->sk_wq_raw->flags);
+}
+
+static inline void sk_clear_bit(int nr, struct sock *sk)
+{
+	clear_bit(nr, &sk->sk_wq_raw->flags);
+}
+
+static inline void sk_wake_async(const struct sock *sk, int how, int band)
+{
+	if (sock_flag(sk, SOCK_FASYNC)) {
+		rcu_read_lock();
+		sock_wake_async(rcu_dereference(sk->sk_wq), how, band);
+		rcu_read_unlock();
+	}
 }
 
 /* Since sk_{r,w}mem_alloc sums skb->truesize, even a small frame might
@@ -2226,6 +2244,31 @@
 	return (1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV);
 }
 
+/**
+ * sk_state_load - read sk->sk_state for lockless contexts
+ * @sk: socket pointer
+ *
+ * Paired with sk_state_store(). Used in places we do not hold socket lock :
+ * tcp_diag_get_info(), tcp_get_info(), tcp_poll(), get_tcp4_sock() ...
+ */
+static inline int sk_state_load(const struct sock *sk)
+{
+	return smp_load_acquire(&sk->sk_state);
+}
+
+/**
+ * sk_state_store - update sk->sk_state
+ * @sk: socket pointer
+ * @newstate: new state
+ *
+ * Paired with sk_state_load(). Should be used in contexts where
+ * state change might impact lockless readers.
+ */
+static inline void sk_state_store(struct sock *sk, int newstate)
+{
+	smp_store_release(&sk->sk_state, newstate);
+}
+
 void sock_enable_timestamp(struct sock *sk, int flag);
 int sock_get_timestamp(struct sock *, struct timeval __user *);
 int sock_get_timestampns(struct sock *, struct timespec __user *);
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index bc865e2..1d22ce9 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -323,7 +323,7 @@
 					  struct net_device *filter_dev,
 					  int idx)
 {
-	return -EOPNOTSUPP;
+       return idx;
 }
 
 static inline void switchdev_port_fwd_mark_set(struct net_device *dev,
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index ed52712..fcfa3d7 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -668,6 +668,9 @@
 	unsigned use_blk_mq:1;
 	unsigned use_cmd_list:1;
 
+	/* Host responded with short (<36 bytes) INQUIRY result */
+	unsigned short_inquiry:1;
+
 	/*
 	 * Optional work queue to be utilized by the transport
 	 */
diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h
index 7855cfe..95a937e 100644
--- a/include/sound/soc-dapm.h
+++ b/include/sound/soc-dapm.h
@@ -398,6 +398,7 @@
 int snd_soc_dapm_weak_routes(struct snd_soc_dapm_context *dapm,
 			     const struct snd_soc_dapm_route *route, int num);
 void snd_soc_dapm_free_widget(struct snd_soc_dapm_widget *w);
+void snd_soc_dapm_reset_cache(struct snd_soc_dapm_context *dapm);
 
 /* dapm events */
 void snd_soc_dapm_stream_event(struct snd_soc_pcm_runtime *rtd, int stream,
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 0a2c740..aabf0ac 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -474,7 +474,7 @@
 	struct completion	cmd_wait_comp;
 	const struct target_core_fabric_ops *se_tfo;
 	sense_reason_t		(*execute_cmd)(struct se_cmd *);
-	sense_reason_t (*transport_complete_callback)(struct se_cmd *, bool);
+	sense_reason_t (*transport_complete_callback)(struct se_cmd *, bool, int *);
 	void			*protocol_data;
 
 	unsigned char		*t_task_cdb;
diff --git a/include/uapi/drm/Kbuild b/include/uapi/drm/Kbuild
index 38d4370..9355dd8 100644
--- a/include/uapi/drm/Kbuild
+++ b/include/uapi/drm/Kbuild
@@ -3,6 +3,7 @@
 header-y += drm_fourcc.h
 header-y += drm_mode.h
 header-y += drm_sarea.h
+header-y += amdgpu_drm.h
 header-y += exynos_drm.h
 header-y += i810_drm.h
 header-y += i915_drm.h
@@ -17,4 +18,5 @@
 header-y += via_drm.h
 header-y += vmwgfx_drm.h
 header-y += msm_drm.h
+header-y += vc4_drm.h
 header-y += virtgpu_drm.h
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index e52933a..453a76a 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -76,19 +76,19 @@
 
 struct drm_amdgpu_gem_create_in  {
 	/** the requested memory size */
-	uint64_t bo_size;
+	__u64 bo_size;
 	/** physical start_addr alignment in bytes for some HW requirements */
-	uint64_t alignment;
+	__u64 alignment;
 	/** the requested memory domains */
-	uint64_t domains;
+	__u64 domains;
 	/** allocation flags */
-	uint64_t domain_flags;
+	__u64 domain_flags;
 };
 
 struct drm_amdgpu_gem_create_out  {
 	/** returned GEM object handle */
-	uint32_t handle;
-	uint32_t _pad;
+	__u32 handle;
+	__u32 _pad;
 };
 
 union drm_amdgpu_gem_create {
@@ -105,28 +105,28 @@
 
 struct drm_amdgpu_bo_list_in {
 	/** Type of operation */
-	uint32_t operation;
+	__u32 operation;
 	/** Handle of list or 0 if we want to create one */
-	uint32_t list_handle;
+	__u32 list_handle;
 	/** Number of BOs in list  */
-	uint32_t bo_number;
+	__u32 bo_number;
 	/** Size of each element describing BO */
-	uint32_t bo_info_size;
+	__u32 bo_info_size;
 	/** Pointer to array describing BOs */
-	uint64_t bo_info_ptr;
+	__u64 bo_info_ptr;
 };
 
 struct drm_amdgpu_bo_list_entry {
 	/** Handle of BO */
-	uint32_t bo_handle;
+	__u32 bo_handle;
 	/** New (if specified) BO priority to be used during migration */
-	uint32_t bo_priority;
+	__u32 bo_priority;
 };
 
 struct drm_amdgpu_bo_list_out {
 	/** Handle of resource list  */
-	uint32_t list_handle;
-	uint32_t _pad;
+	__u32 list_handle;
+	__u32 _pad;
 };
 
 union drm_amdgpu_bo_list {
@@ -150,26 +150,26 @@
 
 struct drm_amdgpu_ctx_in {
 	/** AMDGPU_CTX_OP_* */
-	uint32_t	op;
+	__u32	op;
 	/** For future use, no flags defined so far */
-	uint32_t	flags;
-	uint32_t	ctx_id;
-	uint32_t	_pad;
+	__u32	flags;
+	__u32	ctx_id;
+	__u32	_pad;
 };
 
 union drm_amdgpu_ctx_out {
 		struct {
-			uint32_t	ctx_id;
-			uint32_t	_pad;
+			__u32	ctx_id;
+			__u32	_pad;
 		} alloc;
 
 		struct {
 			/** For future use, no flags defined so far */
-			uint64_t	flags;
+			__u64	flags;
 			/** Number of resets caused by this context so far. */
-			uint32_t	hangs;
+			__u32	hangs;
 			/** Reset status since the last call of the ioctl. */
-			uint32_t	reset_status;
+			__u32	reset_status;
 		} state;
 };
 
@@ -189,12 +189,12 @@
 #define AMDGPU_GEM_USERPTR_REGISTER	(1 << 3)
 
 struct drm_amdgpu_gem_userptr {
-	uint64_t		addr;
-	uint64_t		size;
+	__u64		addr;
+	__u64		size;
 	/* AMDGPU_GEM_USERPTR_* */
-	uint32_t		flags;
+	__u32		flags;
 	/* Resulting GEM handle */
-	uint32_t		handle;
+	__u32		handle;
 };
 
 /* same meaning as the GB_TILE_MODE and GL_MACRO_TILE_MODE fields */
@@ -226,28 +226,28 @@
 /** The same structure is shared for input/output */
 struct drm_amdgpu_gem_metadata {
 	/** GEM Object handle */
-	uint32_t	handle;
+	__u32	handle;
 	/** Do we want get or set metadata */
-	uint32_t	op;
+	__u32	op;
 	struct {
 		/** For future use, no flags defined so far */
-		uint64_t	flags;
+		__u64	flags;
 		/** family specific tiling info */
-		uint64_t	tiling_info;
-		uint32_t	data_size_bytes;
-		uint32_t	data[64];
+		__u64	tiling_info;
+		__u32	data_size_bytes;
+		__u32	data[64];
 	} data;
 };
 
 struct drm_amdgpu_gem_mmap_in {
 	/** the GEM object handle */
-	uint32_t handle;
-	uint32_t _pad;
+	__u32 handle;
+	__u32 _pad;
 };
 
 struct drm_amdgpu_gem_mmap_out {
 	/** mmap offset from the vma offset manager */
-	uint64_t addr_ptr;
+	__u64 addr_ptr;
 };
 
 union drm_amdgpu_gem_mmap {
@@ -257,18 +257,18 @@
 
 struct drm_amdgpu_gem_wait_idle_in {
 	/** GEM object handle */
-	uint32_t handle;
+	__u32 handle;
 	/** For future use, no flags defined so far */
-	uint32_t flags;
+	__u32 flags;
 	/** Absolute timeout to wait */
-	uint64_t timeout;
+	__u64 timeout;
 };
 
 struct drm_amdgpu_gem_wait_idle_out {
 	/** BO status:  0 - BO is idle, 1 - BO is busy */
-	uint32_t status;
+	__u32 status;
 	/** Returned current memory domain */
-	uint32_t domain;
+	__u32 domain;
 };
 
 union drm_amdgpu_gem_wait_idle {
@@ -278,18 +278,18 @@
 
 struct drm_amdgpu_wait_cs_in {
 	/** Command submission handle */
-	uint64_t handle;
+	__u64 handle;
 	/** Absolute timeout to wait */
-	uint64_t timeout;
-	uint32_t ip_type;
-	uint32_t ip_instance;
-	uint32_t ring;
-	uint32_t ctx_id;
+	__u64 timeout;
+	__u32 ip_type;
+	__u32 ip_instance;
+	__u32 ring;
+	__u32 ctx_id;
 };
 
 struct drm_amdgpu_wait_cs_out {
 	/** CS status:  0 - CS completed, 1 - CS still busy */
-	uint64_t status;
+	__u64 status;
 };
 
 union drm_amdgpu_wait_cs {
@@ -303,11 +303,11 @@
 /* Sets or returns a value associated with a buffer. */
 struct drm_amdgpu_gem_op {
 	/** GEM object handle */
-	uint32_t	handle;
+	__u32	handle;
 	/** AMDGPU_GEM_OP_* */
-	uint32_t	op;
+	__u32	op;
 	/** Input or return value */
-	uint64_t	value;
+	__u64	value;
 };
 
 #define AMDGPU_VA_OP_MAP			1
@@ -326,18 +326,18 @@
 
 struct drm_amdgpu_gem_va {
 	/** GEM object handle */
-	uint32_t handle;
-	uint32_t _pad;
+	__u32 handle;
+	__u32 _pad;
 	/** AMDGPU_VA_OP_* */
-	uint32_t operation;
+	__u32 operation;
 	/** AMDGPU_VM_PAGE_* */
-	uint32_t flags;
+	__u32 flags;
 	/** va address to assign . Must be correctly aligned.*/
-	uint64_t va_address;
+	__u64 va_address;
 	/** Specify offset inside of BO to assign. Must be correctly aligned.*/
-	uint64_t offset_in_bo;
+	__u64 offset_in_bo;
 	/** Specify mapping size. Must be correctly aligned. */
-	uint64_t map_size;
+	__u64 map_size;
 };
 
 #define AMDGPU_HW_IP_GFX          0
@@ -354,24 +354,24 @@
 #define AMDGPU_CHUNK_ID_DEPENDENCIES	0x03
 
 struct drm_amdgpu_cs_chunk {
-	uint32_t		chunk_id;
-	uint32_t		length_dw;
-	uint64_t		chunk_data;
+	__u32		chunk_id;
+	__u32		length_dw;
+	__u64		chunk_data;
 };
 
 struct drm_amdgpu_cs_in {
 	/** Rendering context id */
-	uint32_t		ctx_id;
+	__u32		ctx_id;
 	/**  Handle of resource list associated with CS */
-	uint32_t		bo_list_handle;
-	uint32_t		num_chunks;
-	uint32_t		_pad;
-	/** this points to uint64_t * which point to cs chunks */
-	uint64_t		chunks;
+	__u32		bo_list_handle;
+	__u32		num_chunks;
+	__u32		_pad;
+	/** this points to __u64 * which point to cs chunks */
+	__u64		chunks;
 };
 
 struct drm_amdgpu_cs_out {
-	uint64_t handle;
+	__u64 handle;
 };
 
 union drm_amdgpu_cs {
@@ -388,32 +388,32 @@
 #define AMDGPU_IB_FLAG_PREAMBLE (1<<1)
 
 struct drm_amdgpu_cs_chunk_ib {
-	uint32_t _pad;
+	__u32 _pad;
 	/** AMDGPU_IB_FLAG_* */
-	uint32_t flags;
+	__u32 flags;
 	/** Virtual address to begin IB execution */
-	uint64_t va_start;
+	__u64 va_start;
 	/** Size of submission */
-	uint32_t ib_bytes;
+	__u32 ib_bytes;
 	/** HW IP to submit to */
-	uint32_t ip_type;
+	__u32 ip_type;
 	/** HW IP index of the same type to submit to  */
-	uint32_t ip_instance;
+	__u32 ip_instance;
 	/** Ring index to submit to */
-	uint32_t ring;
+	__u32 ring;
 };
 
 struct drm_amdgpu_cs_chunk_dep {
-	uint32_t ip_type;
-	uint32_t ip_instance;
-	uint32_t ring;
-	uint32_t ctx_id;
-	uint64_t handle;
+	__u32 ip_type;
+	__u32 ip_instance;
+	__u32 ring;
+	__u32 ctx_id;
+	__u64 handle;
 };
 
 struct drm_amdgpu_cs_chunk_fence {
-	uint32_t handle;
-	uint32_t offset;
+	__u32 handle;
+	__u32 offset;
 };
 
 struct drm_amdgpu_cs_chunk_data {
@@ -486,83 +486,83 @@
 /* Input structure for the INFO ioctl */
 struct drm_amdgpu_info {
 	/* Where the return value will be stored */
-	uint64_t return_pointer;
+	__u64 return_pointer;
 	/* The size of the return value. Just like "size" in "snprintf",
 	 * it limits how many bytes the kernel can write. */
-	uint32_t return_size;
+	__u32 return_size;
 	/* The query request id. */
-	uint32_t query;
+	__u32 query;
 
 	union {
 		struct {
-			uint32_t id;
-			uint32_t _pad;
+			__u32 id;
+			__u32 _pad;
 		} mode_crtc;
 
 		struct {
 			/** AMDGPU_HW_IP_* */
-			uint32_t type;
+			__u32 type;
 			/**
 			 * Index of the IP if there are more IPs of the same
 			 * type. Ignored by AMDGPU_INFO_HW_IP_COUNT.
 			 */
-			uint32_t ip_instance;
+			__u32 ip_instance;
 		} query_hw_ip;
 
 		struct {
-			uint32_t dword_offset;
+			__u32 dword_offset;
 			/** number of registers to read */
-			uint32_t count;
-			uint32_t instance;
+			__u32 count;
+			__u32 instance;
 			/** For future use, no flags defined so far */
-			uint32_t flags;
+			__u32 flags;
 		} read_mmr_reg;
 
 		struct {
 			/** AMDGPU_INFO_FW_* */
-			uint32_t fw_type;
+			__u32 fw_type;
 			/**
 			 * Index of the IP if there are more IPs of
 			 * the same type.
 			 */
-			uint32_t ip_instance;
+			__u32 ip_instance;
 			/**
 			 * Index of the engine. Whether this is used depends
 			 * on the firmware type. (e.g. MEC, SDMA)
 			 */
-			uint32_t index;
-			uint32_t _pad;
+			__u32 index;
+			__u32 _pad;
 		} query_fw;
 	};
 };
 
 struct drm_amdgpu_info_gds {
 	/** GDS GFX partition size */
-	uint32_t gds_gfx_partition_size;
+	__u32 gds_gfx_partition_size;
 	/** GDS compute partition size */
-	uint32_t compute_partition_size;
+	__u32 compute_partition_size;
 	/** total GDS memory size */
-	uint32_t gds_total_size;
+	__u32 gds_total_size;
 	/** GWS size per GFX partition */
-	uint32_t gws_per_gfx_partition;
+	__u32 gws_per_gfx_partition;
 	/** GSW size per compute partition */
-	uint32_t gws_per_compute_partition;
+	__u32 gws_per_compute_partition;
 	/** OA size per GFX partition */
-	uint32_t oa_per_gfx_partition;
+	__u32 oa_per_gfx_partition;
 	/** OA size per compute partition */
-	uint32_t oa_per_compute_partition;
-	uint32_t _pad;
+	__u32 oa_per_compute_partition;
+	__u32 _pad;
 };
 
 struct drm_amdgpu_info_vram_gtt {
-	uint64_t vram_size;
-	uint64_t vram_cpu_accessible_size;
-	uint64_t gtt_size;
+	__u64 vram_size;
+	__u64 vram_cpu_accessible_size;
+	__u64 gtt_size;
 };
 
 struct drm_amdgpu_info_firmware {
-	uint32_t ver;
-	uint32_t feature;
+	__u32 ver;
+	__u32 feature;
 };
 
 #define AMDGPU_VRAM_TYPE_UNKNOWN 0
@@ -576,61 +576,61 @@
 
 struct drm_amdgpu_info_device {
 	/** PCI Device ID */
-	uint32_t device_id;
+	__u32 device_id;
 	/** Internal chip revision: A0, A1, etc.) */
-	uint32_t chip_rev;
-	uint32_t external_rev;
+	__u32 chip_rev;
+	__u32 external_rev;
 	/** Revision id in PCI Config space */
-	uint32_t pci_rev;
-	uint32_t family;
-	uint32_t num_shader_engines;
-	uint32_t num_shader_arrays_per_engine;
+	__u32 pci_rev;
+	__u32 family;
+	__u32 num_shader_engines;
+	__u32 num_shader_arrays_per_engine;
 	/* in KHz */
-	uint32_t gpu_counter_freq;
-	uint64_t max_engine_clock;
-	uint64_t max_memory_clock;
+	__u32 gpu_counter_freq;
+	__u64 max_engine_clock;
+	__u64 max_memory_clock;
 	/* cu information */
-	uint32_t cu_active_number;
-	uint32_t cu_ao_mask;
-	uint32_t cu_bitmap[4][4];
+	__u32 cu_active_number;
+	__u32 cu_ao_mask;
+	__u32 cu_bitmap[4][4];
 	/** Render backend pipe mask. One render backend is CB+DB. */
-	uint32_t enabled_rb_pipes_mask;
-	uint32_t num_rb_pipes;
-	uint32_t num_hw_gfx_contexts;
-	uint32_t _pad;
-	uint64_t ids_flags;
+	__u32 enabled_rb_pipes_mask;
+	__u32 num_rb_pipes;
+	__u32 num_hw_gfx_contexts;
+	__u32 _pad;
+	__u64 ids_flags;
 	/** Starting virtual address for UMDs. */
-	uint64_t virtual_address_offset;
+	__u64 virtual_address_offset;
 	/** The maximum virtual address */
-	uint64_t virtual_address_max;
+	__u64 virtual_address_max;
 	/** Required alignment of virtual addresses. */
-	uint32_t virtual_address_alignment;
+	__u32 virtual_address_alignment;
 	/** Page table entry - fragment size */
-	uint32_t pte_fragment_size;
-	uint32_t gart_page_size;
+	__u32 pte_fragment_size;
+	__u32 gart_page_size;
 	/** constant engine ram size*/
-	uint32_t ce_ram_size;
+	__u32 ce_ram_size;
 	/** video memory type info*/
-	uint32_t vram_type;
+	__u32 vram_type;
 	/** video memory bit width*/
-	uint32_t vram_bit_width;
+	__u32 vram_bit_width;
 	/* vce harvesting instance */
-	uint32_t vce_harvest_config;
+	__u32 vce_harvest_config;
 };
 
 struct drm_amdgpu_info_hw_ip {
 	/** Version of h/w IP */
-	uint32_t  hw_ip_version_major;
-	uint32_t  hw_ip_version_minor;
+	__u32  hw_ip_version_major;
+	__u32  hw_ip_version_minor;
 	/** Capabilities */
-	uint64_t  capabilities_flags;
+	__u64  capabilities_flags;
 	/** command buffer address start alignment*/
-	uint32_t  ib_start_alignment;
+	__u32  ib_start_alignment;
 	/** command buffer size alignment*/
-	uint32_t  ib_size_alignment;
+	__u32  ib_size_alignment;
 	/** Bitmask of available rings. Bit 0 means ring 0, etc. */
-	uint32_t  available_rings;
-	uint32_t  _pad;
+	__u32  available_rings;
+	__u32  _pad;
 };
 
 /*
diff --git a/include/uapi/drm/armada_drm.h b/include/uapi/drm/armada_drm.h
index 8dec3fd..6de7f01 100644
--- a/include/uapi/drm/armada_drm.h
+++ b/include/uapi/drm/armada_drm.h
@@ -9,6 +9,8 @@
 #ifndef DRM_ARMADA_IOCTL_H
 #define DRM_ARMADA_IOCTL_H
 
+#include "drm.h"
+
 #define DRM_ARMADA_GEM_CREATE		0x00
 #define DRM_ARMADA_GEM_MMAP		0x02
 #define DRM_ARMADA_GEM_PWRITE		0x03
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 3801584..b4e92eb 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -54,6 +54,7 @@
 typedef uint32_t __u32;
 typedef int64_t  __s64;
 typedef uint64_t __u64;
+typedef size_t   __kernel_size_t;
 typedef unsigned long drm_handle_t;
 
 #endif
@@ -129,11 +130,11 @@
 	int version_major;	  /**< Major version */
 	int version_minor;	  /**< Minor version */
 	int version_patchlevel;	  /**< Patch level */
-	size_t name_len;	  /**< Length of name buffer */
+	__kernel_size_t name_len;	  /**< Length of name buffer */
 	char __user *name;	  /**< Name of driver */
-	size_t date_len;	  /**< Length of date buffer */
+	__kernel_size_t date_len;	  /**< Length of date buffer */
 	char __user *date;	  /**< User-space buffer to hold date */
-	size_t desc_len;	  /**< Length of desc buffer */
+	__kernel_size_t desc_len;	  /**< Length of desc buffer */
 	char __user *desc;	  /**< User-space buffer to hold desc */
 };
 
@@ -143,7 +144,7 @@
  * \sa drmGetBusid() and drmSetBusId().
  */
 struct drm_unique {
-	size_t unique_len;	  /**< Length of unique */
+	__kernel_size_t unique_len;	  /**< Length of unique */
 	char __user *unique;	  /**< Unique name for driver instantiation */
 };
 
diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h
index 0b69a77..998bd25 100644
--- a/include/uapi/drm/drm_fourcc.h
+++ b/include/uapi/drm/drm_fourcc.h
@@ -24,7 +24,7 @@
 #ifndef DRM_FOURCC_H
 #define DRM_FOURCC_H
 
-#include <linux/types.h>
+#include "drm.h"
 
 #define fourcc_code(a, b, c, d) ((__u32)(a) | ((__u32)(b) << 8) | \
 				 ((__u32)(c) << 16) | ((__u32)(d) << 24))
diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index 6c11ca4..50adb46 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -27,7 +27,7 @@
 #ifndef _DRM_MODE_H
 #define _DRM_MODE_H
 
-#include <linux/types.h>
+#include "drm.h"
 
 #define DRM_DISPLAY_INFO_LEN	32
 #define DRM_CONNECTOR_NAME_LEN	32
@@ -526,14 +526,14 @@
 
 /* create a dumb scanout buffer */
 struct drm_mode_create_dumb {
-	uint32_t height;
-	uint32_t width;
-	uint32_t bpp;
-	uint32_t flags;
+	__u32 height;
+	__u32 width;
+	__u32 bpp;
+	__u32 flags;
 	/* handle, pitch, size will be returned */
-	uint32_t handle;
-	uint32_t pitch;
-	uint64_t size;
+	__u32 handle;
+	__u32 pitch;
+	__u64 size;
 };
 
 /* set up for mmap of a dumb scanout buffer */
@@ -550,7 +550,7 @@
 };
 
 struct drm_mode_destroy_dumb {
-	uint32_t handle;
+	__u32 handle;
 };
 
 /* page-flip flags are valid, plus: */
diff --git a/include/uapi/drm/drm_sarea.h b/include/uapi/drm/drm_sarea.h
index 413a564..1d1a858 100644
--- a/include/uapi/drm/drm_sarea.h
+++ b/include/uapi/drm/drm_sarea.h
@@ -32,7 +32,7 @@
 #ifndef _DRM_SAREA_H_
 #define _DRM_SAREA_H_
 
-#include <drm/drm.h>
+#include "drm.h"
 
 /* SAREA area needs to be at least a page */
 #if defined(__alpha__)
diff --git a/include/uapi/drm/exynos_drm.h b/include/uapi/drm/exynos_drm.h
index 5575ed1..312c67d 100644
--- a/include/uapi/drm/exynos_drm.h
+++ b/include/uapi/drm/exynos_drm.h
@@ -15,7 +15,7 @@
 #ifndef _UAPI_EXYNOS_DRM_H_
 #define _UAPI_EXYNOS_DRM_H_
 
-#include <drm/drm.h>
+#include "drm.h"
 
 /**
  * User-desired buffer creation information structure.
@@ -27,7 +27,7 @@
  *	- this handle will be set by gem module of kernel side.
  */
 struct drm_exynos_gem_create {
-	uint64_t size;
+	__u64 size;
 	unsigned int flags;
 	unsigned int handle;
 };
@@ -44,7 +44,7 @@
 struct drm_exynos_gem_info {
 	unsigned int handle;
 	unsigned int flags;
-	uint64_t size;
+	__u64 size;
 };
 
 /**
@@ -58,7 +58,7 @@
 struct drm_exynos_vidi_connection {
 	unsigned int connection;
 	unsigned int extensions;
-	uint64_t edid;
+	__u64 edid;
 };
 
 /* memory type definitions. */
diff --git a/include/uapi/drm/i810_drm.h b/include/uapi/drm/i810_drm.h
index 34736ef..bdb0287 100644
--- a/include/uapi/drm/i810_drm.h
+++ b/include/uapi/drm/i810_drm.h
@@ -1,7 +1,7 @@
 #ifndef _I810_DRM_H_
 #define _I810_DRM_H_
 
-#include <drm/drm.h>
+#include "drm.h"
 
 /* WARNING: These defines must be the same as what the Xserver uses.
  * if you change them, you must change the defines in the Xserver.
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 484a9fb..c937a36 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -27,7 +27,7 @@
 #ifndef _UAPI_I915_DRM_H_
 #define _UAPI_I915_DRM_H_
 
-#include <drm/drm.h>
+#include "drm.h"
 
 /* Please note that modifications to all structs defined here are
  * subject to backwards-compatibility constraints.
@@ -1079,6 +1079,12 @@
 };
 
 struct drm_i915_reg_read {
+	/*
+	 * Register offset.
+	 * For 64bit wide registers where the upper 32bits don't immediately
+	 * follow the lower 32bits, the offset of the lower 32bits must
+	 * be specified
+	 */
 	__u64 offset;
 	__u64 val; /* Return value */
 };
@@ -1125,8 +1131,9 @@
 	__u32 ctx_id;
 	__u32 size;
 	__u64 param;
-#define I915_CONTEXT_PARAM_BAN_PERIOD 0x1
-#define I915_CONTEXT_PARAM_NO_ZEROMAP 0x2
+#define I915_CONTEXT_PARAM_BAN_PERIOD	0x1
+#define I915_CONTEXT_PARAM_NO_ZEROMAP	0x2
+#define I915_CONTEXT_PARAM_GTT_SIZE	0x3
 	__u64 value;
 };
 
diff --git a/include/uapi/drm/mga_drm.h b/include/uapi/drm/mga_drm.h
index 2375bfd..fca8170 100644
--- a/include/uapi/drm/mga_drm.h
+++ b/include/uapi/drm/mga_drm.h
@@ -35,7 +35,7 @@
 #ifndef __MGA_DRM_H__
 #define __MGA_DRM_H__
 
-#include <drm/drm.h>
+#include "drm.h"
 
 /* WARNING: If you change any of these defines, make sure to change the
  * defines in the Xserver file (mga_sarea.h)
diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
index 75a232b..81e6e0d 100644
--- a/include/uapi/drm/msm_drm.h
+++ b/include/uapi/drm/msm_drm.h
@@ -18,8 +18,7 @@
 #ifndef __MSM_DRM_H__
 #define __MSM_DRM_H__
 
-#include <stddef.h>
-#include <drm/drm.h>
+#include "drm.h"
 
 /* Please note that modifications to all structs defined here are
  * subject to backwards-compatibility constraints:
@@ -122,7 +121,7 @@
 struct drm_msm_gem_submit_reloc {
 	__u32 submit_offset;  /* in, offset from submit_bo */
 	__u32 or;             /* in, value OR'd with result */
-	__s32  shift;          /* in, amount of left shift (can be negative) */
+	__s32 shift;          /* in, amount of left shift (can be negative) */
 	__u32 reloc_idx;      /* in, index of reloc_bo buffer */
 	__u64 reloc_offset;   /* in, offset from start of reloc_bo */
 };
diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h
index fd594cc..500d82a 100644
--- a/include/uapi/drm/nouveau_drm.h
+++ b/include/uapi/drm/nouveau_drm.h
@@ -27,6 +27,8 @@
 
 #define DRM_NOUVEAU_EVENT_NVIF                                       0x80000000
 
+#include <drm/drm.h>
+
 #define NOUVEAU_GEM_DOMAIN_CPU       (1 << 0)
 #define NOUVEAU_GEM_DOMAIN_VRAM      (1 << 1)
 #define NOUVEAU_GEM_DOMAIN_GART      (1 << 2)
@@ -41,34 +43,34 @@
 #define NOUVEAU_GEM_TILE_NONCONTIG   0x00000008
 
 struct drm_nouveau_gem_info {
-	uint32_t handle;
-	uint32_t domain;
-	uint64_t size;
-	uint64_t offset;
-	uint64_t map_handle;
-	uint32_t tile_mode;
-	uint32_t tile_flags;
+	__u32 handle;
+	__u32 domain;
+	__u64 size;
+	__u64 offset;
+	__u64 map_handle;
+	__u32 tile_mode;
+	__u32 tile_flags;
 };
 
 struct drm_nouveau_gem_new {
 	struct drm_nouveau_gem_info info;
-	uint32_t channel_hint;
-	uint32_t align;
+	__u32 channel_hint;
+	__u32 align;
 };
 
 #define NOUVEAU_GEM_MAX_BUFFERS 1024
 struct drm_nouveau_gem_pushbuf_bo_presumed {
-	uint32_t valid;
-	uint32_t domain;
-	uint64_t offset;
+	__u32 valid;
+	__u32 domain;
+	__u64 offset;
 };
 
 struct drm_nouveau_gem_pushbuf_bo {
-	uint64_t user_priv;
-	uint32_t handle;
-	uint32_t read_domains;
-	uint32_t write_domains;
-	uint32_t valid_domains;
+	__u64 user_priv;
+	__u32 handle;
+	__u32 read_domains;
+	__u32 write_domains;
+	__u32 valid_domains;
 	struct drm_nouveau_gem_pushbuf_bo_presumed presumed;
 };
 
@@ -77,46 +79,46 @@
 #define NOUVEAU_GEM_RELOC_OR   (1 << 2)
 #define NOUVEAU_GEM_MAX_RELOCS 1024
 struct drm_nouveau_gem_pushbuf_reloc {
-	uint32_t reloc_bo_index;
-	uint32_t reloc_bo_offset;
-	uint32_t bo_index;
-	uint32_t flags;
-	uint32_t data;
-	uint32_t vor;
-	uint32_t tor;
+	__u32 reloc_bo_index;
+	__u32 reloc_bo_offset;
+	__u32 bo_index;
+	__u32 flags;
+	__u32 data;
+	__u32 vor;
+	__u32 tor;
 };
 
 #define NOUVEAU_GEM_MAX_PUSH 512
 struct drm_nouveau_gem_pushbuf_push {
-	uint32_t bo_index;
-	uint32_t pad;
-	uint64_t offset;
-	uint64_t length;
+	__u32 bo_index;
+	__u32 pad;
+	__u64 offset;
+	__u64 length;
 };
 
 struct drm_nouveau_gem_pushbuf {
-	uint32_t channel;
-	uint32_t nr_buffers;
-	uint64_t buffers;
-	uint32_t nr_relocs;
-	uint32_t nr_push;
-	uint64_t relocs;
-	uint64_t push;
-	uint32_t suffix0;
-	uint32_t suffix1;
-	uint64_t vram_available;
-	uint64_t gart_available;
+	__u32 channel;
+	__u32 nr_buffers;
+	__u64 buffers;
+	__u32 nr_relocs;
+	__u32 nr_push;
+	__u64 relocs;
+	__u64 push;
+	__u32 suffix0;
+	__u32 suffix1;
+	__u64 vram_available;
+	__u64 gart_available;
 };
 
 #define NOUVEAU_GEM_CPU_PREP_NOWAIT                                  0x00000001
 #define NOUVEAU_GEM_CPU_PREP_WRITE                                   0x00000004
 struct drm_nouveau_gem_cpu_prep {
-	uint32_t handle;
-	uint32_t flags;
+	__u32 handle;
+	__u32 flags;
 };
 
 struct drm_nouveau_gem_cpu_fini {
-	uint32_t handle;
+	__u32 handle;
 };
 
 #define DRM_NOUVEAU_GETPARAM           0x00 /* deprecated */
diff --git a/include/uapi/drm/omap_drm.h b/include/uapi/drm/omap_drm.h
index 1d0b117..0750c01 100644
--- a/include/uapi/drm/omap_drm.h
+++ b/include/uapi/drm/omap_drm.h
@@ -20,7 +20,7 @@
 #ifndef __OMAP_DRM_H__
 #define __OMAP_DRM_H__
 
-#include <drm/drm.h>
+#include "drm.h"
 
 /* Please note that modifications to all structs defined here are
  * subject to backwards-compatibility constraints.
diff --git a/include/uapi/drm/qxl_drm.h b/include/uapi/drm/qxl_drm.h
index ebebd36..4d1e326 100644
--- a/include/uapi/drm/qxl_drm.h
+++ b/include/uapi/drm/qxl_drm.h
@@ -24,13 +24,12 @@
 #ifndef QXL_DRM_H
 #define QXL_DRM_H
 
-#include <stddef.h>
-#include "drm/drm.h"
+#include "drm.h"
 
 /* Please note that modifications to all structs defined here are
  * subject to backwards-compatibility constraints.
  *
- * Do not use pointers, use uint64_t instead for 32 bit / 64 bit user/kernel
+ * Do not use pointers, use __u64 instead for 32 bit / 64 bit user/kernel
  * compatibility Keep fields aligned to their size
  */
 
@@ -48,14 +47,14 @@
 #define DRM_QXL_ALLOC_SURF  0x06
 
 struct drm_qxl_alloc {
-	uint32_t size;
-	uint32_t handle; /* 0 is an invalid handle */
+	__u32 size;
+	__u32 handle; /* 0 is an invalid handle */
 };
 
 struct drm_qxl_map {
-	uint64_t offset; /* use for mmap system call */
-	uint32_t handle;
-	uint32_t pad;
+	__u64 offset; /* use for mmap system call */
+	__u32 handle;
+	__u32 pad;
 };
 
 /*
@@ -68,59 +67,59 @@
 #define QXL_RELOC_TYPE_SURF 2
 
 struct drm_qxl_reloc {
-	uint64_t src_offset; /* offset into src_handle or src buffer */
-	uint64_t dst_offset; /* offset in dest handle */
-	uint32_t src_handle; /* dest handle to compute address from */
-	uint32_t dst_handle; /* 0 if to command buffer */
-	uint32_t reloc_type;
-	uint32_t pad;
+	__u64 src_offset; /* offset into src_handle or src buffer */
+	__u64 dst_offset; /* offset in dest handle */
+	__u32 src_handle; /* dest handle to compute address from */
+	__u32 dst_handle; /* 0 if to command buffer */
+	__u32 reloc_type;
+	__u32 pad;
 };
 
 struct drm_qxl_command {
-	uint64_t	 __user command; /* void* */
-	uint64_t	 __user relocs; /* struct drm_qxl_reloc* */
-	uint32_t		type;
-	uint32_t		command_size;
-	uint32_t		relocs_num;
-	uint32_t                pad;
+	__u64	 __user command; /* void* */
+	__u64	 __user relocs; /* struct drm_qxl_reloc* */
+	__u32		type;
+	__u32		command_size;
+	__u32		relocs_num;
+	__u32                pad;
 };
 
 /* XXX: call it drm_qxl_commands? */
 struct drm_qxl_execbuffer {
-	uint32_t		flags;		/* for future use */
-	uint32_t		commands_num;
-	uint64_t	 __user commands;	/* struct drm_qxl_command* */
+	__u32		flags;		/* for future use */
+	__u32		commands_num;
+	__u64	 __user commands;	/* struct drm_qxl_command* */
 };
 
 struct drm_qxl_update_area {
-	uint32_t handle;
-	uint32_t top;
-	uint32_t left;
-	uint32_t bottom;
-	uint32_t right;
-	uint32_t pad;
+	__u32 handle;
+	__u32 top;
+	__u32 left;
+	__u32 bottom;
+	__u32 right;
+	__u32 pad;
 };
 
 #define QXL_PARAM_NUM_SURFACES 1 /* rom->n_surfaces */
 #define QXL_PARAM_MAX_RELOCS 2
 struct drm_qxl_getparam {
-	uint64_t param;
-	uint64_t value;
+	__u64 param;
+	__u64 value;
 };
 
 /* these are one bit values */
 struct drm_qxl_clientcap {
-	uint32_t index;
-	uint32_t pad;
+	__u32 index;
+	__u32 pad;
 };
 
 struct drm_qxl_alloc_surf {
-	uint32_t format;
-	uint32_t width;
-	uint32_t height;
-	int32_t stride;
-	uint32_t handle;
-	uint32_t pad;
+	__u32 format;
+	__u32 width;
+	__u32 height;
+	__s32 stride;
+	__u32 handle;
+	__u32 pad;
 };
 
 #define DRM_IOCTL_QXL_ALLOC \
diff --git a/include/uapi/drm/r128_drm.h b/include/uapi/drm/r128_drm.h
index 76b0aa3..7a44c65 100644
--- a/include/uapi/drm/r128_drm.h
+++ b/include/uapi/drm/r128_drm.h
@@ -33,7 +33,7 @@
 #ifndef __R128_DRM_H__
 #define __R128_DRM_H__
 
-#include <drm/drm.h>
+#include "drm.h"
 
 /* WARNING: If you change any of these defines, make sure to change the
  * defines in the X server file (r128_sarea.h)
diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
index 01aa2a8..ccb9bcd 100644
--- a/include/uapi/drm/radeon_drm.h
+++ b/include/uapi/drm/radeon_drm.h
@@ -793,9 +793,9 @@
 #define RADEON_GEM_DOMAIN_VRAM		0x4
 
 struct drm_radeon_gem_info {
-	uint64_t	gart_size;
-	uint64_t	vram_size;
-	uint64_t	vram_visible;
+	__u64	gart_size;
+	__u64	vram_size;
+	__u64	vram_visible;
 };
 
 #define RADEON_GEM_NO_BACKING_STORE	(1 << 0)
@@ -807,11 +807,11 @@
 #define RADEON_GEM_NO_CPU_ACCESS	(1 << 4)
 
 struct drm_radeon_gem_create {
-	uint64_t	size;
-	uint64_t	alignment;
-	uint32_t	handle;
-	uint32_t	initial_domain;
-	uint32_t	flags;
+	__u64	size;
+	__u64	alignment;
+	__u32	handle;
+	__u32	initial_domain;
+	__u32	flags;
 };
 
 /*
@@ -825,10 +825,10 @@
 #define RADEON_GEM_USERPTR_REGISTER	(1 << 3)
 
 struct drm_radeon_gem_userptr {
-	uint64_t		addr;
-	uint64_t		size;
-	uint32_t		flags;
-	uint32_t		handle;
+	__u64		addr;
+	__u64		size;
+	__u32		flags;
+	__u32		handle;
 };
 
 #define RADEON_TILING_MACRO				0x1
@@ -850,72 +850,72 @@
 #define RADEON_TILING_EG_STENCIL_TILE_SPLIT_MASK	0xf
 
 struct drm_radeon_gem_set_tiling {
-	uint32_t	handle;
-	uint32_t	tiling_flags;
-	uint32_t	pitch;
+	__u32	handle;
+	__u32	tiling_flags;
+	__u32	pitch;
 };
 
 struct drm_radeon_gem_get_tiling {
-	uint32_t	handle;
-	uint32_t	tiling_flags;
-	uint32_t	pitch;
+	__u32	handle;
+	__u32	tiling_flags;
+	__u32	pitch;
 };
 
 struct drm_radeon_gem_mmap {
-	uint32_t	handle;
-	uint32_t	pad;
-	uint64_t	offset;
-	uint64_t	size;
-	uint64_t	addr_ptr;
+	__u32	handle;
+	__u32	pad;
+	__u64	offset;
+	__u64	size;
+	__u64	addr_ptr;
 };
 
 struct drm_radeon_gem_set_domain {
-	uint32_t	handle;
-	uint32_t	read_domains;
-	uint32_t	write_domain;
+	__u32	handle;
+	__u32	read_domains;
+	__u32	write_domain;
 };
 
 struct drm_radeon_gem_wait_idle {
-	uint32_t	handle;
-	uint32_t	pad;
+	__u32	handle;
+	__u32	pad;
 };
 
 struct drm_radeon_gem_busy {
-	uint32_t	handle;
-	uint32_t        domain;
+	__u32	handle;
+	__u32        domain;
 };
 
 struct drm_radeon_gem_pread {
 	/** Handle for the object being read. */
-	uint32_t handle;
-	uint32_t pad;
+	__u32 handle;
+	__u32 pad;
 	/** Offset into the object to read from */
-	uint64_t offset;
+	__u64 offset;
 	/** Length of data to read */
-	uint64_t size;
+	__u64 size;
 	/** Pointer to write the data into. */
 	/* void *, but pointers are not 32/64 compatible */
-	uint64_t data_ptr;
+	__u64 data_ptr;
 };
 
 struct drm_radeon_gem_pwrite {
 	/** Handle for the object being written to. */
-	uint32_t handle;
-	uint32_t pad;
+	__u32 handle;
+	__u32 pad;
 	/** Offset into the object to write to */
-	uint64_t offset;
+	__u64 offset;
 	/** Length of data to write */
-	uint64_t size;
+	__u64 size;
 	/** Pointer to read the data from. */
 	/* void *, but pointers are not 32/64 compatible */
-	uint64_t data_ptr;
+	__u64 data_ptr;
 };
 
 /* Sets or returns a value associated with a buffer. */
 struct drm_radeon_gem_op {
-	uint32_t	handle; /* buffer */
-	uint32_t	op;     /* RADEON_GEM_OP_* */
-	uint64_t	value;  /* input or return value */
+	__u32	handle; /* buffer */
+	__u32	op;     /* RADEON_GEM_OP_* */
+	__u64	value;  /* input or return value */
 };
 
 #define RADEON_GEM_OP_GET_INITIAL_DOMAIN	0
@@ -935,11 +935,11 @@
 #define RADEON_VM_PAGE_SNOOPED		(1 << 4)
 
 struct drm_radeon_gem_va {
-	uint32_t		handle;
-	uint32_t		operation;
-	uint32_t		vm_id;
-	uint32_t		flags;
-	uint64_t		offset;
+	__u32		handle;
+	__u32		operation;
+	__u32		vm_id;
+	__u32		flags;
+	__u64		offset;
 };
 
 #define RADEON_CHUNK_ID_RELOCS	0x01
@@ -961,29 +961,29 @@
 /* 0 = normal, + = higher priority, - = lower priority */
 
 struct drm_radeon_cs_chunk {
-	uint32_t		chunk_id;
-	uint32_t		length_dw;
-	uint64_t		chunk_data;
+	__u32		chunk_id;
+	__u32		length_dw;
+	__u64		chunk_data;
 };
 
 /* drm_radeon_cs_reloc.flags */
 #define RADEON_RELOC_PRIO_MASK		(0xf << 0)
 
 struct drm_radeon_cs_reloc {
-	uint32_t		handle;
-	uint32_t		read_domains;
-	uint32_t		write_domain;
-	uint32_t		flags;
+	__u32		handle;
+	__u32		read_domains;
+	__u32		write_domain;
+	__u32		flags;
 };
 
 struct drm_radeon_cs {
-	uint32_t		num_chunks;
-	uint32_t		cs_id;
-	/* this points to uint64_t * which point to cs chunks */
-	uint64_t		chunks;
+	__u32		num_chunks;
+	__u32		cs_id;
+	/* this points to __u64 * which point to cs chunks */
+	__u64		chunks;
 	/* updates to the limits after this CS ioctl */
-	uint64_t		gart_limit;
-	uint64_t		vram_limit;
+	__u64		gart_limit;
+	__u64		vram_limit;
 };
 
 #define RADEON_INFO_DEVICE_ID		0x00
@@ -1042,9 +1042,9 @@
 #define RADEON_INFO_GPU_RESET_COUNTER	0x26
 
 struct drm_radeon_info {
-	uint32_t		request;
-	uint32_t		pad;
-	uint64_t		value;
+	__u32		request;
+	__u32		pad;
+	__u64		value;
 };
 
 /* Those correspond to the tile index to use, this is to explicitly state
diff --git a/include/uapi/drm/savage_drm.h b/include/uapi/drm/savage_drm.h
index 9dc9dc1..5741474 100644
--- a/include/uapi/drm/savage_drm.h
+++ b/include/uapi/drm/savage_drm.h
@@ -26,7 +26,7 @@
 #ifndef __SAVAGE_DRM_H__
 #define __SAVAGE_DRM_H__
 
-#include <drm/drm.h>
+#include "drm.h"
 
 #ifndef __SAVAGE_SAREA_DEFINES__
 #define __SAVAGE_SAREA_DEFINES__
diff --git a/include/uapi/drm/tegra_drm.h b/include/uapi/drm/tegra_drm.h
index 5391780..27d0b05 100644
--- a/include/uapi/drm/tegra_drm.h
+++ b/include/uapi/drm/tegra_drm.h
@@ -23,7 +23,7 @@
 #ifndef _UAPI_TEGRA_DRM_H_
 #define _UAPI_TEGRA_DRM_H_
 
-#include <drm/drm.h>
+#include "drm.h"
 
 #define DRM_TEGRA_GEM_CREATE_TILED     (1 << 0)
 #define DRM_TEGRA_GEM_CREATE_BOTTOM_UP (1 << 1)
diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h
new file mode 100644
index 0000000..eeb37e3
--- /dev/null
+++ b/include/uapi/drm/vc4_drm.h
@@ -0,0 +1,279 @@
+/*
+ * Copyright © 2014-2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef _UAPI_VC4_DRM_H_
+#define _UAPI_VC4_DRM_H_
+
+#include "drm.h"
+
+#define DRM_VC4_SUBMIT_CL                         0x00
+#define DRM_VC4_WAIT_SEQNO                        0x01
+#define DRM_VC4_WAIT_BO                           0x02
+#define DRM_VC4_CREATE_BO                         0x03
+#define DRM_VC4_MMAP_BO                           0x04
+#define DRM_VC4_CREATE_SHADER_BO                  0x05
+#define DRM_VC4_GET_HANG_STATE                    0x06
+
+#define DRM_IOCTL_VC4_SUBMIT_CL           DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl)
+#define DRM_IOCTL_VC4_WAIT_SEQNO          DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno)
+#define DRM_IOCTL_VC4_WAIT_BO             DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_BO, struct drm_vc4_wait_bo)
+#define DRM_IOCTL_VC4_CREATE_BO           DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_BO, struct drm_vc4_create_bo)
+#define DRM_IOCTL_VC4_MMAP_BO             DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo)
+#define DRM_IOCTL_VC4_CREATE_SHADER_BO    DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, struct drm_vc4_create_shader_bo)
+#define DRM_IOCTL_VC4_GET_HANG_STATE      DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_HANG_STATE, struct drm_vc4_get_hang_state)
+
+struct drm_vc4_submit_rcl_surface {
+	__u32 hindex; /* Handle index, or ~0 if not present. */
+	__u32 offset; /* Offset to start of buffer. */
+	/*
+	 * Bits for either render config (color_write) or load/store packet.
+	 * Bits should all be 0 for MSAA load/stores.
+	 */
+	__u16 bits;
+
+#define VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES		(1 << 0)
+	__u16 flags;
+};
+
+/**
+ * struct drm_vc4_submit_cl - ioctl argument for submitting commands to the 3D
+ * engine.
+ *
+ * Drivers typically use GPU BOs to store batchbuffers / command lists and
+ * their associated state.  However, because the VC4 lacks an MMU, we have to
+ * do validation of memory accesses by the GPU commands.  If we were to store
+ * our commands in BOs, we'd need to do uncached readback from them to do the
+ * validation process, which is too expensive.  Instead, userspace accumulates
+ * commands and associated state in plain memory, then the kernel copies the
+ * data to its own address space, and then validates and stores it in a GPU
+ * BO.
+ */
+struct drm_vc4_submit_cl {
+	/* Pointer to the binner command list.
+	 *
+	 * This is the first set of commands executed, which runs the
+	 * coordinate shader to determine where primitives land on the screen,
+	 * then writes out the state updates and draw calls necessary per tile
+	 * to the tile allocation BO.
+	 */
+	__u64 bin_cl;
+
+	/* Pointer to the shader records.
+	 *
+	 * Shader records are the structures read by the hardware that contain
+	 * pointers to uniforms, shaders, and vertex attributes.  The
+	 * reference to the shader record has enough information to determine
+	 * how many pointers are necessary (fixed number for shaders/uniforms,
+	 * and an attribute count), so those BO indices into bo_handles are
+	 * just stored as __u32s before each shader record passed in.
+	 */
+	__u64 shader_rec;
+
+	/* Pointer to uniform data and texture handles for the textures
+	 * referenced by the shader.
+	 *
+	 * For each shader state record, there is a set of uniform data in the
+	 * order referenced by the record (FS, VS, then CS).  Each set of
+	 * uniform data has a __u32 index into bo_handles per texture
+	 * sample operation, in the order the QPU_W_TMUn_S writes appear in
+	 * the program.  Following the texture BO handle indices is the actual
+	 * uniform data.
+	 *
+	 * The individual uniform state blocks don't have sizes passed in,
+	 * because the kernel has to determine the sizes anyway during shader
+	 * code validation.
+	 */
+	__u64 uniforms;
+	__u64 bo_handles;
+
+	/* Size in bytes of the binner command list. */
+	__u32 bin_cl_size;
+	/* Size in bytes of the set of shader records. */
+	__u32 shader_rec_size;
+	/* Number of shader records.
+	 *
+	 * This could just be computed from the contents of shader_records and
+	 * the address bits of references to them from the bin CL, but it
+	 * keeps the kernel from having to resize some allocations it makes.
+	 */
+	__u32 shader_rec_count;
+	/* Size in bytes of the uniform state. */
+	__u32 uniforms_size;
+
+	/* Number of BO handles passed in (size is that times 4). */
+	__u32 bo_handle_count;
+
+	/* RCL setup: */
+	__u16 width;
+	__u16 height;
+	__u8 min_x_tile;
+	__u8 min_y_tile;
+	__u8 max_x_tile;
+	__u8 max_y_tile;
+	struct drm_vc4_submit_rcl_surface color_read;
+	struct drm_vc4_submit_rcl_surface color_write;
+	struct drm_vc4_submit_rcl_surface zs_read;
+	struct drm_vc4_submit_rcl_surface zs_write;
+	struct drm_vc4_submit_rcl_surface msaa_color_write;
+	struct drm_vc4_submit_rcl_surface msaa_zs_write;
+	__u32 clear_color[2];
+	__u32 clear_z;
+	__u8 clear_s;
+
+	__u32 pad:24;
+
+#define VC4_SUBMIT_CL_USE_CLEAR_COLOR			(1 << 0)
+	__u32 flags;
+
+	/* Returned value of the seqno of this render job (for the
+	 * wait ioctl).
+	 */
+	__u64 seqno;
+};
+
+/**
+ * struct drm_vc4_wait_seqno - ioctl argument for waiting for
+ * DRM_VC4_SUBMIT_CL completion using its returned seqno.
+ *
+ * timeout_ns is the timeout in nanoseconds, where "0" means "don't
+ * block, just return the status."
+ */
+struct drm_vc4_wait_seqno {
+	__u64 seqno;
+	__u64 timeout_ns;
+};
+
+/**
+ * struct drm_vc4_wait_bo - ioctl argument for waiting for
+ * completion of the last DRM_VC4_SUBMIT_CL on a BO.
+ *
+ * This is useful for cases where multiple processes might be
+ * rendering to a BO and you want to wait for all rendering to be
+ * completed.
+ */
+struct drm_vc4_wait_bo {
+	__u32 handle;
+	__u32 pad;
+	__u64 timeout_ns;
+};
+
+/**
+ * struct drm_vc4_create_bo - ioctl argument for creating VC4 BOs.
+ *
+ * There are currently no values for the flags argument, but it may be
+ * used in a future extension.
+ */
+struct drm_vc4_create_bo {
+	__u32 size;
+	__u32 flags;
+	/** Returned GEM handle for the BO. */
+	__u32 handle;
+	__u32 pad;
+};
+
+/**
+ * struct drm_vc4_mmap_bo - ioctl argument for mapping VC4 BOs.
+ *
+ * This doesn't actually perform an mmap.  Instead, it returns the
+ * offset you need to use in an mmap on the DRM device node.  This
+ * means that tools like valgrind end up knowing about the mapped
+ * memory.
+ *
+ * There are currently no values for the flags argument, but it may be
+ * used in a future extension.
+ */
+struct drm_vc4_mmap_bo {
+	/** Handle for the object being mapped. */
+	__u32 handle;
+	__u32 flags;
+	/** offset into the drm node to use for subsequent mmap call. */
+	__u64 offset;
+};
+
+/**
+ * struct drm_vc4_create_shader_bo - ioctl argument for creating VC4
+ * shader BOs.
+ *
+ * Since allowing a shader to be overwritten while it's also being
+ * executed from would allow privlege escalation, shaders must be
+ * created using this ioctl, and they can't be mmapped later.
+ */
+struct drm_vc4_create_shader_bo {
+	/* Size of the data argument. */
+	__u32 size;
+	/* Flags, currently must be 0. */
+	__u32 flags;
+
+	/* Pointer to the data. */
+	__u64 data;
+
+	/** Returned GEM handle for the BO. */
+	__u32 handle;
+	/* Pad, must be 0. */
+	__u32 pad;
+};
+
+struct drm_vc4_get_hang_state_bo {
+	__u32 handle;
+	__u32 paddr;
+	__u32 size;
+	__u32 pad;
+};
+
+/**
+ * struct drm_vc4_hang_state - ioctl argument for collecting state
+ * from a GPU hang for analysis.
+*/
+struct drm_vc4_get_hang_state {
+	/** Pointer to array of struct drm_vc4_get_hang_state_bo. */
+	__u64 bo;
+	/**
+	 * On input, the size of the bo array.  Output is the number
+	 * of bos to be returned.
+	 */
+	__u32 bo_count;
+
+	__u32 start_bin, start_render;
+
+	__u32 ct0ca, ct0ea;
+	__u32 ct1ca, ct1ea;
+	__u32 ct0cs, ct1cs;
+	__u32 ct0ra0, ct1ra0;
+
+	__u32 bpca, bpcs;
+	__u32 bpoa, bpos;
+
+	__u32 vpmbase;
+
+	__u32 dbge;
+	__u32 fdbgo;
+	__u32 fdbgb;
+	__u32 fdbgr;
+	__u32 fdbgs;
+	__u32 errstat;
+
+	/* Pad that we may save more registers into in the future. */
+	__u32 pad[16];
+};
+
+#endif /* _UAPI_VC4_DRM_H_ */
diff --git a/include/uapi/drm/via_drm.h b/include/uapi/drm/via_drm.h
index 45bc80c..fa21ed18 100644
--- a/include/uapi/drm/via_drm.h
+++ b/include/uapi/drm/via_drm.h
@@ -24,7 +24,7 @@
 #ifndef _VIA_DRM_H_
 #define _VIA_DRM_H_
 
-#include <drm/drm.h>
+#include "drm.h"
 
 /* WARNING: These defines must be the same as what the Xserver uses.
  * if you change them, you must change the defines in the Xserver.
@@ -33,9 +33,6 @@
 #ifndef _VIA_DEFINES_
 #define _VIA_DEFINES_
 
-#ifndef __KERNEL__
-#include "via_drmclient.h"
-#endif
 
 #define VIA_NR_SAREA_CLIPRECTS		8
 #define VIA_NR_XVMC_PORTS               10
diff --git a/include/uapi/drm/virtgpu_drm.h b/include/uapi/drm/virtgpu_drm.h
index fc9e2d6..c74f1f9 100644
--- a/include/uapi/drm/virtgpu_drm.h
+++ b/include/uapi/drm/virtgpu_drm.h
@@ -24,13 +24,12 @@
 #ifndef VIRTGPU_DRM_H
 #define VIRTGPU_DRM_H
 
-#include <stddef.h>
-#include "drm/drm.h"
+#include "drm.h"
 
 /* Please note that modifications to all structs defined here are
  * subject to backwards-compatibility constraints.
  *
- * Do not use pointers, use uint64_t instead for 32 bit / 64 bit user/kernel
+ * Do not use pointers, use __u64 instead for 32 bit / 64 bit user/kernel
  * compatibility Keep fields aligned to their size
  */
 
@@ -45,88 +44,88 @@
 #define DRM_VIRTGPU_GET_CAPS  0x09
 
 struct drm_virtgpu_map {
-	uint64_t offset; /* use for mmap system call */
-	uint32_t handle;
-	uint32_t pad;
+	__u64 offset; /* use for mmap system call */
+	__u32 handle;
+	__u32 pad;
 };
 
 struct drm_virtgpu_execbuffer {
-	uint32_t		flags;		/* for future use */
-	uint32_t size;
-	uint64_t command; /* void* */
-	uint64_t bo_handles;
-	uint32_t num_bo_handles;
-	uint32_t pad;
+	__u32		flags;		/* for future use */
+	__u32 size;
+	__u64 command; /* void* */
+	__u64 bo_handles;
+	__u32 num_bo_handles;
+	__u32 pad;
 };
 
 #define VIRTGPU_PARAM_3D_FEATURES 1 /* do we have 3D features in the hw */
 
 struct drm_virtgpu_getparam {
-	uint64_t param;
-	uint64_t value;
+	__u64 param;
+	__u64 value;
 };
 
 /* NO_BO flags? NO resource flag? */
 /* resource flag for y_0_top */
 struct drm_virtgpu_resource_create {
-	uint32_t target;
-	uint32_t format;
-	uint32_t bind;
-	uint32_t width;
-	uint32_t height;
-	uint32_t depth;
-	uint32_t array_size;
-	uint32_t last_level;
-	uint32_t nr_samples;
-	uint32_t flags;
-	uint32_t bo_handle; /* if this is set - recreate a new resource attached to this bo ? */
-	uint32_t res_handle;  /* returned by kernel */
-	uint32_t size;        /* validate transfer in the host */
-	uint32_t stride;      /* validate transfer in the host */
+	__u32 target;
+	__u32 format;
+	__u32 bind;
+	__u32 width;
+	__u32 height;
+	__u32 depth;
+	__u32 array_size;
+	__u32 last_level;
+	__u32 nr_samples;
+	__u32 flags;
+	__u32 bo_handle; /* if this is set - recreate a new resource attached to this bo ? */
+	__u32 res_handle;  /* returned by kernel */
+	__u32 size;        /* validate transfer in the host */
+	__u32 stride;      /* validate transfer in the host */
 };
 
 struct drm_virtgpu_resource_info {
-	uint32_t bo_handle;
-	uint32_t res_handle;
-	uint32_t size;
-	uint32_t stride;
+	__u32 bo_handle;
+	__u32 res_handle;
+	__u32 size;
+	__u32 stride;
 };
 
 struct drm_virtgpu_3d_box {
-	uint32_t x;
-	uint32_t y;
-	uint32_t z;
-	uint32_t w;
-	uint32_t h;
-	uint32_t d;
+	__u32 x;
+	__u32 y;
+	__u32 z;
+	__u32 w;
+	__u32 h;
+	__u32 d;
 };
 
 struct drm_virtgpu_3d_transfer_to_host {
-	uint32_t bo_handle;
+	__u32 bo_handle;
 	struct drm_virtgpu_3d_box box;
-	uint32_t level;
-	uint32_t offset;
+	__u32 level;
+	__u32 offset;
 };
 
 struct drm_virtgpu_3d_transfer_from_host {
-	uint32_t bo_handle;
+	__u32 bo_handle;
 	struct drm_virtgpu_3d_box box;
-	uint32_t level;
-	uint32_t offset;
+	__u32 level;
+	__u32 offset;
 };
 
 #define VIRTGPU_WAIT_NOWAIT 1 /* like it */
 struct drm_virtgpu_3d_wait {
-	uint32_t handle; /* 0 is an invalid handle */
-	uint32_t flags;
+	__u32 handle; /* 0 is an invalid handle */
+	__u32 flags;
 };
 
 struct drm_virtgpu_get_caps {
-	uint32_t cap_set_id;
-	uint32_t cap_set_ver;
-	uint64_t addr;
-	uint32_t size;
-	uint32_t pad;
+	__u32 cap_set_id;
+	__u32 cap_set_ver;
+	__u64 addr;
+	__u32 size;
+	__u32 pad;
 };
 
 #define DRM_IOCTL_VIRTGPU_MAP \
diff --git a/include/uapi/drm/vmwgfx_drm.h b/include/uapi/drm/vmwgfx_drm.h
index 05b2049..5b68b4d 100644
--- a/include/uapi/drm/vmwgfx_drm.h
+++ b/include/uapi/drm/vmwgfx_drm.h
@@ -28,9 +28,7 @@
 #ifndef __VMWGFX_DRM_H__
 #define __VMWGFX_DRM_H__
 
-#ifndef __KERNEL__
-#include <drm/drm.h>
-#endif
+#include "drm.h"
 
 #define DRM_VMW_MAX_SURFACE_FACES 6
 #define DRM_VMW_MAX_MIP_LEVELS 24
@@ -111,9 +109,9 @@
  */
 
 struct drm_vmw_getparam_arg {
-	uint64_t value;
-	uint32_t param;
-	uint32_t pad64;
+	__u64 value;
+	__u32 param;
+	__u32 pad64;
 };
 
 /*************************************************************************/
@@ -134,8 +132,8 @@
  */
 
 struct drm_vmw_context_arg {
-	int32_t cid;
-	uint32_t pad64;
+	__s32 cid;
+	__u32 pad64;
 };
 
 /*************************************************************************/
@@ -165,7 +163,7 @@
  * @mip_levels: Number of mip levels for each face.
  * An unused face should have 0 encoded.
  * @size_addr: Address of a user-space array of sruct drm_vmw_size
- * cast to an uint64_t for 32-64 bit compatibility.
+ * cast to an __u64 for 32-64 bit compatibility.
  * The size of the array should equal the total number of mipmap levels.
  * @shareable: Boolean whether other clients (as identified by file descriptors)
  * may reference this surface.
@@ -177,12 +175,12 @@
  */
 
 struct drm_vmw_surface_create_req {
-	uint32_t flags;
-	uint32_t format;
-	uint32_t mip_levels[DRM_VMW_MAX_SURFACE_FACES];
-	uint64_t size_addr;
-	int32_t shareable;
-	int32_t scanout;
+	__u32 flags;
+	__u32 format;
+	__u32 mip_levels[DRM_VMW_MAX_SURFACE_FACES];
+	__u64 size_addr;
+	__s32 shareable;
+	__s32 scanout;
 };
 
 /**
@@ -197,7 +195,7 @@
  */
 
 struct drm_vmw_surface_arg {
-	int32_t sid;
+	__s32 sid;
 	enum drm_vmw_handle_type handle_type;
 };
 
@@ -213,10 +211,10 @@
  */
 
 struct drm_vmw_size {
-	uint32_t width;
-	uint32_t height;
-	uint32_t depth;
-	uint32_t pad64;
+	__u32 width;
+	__u32 height;
+	__u32 depth;
+	__u32 pad64;
 };
 
 /**
@@ -284,13 +282,13 @@
 /**
  * struct drm_vmw_execbuf_arg
  *
- * @commands: User-space address of a command buffer cast to an uint64_t.
+ * @commands: User-space address of a command buffer cast to an __u64.
  * @command-size: Size in bytes of the command buffer.
  * @throttle-us: Sleep until software is less than @throttle_us
  * microseconds ahead of hardware. The driver may round this value
  * to the nearest kernel tick.
  * @fence_rep: User-space address of a struct drm_vmw_fence_rep cast to an
- * uint64_t.
+ * __u64.
  * @version: Allows expanding the execbuf ioctl parameters without breaking
  * backwards compatibility, since user-space will always tell the kernel
  * which version it uses.
@@ -302,14 +300,14 @@
 #define DRM_VMW_EXECBUF_VERSION 2
 
 struct drm_vmw_execbuf_arg {
-	uint64_t commands;
-	uint32_t command_size;
-	uint32_t throttle_us;
-	uint64_t fence_rep;
-	uint32_t version;
-	uint32_t flags;
-	uint32_t context_handle;
-	uint32_t pad64;
+	__u64 commands;
+	__u32 command_size;
+	__u32 throttle_us;
+	__u64 fence_rep;
+	__u32 version;
+	__u32 flags;
+	__u32 context_handle;
+	__u32 pad64;
 };
 
 /**
@@ -338,12 +336,12 @@
  */
 
 struct drm_vmw_fence_rep {
-	uint32_t handle;
-	uint32_t mask;
-	uint32_t seqno;
-	uint32_t passed_seqno;
-	uint32_t pad64;
-	int32_t error;
+	__u32 handle;
+	__u32 mask;
+	__u32 seqno;
+	__u32 passed_seqno;
+	__u32 pad64;
+	__s32 error;
 };
 
 /*************************************************************************/
@@ -373,8 +371,8 @@
  */
 
 struct drm_vmw_alloc_dmabuf_req {
-	uint32_t size;
-	uint32_t pad64;
+	__u32 size;
+	__u32 pad64;
 };
 
 /**
@@ -391,11 +389,11 @@
  */
 
 struct drm_vmw_dmabuf_rep {
-	uint64_t map_handle;
-	uint32_t handle;
-	uint32_t cur_gmr_id;
-	uint32_t cur_gmr_offset;
-	uint32_t pad64;
+	__u64 map_handle;
+	__u32 handle;
+	__u32 cur_gmr_id;
+	__u32 cur_gmr_offset;
+	__u32 pad64;
 };
 
 /**
@@ -428,8 +426,8 @@
  */
 
 struct drm_vmw_unref_dmabuf_arg {
-	uint32_t handle;
-	uint32_t pad64;
+	__u32 handle;
+	__u32 pad64;
 };
 
 /*************************************************************************/
@@ -452,10 +450,10 @@
  */
 
 struct drm_vmw_rect {
-	int32_t x;
-	int32_t y;
-	uint32_t w;
-	uint32_t h;
+	__s32 x;
+	__s32 y;
+	__u32 w;
+	__u32 h;
 };
 
 /**
@@ -477,21 +475,21 @@
  */
 
 struct drm_vmw_control_stream_arg {
-	uint32_t stream_id;
-	uint32_t enabled;
+	__u32 stream_id;
+	__u32 enabled;
 
-	uint32_t flags;
-	uint32_t color_key;
+	__u32 flags;
+	__u32 color_key;
 
-	uint32_t handle;
-	uint32_t offset;
-	int32_t format;
-	uint32_t size;
-	uint32_t width;
-	uint32_t height;
-	uint32_t pitch[3];
+	__u32 handle;
+	__u32 offset;
+	__s32 format;
+	__u32 size;
+	__u32 width;
+	__u32 height;
+	__u32 pitch[3];
 
-	uint32_t pad64;
+	__u32 pad64;
 	struct drm_vmw_rect src;
 	struct drm_vmw_rect dst;
 };
@@ -519,12 +517,12 @@
  */
 
 struct drm_vmw_cursor_bypass_arg {
-	uint32_t flags;
-	uint32_t crtc_id;
-	int32_t xpos;
-	int32_t ypos;
-	int32_t xhot;
-	int32_t yhot;
+	__u32 flags;
+	__u32 crtc_id;
+	__s32 xpos;
+	__s32 ypos;
+	__s32 xhot;
+	__s32 yhot;
 };
 
 /*************************************************************************/
@@ -542,8 +540,8 @@
  */
 
 struct drm_vmw_stream_arg {
-	uint32_t stream_id;
-	uint32_t pad64;
+	__u32 stream_id;
+	__u32 pad64;
 };
 
 /*************************************************************************/
@@ -565,7 +563,7 @@
 /**
  * struct drm_vmw_get_3d_cap_arg
  *
- * @buffer: Pointer to a buffer for capability data, cast to an uint64_t
+ * @buffer: Pointer to a buffer for capability data, cast to an __u64
  * @size: Max size to copy
  *
  * Input argument to the DRM_VMW_GET_3D_CAP_IOCTL
@@ -573,9 +571,9 @@
  */
 
 struct drm_vmw_get_3d_cap_arg {
-	uint64_t buffer;
-	uint32_t max_size;
-	uint32_t pad64;
+	__u64 buffer;
+	__u32 max_size;
+	__u32 pad64;
 };
 
 /*************************************************************************/
@@ -624,14 +622,14 @@
  */
 
 struct drm_vmw_fence_wait_arg {
-	uint32_t handle;
-	int32_t  cookie_valid;
-	uint64_t kernel_cookie;
-	uint64_t timeout_us;
-	int32_t lazy;
-	int32_t flags;
-	int32_t wait_options;
-	int32_t pad64;
+	__u32 handle;
+	__s32  cookie_valid;
+	__u64 kernel_cookie;
+	__u64 timeout_us;
+	__s32 lazy;
+	__s32 flags;
+	__s32 wait_options;
+	__s32 pad64;
 };
 
 /*************************************************************************/
@@ -655,12 +653,12 @@
  */
 
 struct drm_vmw_fence_signaled_arg {
-	 uint32_t handle;
-	 uint32_t flags;
-	 int32_t signaled;
-	 uint32_t passed_seqno;
-	 uint32_t signaled_flags;
-	 uint32_t pad64;
+	 __u32 handle;
+	 __u32 flags;
+	 __s32 signaled;
+	 __u32 passed_seqno;
+	 __u32 signaled_flags;
+	 __u32 pad64;
 };
 
 /*************************************************************************/
@@ -681,8 +679,8 @@
  */
 
 struct drm_vmw_fence_arg {
-	 uint32_t handle;
-	 uint32_t pad64;
+	 __u32 handle;
+	 __u32 pad64;
 };
 
 
@@ -703,9 +701,9 @@
 
 struct drm_vmw_event_fence {
 	struct drm_event base;
-	uint64_t user_data;
-	uint32_t tv_sec;
-	uint32_t tv_usec;
+	__u64 user_data;
+	__u32 tv_sec;
+	__u32 tv_usec;
 };
 
 /*
@@ -717,17 +715,17 @@
 /**
  * struct drm_vmw_fence_event_arg
  *
- * @fence_rep: Pointer to fence_rep structure cast to uint64_t or 0 if
+ * @fence_rep: Pointer to fence_rep structure cast to __u64 or 0 if
  * the fence is not supposed to be referenced by user-space.
  * @user_info: Info to be delivered with the event.
  * @handle: Attach the event to this fence only.
  * @flags: A set of flags as defined above.
  */
 struct drm_vmw_fence_event_arg {
-	uint64_t fence_rep;
-	uint64_t user_data;
-	uint32_t handle;
-	uint32_t flags;
+	__u64 fence_rep;
+	__u64 user_data;
+	__u32 handle;
+	__u32 flags;
 };
 
 
@@ -747,7 +745,7 @@
  * @sid: Surface id to present from.
  * @dest_x: X placement coordinate for surface.
  * @dest_y: Y placement coordinate for surface.
- * @clips_ptr: Pointer to an array of clip rects cast to an uint64_t.
+ * @clips_ptr: Pointer to an array of clip rects cast to an __u64.
  * @num_clips: Number of cliprects given relative to the framebuffer origin,
  * in the same coordinate space as the frame buffer.
  * @pad64: Unused 64-bit padding.
@@ -756,13 +754,13 @@
  */
 
 struct drm_vmw_present_arg {
-	uint32_t fb_id;
-	uint32_t sid;
-	int32_t dest_x;
-	int32_t dest_y;
-	uint64_t clips_ptr;
-	uint32_t num_clips;
-	uint32_t pad64;
+	__u32 fb_id;
+	__u32 sid;
+	__s32 dest_x;
+	__s32 dest_y;
+	__u64 clips_ptr;
+	__u32 num_clips;
+	__u32 pad64;
 };
 
 
@@ -780,16 +778,16 @@
  * struct drm_vmw_present_arg
  * @fb_id: fb_id to present / read back from.
  * @num_clips: Number of cliprects.
- * @clips_ptr: Pointer to an array of clip rects cast to an uint64_t.
- * @fence_rep: Pointer to a struct drm_vmw_fence_rep, cast to an uint64_t.
+ * @clips_ptr: Pointer to an array of clip rects cast to an __u64.
+ * @fence_rep: Pointer to a struct drm_vmw_fence_rep, cast to an __u64.
  * If this member is NULL, then the ioctl should not return a fence.
  */
 
 struct drm_vmw_present_readback_arg {
-	 uint32_t fb_id;
-	 uint32_t num_clips;
-	 uint64_t clips_ptr;
-	 uint64_t fence_rep;
+	 __u32 fb_id;
+	 __u32 num_clips;
+	 __u64 clips_ptr;
+	 __u64 fence_rep;
 };
 
 /*************************************************************************/
@@ -805,14 +803,14 @@
  * struct drm_vmw_update_layout_arg
  *
  * @num_outputs: number of active connectors
- * @rects: pointer to array of drm_vmw_rect cast to an uint64_t
+ * @rects: pointer to array of drm_vmw_rect cast to an __u64
  *
  * Input argument to the DRM_VMW_UPDATE_LAYOUT Ioctl.
  */
 struct drm_vmw_update_layout_arg {
-	uint32_t num_outputs;
-	uint32_t pad64;
-	uint64_t rects;
+	__u32 num_outputs;
+	__u32 pad64;
+	__u64 rects;
 };
 
 
@@ -849,10 +847,10 @@
  */
 struct drm_vmw_shader_create_arg {
 	enum drm_vmw_shader_type shader_type;
-	uint32_t size;
-	uint32_t buffer_handle;
-	uint32_t shader_handle;
-	uint64_t offset;
+	__u32 size;
+	__u32 buffer_handle;
+	__u32 shader_handle;
+	__u64 offset;
 };
 
 /*************************************************************************/
@@ -871,8 +869,8 @@
  * Input argument to the DRM_VMW_UNREF_SHADER ioctl.
  */
 struct drm_vmw_shader_arg {
-	uint32_t handle;
-	uint32_t pad64;
+	__u32 handle;
+	__u32 pad64;
 };
 
 /*************************************************************************/
@@ -918,14 +916,14 @@
  * Part of output argument for the DRM_VMW_GB_SURFACE_REF Ioctl.
  */
 struct drm_vmw_gb_surface_create_req {
-	uint32_t svga3d_flags;
-	uint32_t format;
-	uint32_t mip_levels;
+	__u32 svga3d_flags;
+	__u32 format;
+	__u32 mip_levels;
 	enum drm_vmw_surface_flags drm_surface_flags;
-	uint32_t multisample_count;
-	uint32_t autogen_filter;
-	uint32_t buffer_handle;
-	uint32_t array_size;
+	__u32 multisample_count;
+	__u32 autogen_filter;
+	__u32 buffer_handle;
+	__u32 array_size;
 	struct drm_vmw_size base_size;
 };
 
@@ -944,11 +942,11 @@
  * Output argument for the DRM_VMW_GB_SURFACE_CREATE ioctl.
  */
 struct drm_vmw_gb_surface_create_rep {
-	uint32_t handle;
-	uint32_t backup_size;
-	uint32_t buffer_handle;
-	uint32_t buffer_size;
-	uint64_t buffer_map_handle;
+	__u32 handle;
+	__u32 backup_size;
+	__u32 buffer_handle;
+	__u32 buffer_size;
+	__u64 buffer_map_handle;
 };
 
 /**
@@ -1061,8 +1059,8 @@
 struct drm_vmw_synccpu_arg {
 	enum drm_vmw_synccpu_op op;
 	enum drm_vmw_synccpu_flags flags;
-	uint32_t handle;
-	uint32_t pad64;
+	__u32 handle;
+	__u32 pad64;
 };
 
 /*************************************************************************/
diff --git a/include/uapi/linux/agpgart.h b/include/uapi/linux/agpgart.h
index 4e828cf..f525104 100644
--- a/include/uapi/linux/agpgart.h
+++ b/include/uapi/linux/agpgart.h
@@ -52,6 +52,7 @@
 
 #ifndef __KERNEL__
 #include <linux/types.h>
+#include <stdlib.h>
 
 struct agp_version {
 	__u16 major;
diff --git a/include/uapi/linux/nfs.h b/include/uapi/linux/nfs.h
index 654bae3..5e62961 100644
--- a/include/uapi/linux/nfs.h
+++ b/include/uapi/linux/nfs.h
@@ -33,17 +33,6 @@
 
 #define NFS_PIPE_DIRNAME "nfs"
 
-/* NFS ioctls */
-/* Let's follow btrfs lead on CLONE to avoid messing userspace */
-#define NFS_IOC_CLONE		_IOW(0x94, 9, int)
-#define NFS_IOC_CLONE_RANGE	_IOW(0x94, 13, int)
-
-struct nfs_ioctl_clone_range_args {
-	__s64 src_fd;
-	__u64 src_off, count;
-	__u64 dst_off;
-};
-
 /*
  * NFS stats. The good thing with these values is that NFSv3 errors are
  * a superset of NFSv2 errors (with the exception of NFSERR_WFLUSH which
diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h
index 7a63faa..4b04ead 100644
--- a/include/uapi/linux/virtio_gpu.h
+++ b/include/uapi/linux/virtio_gpu.h
@@ -287,7 +287,7 @@
 /* VIRTIO_GPU_RESP_OK_CAPSET */
 struct virtio_gpu_resp_capset {
 	struct virtio_gpu_ctrl_hdr hdr;
-	uint8_t capset_data[];
+	__u8 capset_data[];
 };
 
 #define VIRTIO_GPU_EVENT_DISPLAY (1 << 0)
diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h
index 85dedca..eeba753 100644
--- a/include/video/imx-ipu-v3.h
+++ b/include/video/imx-ipu-v3.h
@@ -343,7 +343,6 @@
 	int di;
 	int dc;
 	int dp;
-	int dmfc;
 	int dma[2];
 };
 
diff --git a/kernel/async.c b/kernel/async.c
index 4c3773c..d2edd6e 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -326,3 +326,4 @@
 
 	return worker && worker->current_func == async_run_entry_fn;
 }
+EXPORT_SYMBOL_GPL(current_is_async);
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 3f4c99e..b0799bc 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -28,11 +28,17 @@
 	    attr->value_size == 0)
 		return ERR_PTR(-EINVAL);
 
+	if (attr->value_size >= 1 << (KMALLOC_SHIFT_MAX - 1))
+		/* if value_size is bigger, the user space won't be able to
+		 * access the elements.
+		 */
+		return ERR_PTR(-E2BIG);
+
 	elem_size = round_up(attr->value_size, 8);
 
 	/* check round_up into zero and u32 overflow */
 	if (elem_size == 0 ||
-	    attr->max_entries > (U32_MAX - sizeof(*array)) / elem_size)
+	    attr->max_entries > (U32_MAX - PAGE_SIZE - sizeof(*array)) / elem_size)
 		return ERR_PTR(-ENOMEM);
 
 	array_size = sizeof(*array) + attr->max_entries * elem_size;
@@ -105,7 +111,7 @@
 		/* all elements already exist */
 		return -EEXIST;
 
-	memcpy(array->value + array->elem_size * index, value, array->elem_size);
+	memcpy(array->value + array->elem_size * index, value, map->value_size);
 	return 0;
 }
 
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 19909b2..34777b37 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -64,12 +64,35 @@
 		 */
 		goto free_htab;
 
-	err = -ENOMEM;
+	if (htab->map.value_size >= (1 << (KMALLOC_SHIFT_MAX - 1)) -
+	    MAX_BPF_STACK - sizeof(struct htab_elem))
+		/* if value_size is bigger, the user space won't be able to
+		 * access the elements via bpf syscall. This check also makes
+		 * sure that the elem_size doesn't overflow and it's
+		 * kmalloc-able later in htab_map_update_elem()
+		 */
+		goto free_htab;
+
+	htab->elem_size = sizeof(struct htab_elem) +
+			  round_up(htab->map.key_size, 8) +
+			  htab->map.value_size;
+
 	/* prevent zero size kmalloc and check for u32 overflow */
 	if (htab->n_buckets == 0 ||
 	    htab->n_buckets > U32_MAX / sizeof(struct hlist_head))
 		goto free_htab;
 
+	if ((u64) htab->n_buckets * sizeof(struct hlist_head) +
+	    (u64) htab->elem_size * htab->map.max_entries >=
+	    U32_MAX - PAGE_SIZE)
+		/* make sure page count doesn't overflow */
+		goto free_htab;
+
+	htab->map.pages = round_up(htab->n_buckets * sizeof(struct hlist_head) +
+				   htab->elem_size * htab->map.max_entries,
+				   PAGE_SIZE) >> PAGE_SHIFT;
+
+	err = -ENOMEM;
 	htab->buckets = kmalloc_array(htab->n_buckets, sizeof(struct hlist_head),
 				      GFP_USER | __GFP_NOWARN);
 
@@ -85,13 +108,6 @@
 	raw_spin_lock_init(&htab->lock);
 	htab->count = 0;
 
-	htab->elem_size = sizeof(struct htab_elem) +
-			  round_up(htab->map.key_size, 8) +
-			  htab->map.value_size;
-
-	htab->map.pages = round_up(htab->n_buckets * sizeof(struct hlist_head) +
-				   htab->elem_size * htab->map.max_entries,
-				   PAGE_SIZE) >> PAGE_SHIFT;
 	return &htab->map;
 
 free_htab:
@@ -222,7 +238,7 @@
 	WARN_ON_ONCE(!rcu_read_lock_held());
 
 	/* allocate new element outside of lock */
-	l_new = kmalloc(htab->elem_size, GFP_ATOMIC);
+	l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN);
 	if (!l_new)
 		return -ENOMEM;
 
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index be6d726..5a8a797 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -34,7 +34,7 @@
 		atomic_inc(&((struct bpf_prog *)raw)->aux->refcnt);
 		break;
 	case BPF_TYPE_MAP:
-		atomic_inc(&((struct bpf_map *)raw)->refcnt);
+		bpf_map_inc(raw, true);
 		break;
 	default:
 		WARN_ON_ONCE(1);
@@ -51,7 +51,7 @@
 		bpf_prog_put(raw);
 		break;
 	case BPF_TYPE_MAP:
-		bpf_map_put(raw);
+		bpf_map_put_with_uref(raw);
 		break;
 	default:
 		WARN_ON_ONCE(1);
@@ -64,7 +64,7 @@
 	void *raw;
 
 	*type = BPF_TYPE_MAP;
-	raw = bpf_map_get(ufd);
+	raw = bpf_map_get_with_uref(ufd);
 	if (IS_ERR(raw)) {
 		*type = BPF_TYPE_PROG;
 		raw = bpf_prog_get(ufd);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 0d3313d..3b39550 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -82,6 +82,14 @@
 	map->ops->map_free(map);
 }
 
+static void bpf_map_put_uref(struct bpf_map *map)
+{
+	if (atomic_dec_and_test(&map->usercnt)) {
+		if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
+			bpf_fd_array_map_clear(map);
+	}
+}
+
 /* decrement map refcnt and schedule it for freeing via workqueue
  * (unrelying map implementation ops->map_free() might sleep)
  */
@@ -93,17 +101,15 @@
 	}
 }
 
+void bpf_map_put_with_uref(struct bpf_map *map)
+{
+	bpf_map_put_uref(map);
+	bpf_map_put(map);
+}
+
 static int bpf_map_release(struct inode *inode, struct file *filp)
 {
-	struct bpf_map *map = filp->private_data;
-
-	if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
-		/* prog_array stores refcnt-ed bpf_prog pointers
-		 * release them all when user space closes prog_array_fd
-		 */
-		bpf_fd_array_map_clear(map);
-
-	bpf_map_put(map);
+	bpf_map_put_with_uref(filp->private_data);
 	return 0;
 }
 
@@ -142,6 +148,7 @@
 		return PTR_ERR(map);
 
 	atomic_set(&map->refcnt, 1);
+	atomic_set(&map->usercnt, 1);
 
 	err = bpf_map_charge_memlock(map);
 	if (err)
@@ -174,7 +181,14 @@
 	return f.file->private_data;
 }
 
-struct bpf_map *bpf_map_get(u32 ufd)
+void bpf_map_inc(struct bpf_map *map, bool uref)
+{
+	atomic_inc(&map->refcnt);
+	if (uref)
+		atomic_inc(&map->usercnt);
+}
+
+struct bpf_map *bpf_map_get_with_uref(u32 ufd)
 {
 	struct fd f = fdget(ufd);
 	struct bpf_map *map;
@@ -183,7 +197,7 @@
 	if (IS_ERR(map))
 		return map;
 
-	atomic_inc(&map->refcnt);
+	bpf_map_inc(map, true);
 	fdput(f);
 
 	return map;
@@ -226,7 +240,7 @@
 		goto free_key;
 
 	err = -ENOMEM;
-	value = kmalloc(map->value_size, GFP_USER);
+	value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN);
 	if (!value)
 		goto free_key;
 
@@ -285,7 +299,7 @@
 		goto free_key;
 
 	err = -ENOMEM;
-	value = kmalloc(map->value_size, GFP_USER);
+	value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN);
 	if (!value)
 		goto free_key;
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c607305..a7945d1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2021,8 +2021,7 @@
 			 * will be used by the valid program until it's unloaded
 			 * and all maps are released in free_bpf_prog_info()
 			 */
-			atomic_inc(&map->refcnt);
-
+			bpf_map_inc(map, false);
 			fdput(f);
 next_insn:
 			insn++;
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index 6e53441..db545cb 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -294,6 +294,12 @@
 
 	for (reloc = obj->relocs; reloc->name; reloc++) {
 		if (!klp_is_module(obj)) {
+
+#if defined(CONFIG_RANDOMIZE_BASE)
+			/* If KASLR has been enabled, adjust old value accordingly */
+			if (kaslr_enabled())
+				reloc->val += kaslr_offset();
+#endif
 			ret = klp_verify_vmlinux_symbol(reloc->name,
 							reloc->val);
 			if (ret)
diff --git a/kernel/panic.c b/kernel/panic.c
index 4579dbb..4b150bc 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -152,8 +152,11 @@
 	 * We may have ended up stopping the CPU holding the lock (in
 	 * smp_send_stop()) while still having some valuable data in the console
 	 * buffer.  Try to acquire the lock then release it regardless of the
-	 * result.  The release will also print the buffers out.
+	 * result.  The release will also print the buffers out.  Locks debug
+	 * should be disabled to avoid reporting bad unlock balance when
+	 * panic() is not being callled from OOPS.
 	 */
+	debug_locks_off();
 	console_trylock();
 	console_unlock();
 
diff --git a/kernel/pid.c b/kernel/pid.c
index ca36879..78b3d9f 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -467,7 +467,7 @@
 	rcu_read_lock();
 	if (type != PIDTYPE_PID)
 		task = task->group_leader;
-	pid = get_pid(task->pids[type].pid);
+	pid = get_pid(rcu_dereference(task->pids[type].pid));
 	rcu_read_unlock();
 	return pid;
 }
@@ -528,7 +528,7 @@
 	if (likely(pid_alive(task))) {
 		if (type != PIDTYPE_PID)
 			task = task->group_leader;
-		nr = pid_nr_ns(task->pids[type].pid, ns);
+		nr = pid_nr_ns(rcu_dereference(task->pids[type].pid), ns);
 	}
 	rcu_read_unlock();
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4d568ac..7063c6a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1947,13 +1947,38 @@
 
 #ifdef CONFIG_SMP
 	/*
+	 * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be
+	 * possible to, falsely, observe p->on_cpu == 0.
+	 *
+	 * One must be running (->on_cpu == 1) in order to remove oneself
+	 * from the runqueue.
+	 *
+	 *  [S] ->on_cpu = 1;	[L] ->on_rq
+	 *      UNLOCK rq->lock
+	 *			RMB
+	 *      LOCK   rq->lock
+	 *  [S] ->on_rq = 0;    [L] ->on_cpu
+	 *
+	 * Pairs with the full barrier implied in the UNLOCK+LOCK on rq->lock
+	 * from the consecutive calls to schedule(); the first switching to our
+	 * task, the second putting it to sleep.
+	 */
+	smp_rmb();
+
+	/*
 	 * If the owning (remote) cpu is still in the middle of schedule() with
 	 * this task as prev, wait until its done referencing the task.
 	 */
 	while (p->on_cpu)
 		cpu_relax();
 	/*
-	 * Pairs with the smp_wmb() in finish_lock_switch().
+	 * Combined with the control dependency above, we have an effective
+	 * smp_load_acquire() without the need for full barriers.
+	 *
+	 * Pairs with the smp_store_release() in finish_lock_switch().
+	 *
+	 * This ensures that tasks getting woken will be fully ordered against
+	 * their previous state and preserve Program Order.
 	 */
 	smp_rmb();
 
@@ -2039,7 +2064,6 @@
  */
 int wake_up_process(struct task_struct *p)
 {
-	WARN_ON(task_is_stopped_or_traced(p));
 	return try_to_wake_up(p, TASK_NORMAL, 0);
 }
 EXPORT_SYMBOL(wake_up_process);
@@ -5847,13 +5871,13 @@
 {
 	memset(rd, 0, sizeof(*rd));
 
-	if (!alloc_cpumask_var(&rd->span, GFP_KERNEL))
+	if (!zalloc_cpumask_var(&rd->span, GFP_KERNEL))
 		goto out;
-	if (!alloc_cpumask_var(&rd->online, GFP_KERNEL))
+	if (!zalloc_cpumask_var(&rd->online, GFP_KERNEL))
 		goto free_span;
-	if (!alloc_cpumask_var(&rd->dlo_mask, GFP_KERNEL))
+	if (!zalloc_cpumask_var(&rd->dlo_mask, GFP_KERNEL))
 		goto free_online;
-	if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
+	if (!zalloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
 		goto free_dlo_mask;
 
 	init_dl_bw(&rd->dl_bw);
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 26a5446..05de80b 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -788,6 +788,9 @@
 	unsigned int seq;
 	cputime_t gtime;
 
+	if (!context_tracking_is_enabled())
+		return t->gtime;
+
 	do {
 		seq = read_seqbegin(&t->vtime_seqlock);
 
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index e3cc163..8ec86ab 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -64,7 +64,7 @@
 	raw_spin_unlock(&rt_b->rt_runtime_lock);
 }
 
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) && defined(HAVE_RT_PUSH_IPI)
 static void push_irq_work_func(struct irq_work *work);
 #endif
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index efd3bfc..b242775 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1073,6 +1073,9 @@
 	 * We must ensure this doesn't happen until the switch is completely
 	 * finished.
 	 *
+	 * In particular, the load of prev->state in finish_task_switch() must
+	 * happen before this.
+	 *
 	 * Pairs with the control dependency and rmb in try_to_wake_up().
 	 */
 	smp_store_release(&prev->on_cpu, 0);
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 052e026..f10bd87 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -583,18 +583,18 @@
 
 __sched int bit_wait(struct wait_bit_key *word)
 {
-	if (signal_pending_state(current->state, current))
-		return 1;
 	schedule();
+	if (signal_pending(current))
+		return -EINTR;
 	return 0;
 }
 EXPORT_SYMBOL(bit_wait);
 
 __sched int bit_wait_io(struct wait_bit_key *word)
 {
-	if (signal_pending_state(current->state, current))
-		return 1;
 	io_schedule();
+	if (signal_pending(current))
+		return -EINTR;
 	return 0;
 }
 EXPORT_SYMBOL(bit_wait_io);
@@ -602,11 +602,11 @@
 __sched int bit_wait_timeout(struct wait_bit_key *word)
 {
 	unsigned long now = READ_ONCE(jiffies);
-	if (signal_pending_state(current->state, current))
-		return 1;
 	if (time_after_eq(now, word->timeout))
 		return -EAGAIN;
 	schedule_timeout(word->timeout - now);
+	if (signal_pending(current))
+		return -EINTR;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(bit_wait_timeout);
@@ -614,11 +614,11 @@
 __sched int bit_wait_io_timeout(struct wait_bit_key *word)
 {
 	unsigned long now = READ_ONCE(jiffies);
-	if (signal_pending_state(current->state, current))
-		return 1;
 	if (time_after_eq(now, word->timeout))
 		return -EAGAIN;
 	io_schedule_timeout(word->timeout - now);
+	if (signal_pending(current))
+		return -EINTR;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(bit_wait_io_timeout);
diff --git a/kernel/signal.c b/kernel/signal.c
index c0b01fe..f3f1f7a 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3503,7 +3503,7 @@
 
 #endif
 
-int sigsuspend(sigset_t *set)
+static int sigsuspend(sigset_t *set)
 {
 	current->saved_sigmask = current->blocked;
 	set_current_blocked(set);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 75f1d05..9c6045a 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1887,12 +1887,6 @@
 	return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE;
 }
 
-static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
-{
-	cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp;
-	cpu_buffer->reader_page->read = 0;
-}
-
 static void rb_inc_iter(struct ring_buffer_iter *iter)
 {
 	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
@@ -2803,8 +2797,11 @@
 
 	event = __rb_reserve_next(cpu_buffer, &info);
 
-	if (unlikely(PTR_ERR(event) == -EAGAIN))
+	if (unlikely(PTR_ERR(event) == -EAGAIN)) {
+		if (info.add_timestamp)
+			info.length -= RB_LEN_TIME_EXTEND;
 		goto again;
+	}
 
 	if (!event)
 		goto out_fail;
@@ -3626,7 +3623,7 @@
 
 	/* Finally update the reader page to the new head */
 	cpu_buffer->reader_page = reader;
-	rb_reset_reader_page(cpu_buffer);
+	cpu_buffer->reader_page->read = 0;
 
 	if (overwrite != cpu_buffer->last_overrun) {
 		cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun;
@@ -3636,6 +3633,10 @@
 	goto again;
 
  out:
+	/* Update the read_stamp on the first event */
+	if (reader && reader->read == 0)
+		cpu_buffer->read_stamp = reader->page->time_stamp;
+
 	arch_spin_unlock(&cpu_buffer->lock);
 	local_irq_restore(flags);
 
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 6bbc5f6..4f6ef69 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -582,6 +582,12 @@
 	unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre, tr);
 	unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, tr);
 
+	unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre, tr);
+	unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post, tr);
+
+	unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_pre, tr);
+	unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_post, tr);
+
 	list_for_each_entry(file, &tr->events, list) {
 		clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
 	}
@@ -1729,6 +1735,16 @@
 						 tr, INT_MAX);
 		register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post,
 						 tr, 0);
+
+		register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre,
+						     tr, INT_MAX);
+		register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post,
+						     tr, 0);
+
+		register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_pre,
+						 tr, INT_MAX);
+		register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_post,
+						 tr, 0);
 	}
 
 	/*
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index c29ddeb..62fe06b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2009,7 +2009,7 @@
 		/*
 		 * Be somewhat over-protective like KSM for now!
 		 */
-		if (*vm_flags & (VM_HUGEPAGE | VM_NO_THP))
+		if (*vm_flags & VM_NO_THP)
 			return -EINVAL;
 		*vm_flags &= ~VM_NOHUGEPAGE;
 		*vm_flags |= VM_HUGEPAGE;
@@ -2025,7 +2025,7 @@
 		/*
 		 * Be somewhat over-protective like KSM for now!
 		 */
-		if (*vm_flags & (VM_NOHUGEPAGE | VM_NO_THP))
+		if (*vm_flags & VM_NO_THP)
 			return -EINVAL;
 		*vm_flags &= ~VM_HUGEPAGE;
 		*vm_flags |= VM_NOHUGEPAGE;
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index d41b21b..bc0a8d8 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -19,6 +19,7 @@
 #include <linux/export.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/kmemleak.h>
 #include <linux/memblock.h>
 #include <linux/memory.h>
 #include <linux/mm.h>
@@ -444,6 +445,7 @@
 
 	if (ret) {
 		find_vm_area(addr)->flags |= VM_KASAN;
+		kmemleak_ignore(ret);
 		return 0;
 	}
 
diff --git a/mm/memory.c b/mm/memory.c
index deb679c..c387430 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3015,9 +3015,9 @@
 		} else {
 			/*
 			 * The fault handler has no page to lock, so it holds
-			 * i_mmap_lock for write to protect against truncate.
+			 * i_mmap_lock for read to protect against truncate.
 			 */
-			i_mmap_unlock_write(vma->vm_file->f_mapping);
+			i_mmap_unlock_read(vma->vm_file->f_mapping);
 		}
 		goto uncharge_out;
 	}
@@ -3031,9 +3031,9 @@
 	} else {
 		/*
 		 * The fault handler has no page to lock, so it holds
-		 * i_mmap_lock for write to protect against truncate.
+		 * i_mmap_lock for read to protect against truncate.
 		 */
-		i_mmap_unlock_write(vma->vm_file->f_mapping);
+		i_mmap_unlock_read(vma->vm_file->f_mapping);
 	}
 	return ret;
 uncharge_out:
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 2c90357..3e4d654 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1542,7 +1542,9 @@
 	for (;;) {
 		unsigned long now = jiffies;
 		unsigned long dirty, thresh, bg_thresh;
-		unsigned long m_dirty, m_thresh, m_bg_thresh;
+		unsigned long m_dirty = 0;	/* stop bogus uninit warnings */
+		unsigned long m_thresh = 0;
+		unsigned long m_bg_thresh = 0;
 
 		/*
 		 * Unstable writes are a feature of certain networked
diff --git a/mm/slab.c b/mm/slab.c
index e0819fa..4765c97 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3419,7 +3419,7 @@
 }
 EXPORT_SYMBOL(kmem_cache_free_bulk);
 
-bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
+int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 								void **p)
 {
 	return __kmem_cache_alloc_bulk(s, flags, size, p);
diff --git a/mm/slab.h b/mm/slab.h
index 27492eb..7b60871 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -170,7 +170,7 @@
  * may be allocated or freed using these operations.
  */
 void __kmem_cache_free_bulk(struct kmem_cache *, size_t, void **);
-bool __kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **);
+int __kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **);
 
 #ifdef CONFIG_MEMCG_KMEM
 /*
diff --git a/mm/slab_common.c b/mm/slab_common.c
index d88e97c..3c6a86b 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -112,7 +112,7 @@
 		kmem_cache_free(s, p[i]);
 }
 
-bool __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
+int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
 								void **p)
 {
 	size_t i;
@@ -121,10 +121,10 @@
 		void *x = p[i] = kmem_cache_alloc(s, flags);
 		if (!x) {
 			__kmem_cache_free_bulk(s, i, p);
-			return false;
+			return 0;
 		}
 	}
-	return true;
+	return i;
 }
 
 #ifdef CONFIG_MEMCG_KMEM
diff --git a/mm/slob.c b/mm/slob.c
index 0d7e5df..17e8f8c 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -617,7 +617,7 @@
 }
 EXPORT_SYMBOL(kmem_cache_free_bulk);
 
-bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
+int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 								void **p)
 {
 	return __kmem_cache_alloc_bulk(s, flags, size, p);
diff --git a/mm/slub.c b/mm/slub.c
index 7cb4bf9..4699751 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1065,11 +1065,15 @@
 	return 0;
 }
 
+/* Supports checking bulk free of a constructed freelist */
 static noinline struct kmem_cache_node *free_debug_processing(
-	struct kmem_cache *s, struct page *page, void *object,
+	struct kmem_cache *s, struct page *page,
+	void *head, void *tail, int bulk_cnt,
 	unsigned long addr, unsigned long *flags)
 {
 	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
+	void *object = head;
+	int cnt = 0;
 
 	spin_lock_irqsave(&n->list_lock, *flags);
 	slab_lock(page);
@@ -1077,6 +1081,9 @@
 	if (!check_slab(s, page))
 		goto fail;
 
+next_object:
+	cnt++;
+
 	if (!check_valid_pointer(s, page, object)) {
 		slab_err(s, page, "Invalid object pointer 0x%p", object);
 		goto fail;
@@ -1107,8 +1114,19 @@
 	if (s->flags & SLAB_STORE_USER)
 		set_track(s, object, TRACK_FREE, addr);
 	trace(s, page, object, 0);
+	/* Freepointer not overwritten by init_object(), SLAB_POISON moved it */
 	init_object(s, object, SLUB_RED_INACTIVE);
+
+	/* Reached end of constructed freelist yet? */
+	if (object != tail) {
+		object = get_freepointer(s, object);
+		goto next_object;
+	}
 out:
+	if (cnt != bulk_cnt)
+		slab_err(s, page, "Bulk freelist count(%d) invalid(%d)\n",
+			 bulk_cnt, cnt);
+
 	slab_unlock(page);
 	/*
 	 * Keep node_lock to preserve integrity
@@ -1204,7 +1222,7 @@
 
 	return flags;
 }
-#else
+#else /* !CONFIG_SLUB_DEBUG */
 static inline void setup_object_debug(struct kmem_cache *s,
 			struct page *page, void *object) {}
 
@@ -1212,7 +1230,8 @@
 	struct page *page, void *object, unsigned long addr) { return 0; }
 
 static inline struct kmem_cache_node *free_debug_processing(
-	struct kmem_cache *s, struct page *page, void *object,
+	struct kmem_cache *s, struct page *page,
+	void *head, void *tail, int bulk_cnt,
 	unsigned long addr, unsigned long *flags) { return NULL; }
 
 static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
@@ -1273,14 +1292,21 @@
 	return memcg_kmem_get_cache(s, flags);
 }
 
-static inline void slab_post_alloc_hook(struct kmem_cache *s,
-					gfp_t flags, void *object)
+static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
+					size_t size, void **p)
 {
+	size_t i;
+
 	flags &= gfp_allowed_mask;
-	kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
-	kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags);
+	for (i = 0; i < size; i++) {
+		void *object = p[i];
+
+		kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
+		kmemleak_alloc_recursive(object, s->object_size, 1,
+					 s->flags, flags);
+		kasan_slab_alloc(s, object);
+	}
 	memcg_kmem_put_cache(s);
-	kasan_slab_alloc(s, object);
 }
 
 static inline void slab_free_hook(struct kmem_cache *s, void *x)
@@ -1308,6 +1334,29 @@
 	kasan_slab_free(s, x);
 }
 
+static inline void slab_free_freelist_hook(struct kmem_cache *s,
+					   void *head, void *tail)
+{
+/*
+ * Compiler cannot detect this function can be removed if slab_free_hook()
+ * evaluates to nothing.  Thus, catch all relevant config debug options here.
+ */
+#if defined(CONFIG_KMEMCHECK) ||		\
+	defined(CONFIG_LOCKDEP)	||		\
+	defined(CONFIG_DEBUG_KMEMLEAK) ||	\
+	defined(CONFIG_DEBUG_OBJECTS_FREE) ||	\
+	defined(CONFIG_KASAN)
+
+	void *object = head;
+	void *tail_obj = tail ? : head;
+
+	do {
+		slab_free_hook(s, object);
+	} while ((object != tail_obj) &&
+		 (object = get_freepointer(s, object)));
+#endif
+}
+
 static void setup_object(struct kmem_cache *s, struct page *page,
 				void *object)
 {
@@ -2295,23 +2344,15 @@
  * And if we were unable to get a new slab from the partial slab lists then
  * we need to allocate a new slab. This is the slowest path since it involves
  * a call to the page allocator and the setup of a new slab.
+ *
+ * Version of __slab_alloc to use when we know that interrupts are
+ * already disabled (which is the case for bulk allocation).
  */
-static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
+static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 			  unsigned long addr, struct kmem_cache_cpu *c)
 {
 	void *freelist;
 	struct page *page;
-	unsigned long flags;
-
-	local_irq_save(flags);
-#ifdef CONFIG_PREEMPT
-	/*
-	 * We may have been preempted and rescheduled on a different
-	 * cpu before disabling interrupts. Need to reload cpu area
-	 * pointer.
-	 */
-	c = this_cpu_ptr(s->cpu_slab);
-#endif
 
 	page = c->page;
 	if (!page)
@@ -2369,7 +2410,6 @@
 	VM_BUG_ON(!c->page->frozen);
 	c->freelist = get_freepointer(s, freelist);
 	c->tid = next_tid(c->tid);
-	local_irq_restore(flags);
 	return freelist;
 
 new_slab:
@@ -2386,7 +2426,6 @@
 
 	if (unlikely(!freelist)) {
 		slab_out_of_memory(s, gfpflags, node);
-		local_irq_restore(flags);
 		return NULL;
 	}
 
@@ -2402,11 +2441,35 @@
 	deactivate_slab(s, page, get_freepointer(s, freelist));
 	c->page = NULL;
 	c->freelist = NULL;
-	local_irq_restore(flags);
 	return freelist;
 }
 
 /*
+ * Another one that disabled interrupt and compensates for possible
+ * cpu changes by refetching the per cpu area pointer.
+ */
+static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
+			  unsigned long addr, struct kmem_cache_cpu *c)
+{
+	void *p;
+	unsigned long flags;
+
+	local_irq_save(flags);
+#ifdef CONFIG_PREEMPT
+	/*
+	 * We may have been preempted and rescheduled on a different
+	 * cpu before disabling interrupts. Need to reload cpu area
+	 * pointer.
+	 */
+	c = this_cpu_ptr(s->cpu_slab);
+#endif
+
+	p = ___slab_alloc(s, gfpflags, node, addr, c);
+	local_irq_restore(flags);
+	return p;
+}
+
+/*
  * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
  * have the fastpath folded into their functions. So no function call
  * overhead for requests that can be satisfied on the fastpath.
@@ -2419,7 +2482,7 @@
 static __always_inline void *slab_alloc_node(struct kmem_cache *s,
 		gfp_t gfpflags, int node, unsigned long addr)
 {
-	void **object;
+	void *object;
 	struct kmem_cache_cpu *c;
 	struct page *page;
 	unsigned long tid;
@@ -2498,7 +2561,7 @@
 	if (unlikely(gfpflags & __GFP_ZERO) && object)
 		memset(object, 0, s->object_size);
 
-	slab_post_alloc_hook(s, gfpflags, object);
+	slab_post_alloc_hook(s, gfpflags, 1, &object);
 
 	return object;
 }
@@ -2569,10 +2632,11 @@
  * handling required then we can return immediately.
  */
 static void __slab_free(struct kmem_cache *s, struct page *page,
-			void *x, unsigned long addr)
+			void *head, void *tail, int cnt,
+			unsigned long addr)
+
 {
 	void *prior;
-	void **object = (void *)x;
 	int was_frozen;
 	struct page new;
 	unsigned long counters;
@@ -2582,7 +2646,8 @@
 	stat(s, FREE_SLOWPATH);
 
 	if (kmem_cache_debug(s) &&
-		!(n = free_debug_processing(s, page, x, addr, &flags)))
+	    !(n = free_debug_processing(s, page, head, tail, cnt,
+					addr, &flags)))
 		return;
 
 	do {
@@ -2592,10 +2657,10 @@
 		}
 		prior = page->freelist;
 		counters = page->counters;
-		set_freepointer(s, object, prior);
+		set_freepointer(s, tail, prior);
 		new.counters = counters;
 		was_frozen = new.frozen;
-		new.inuse--;
+		new.inuse -= cnt;
 		if ((!new.inuse || !prior) && !was_frozen) {
 
 			if (kmem_cache_has_cpu_partial(s) && !prior) {
@@ -2626,7 +2691,7 @@
 
 	} while (!cmpxchg_double_slab(s, page,
 		prior, counters,
-		object, new.counters,
+		head, new.counters,
 		"__slab_free"));
 
 	if (likely(!n)) {
@@ -2691,15 +2756,20 @@
  *
  * If fastpath is not possible then fall back to __slab_free where we deal
  * with all sorts of special processing.
+ *
+ * Bulk free of a freelist with several objects (all pointing to the
+ * same page) possible by specifying head and tail ptr, plus objects
+ * count (cnt). Bulk free indicated by tail pointer being set.
  */
-static __always_inline void slab_free(struct kmem_cache *s,
-			struct page *page, void *x, unsigned long addr)
+static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
+				      void *head, void *tail, int cnt,
+				      unsigned long addr)
 {
-	void **object = (void *)x;
+	void *tail_obj = tail ? : head;
 	struct kmem_cache_cpu *c;
 	unsigned long tid;
 
-	slab_free_hook(s, x);
+	slab_free_freelist_hook(s, head, tail);
 
 redo:
 	/*
@@ -2718,19 +2788,19 @@
 	barrier();
 
 	if (likely(page == c->page)) {
-		set_freepointer(s, object, c->freelist);
+		set_freepointer(s, tail_obj, c->freelist);
 
 		if (unlikely(!this_cpu_cmpxchg_double(
 				s->cpu_slab->freelist, s->cpu_slab->tid,
 				c->freelist, tid,
-				object, next_tid(tid)))) {
+				head, next_tid(tid)))) {
 
 			note_cmpxchg_failure("slab_free", s, tid);
 			goto redo;
 		}
 		stat(s, FREE_FASTPATH);
 	} else
-		__slab_free(s, page, x, addr);
+		__slab_free(s, page, head, tail_obj, cnt, addr);
 
 }
 
@@ -2739,59 +2809,116 @@
 	s = cache_from_obj(s, x);
 	if (!s)
 		return;
-	slab_free(s, virt_to_head_page(x), x, _RET_IP_);
+	slab_free(s, virt_to_head_page(x), x, NULL, 1, _RET_IP_);
 	trace_kmem_cache_free(_RET_IP_, x);
 }
 EXPORT_SYMBOL(kmem_cache_free);
 
-/* Note that interrupts must be enabled when calling this function. */
-void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
-{
-	struct kmem_cache_cpu *c;
+struct detached_freelist {
 	struct page *page;
-	int i;
+	void *tail;
+	void *freelist;
+	int cnt;
+};
 
-	local_irq_disable();
-	c = this_cpu_ptr(s->cpu_slab);
+/*
+ * This function progressively scans the array with free objects (with
+ * a limited look ahead) and extract objects belonging to the same
+ * page.  It builds a detached freelist directly within the given
+ * page/objects.  This can happen without any need for
+ * synchronization, because the objects are owned by running process.
+ * The freelist is build up as a single linked list in the objects.
+ * The idea is, that this detached freelist can then be bulk
+ * transferred to the real freelist(s), but only requiring a single
+ * synchronization primitive.  Look ahead in the array is limited due
+ * to performance reasons.
+ */
+static int build_detached_freelist(struct kmem_cache *s, size_t size,
+				   void **p, struct detached_freelist *df)
+{
+	size_t first_skipped_index = 0;
+	int lookahead = 3;
+	void *object;
 
-	for (i = 0; i < size; i++) {
-		void *object = p[i];
+	/* Always re-init detached_freelist */
+	df->page = NULL;
 
-		BUG_ON(!object);
-		/* kmem cache debug support */
-		s = cache_from_obj(s, object);
-		if (unlikely(!s))
-			goto exit;
-		slab_free_hook(s, object);
+	do {
+		object = p[--size];
+	} while (!object && size);
 
-		page = virt_to_head_page(object);
+	if (!object)
+		return 0;
 
-		if (c->page == page) {
-			/* Fastpath: local CPU free */
-			set_freepointer(s, object, c->freelist);
-			c->freelist = object;
-		} else {
-			c->tid = next_tid(c->tid);
-			local_irq_enable();
-			/* Slowpath: overhead locked cmpxchg_double_slab */
-			__slab_free(s, page, object, _RET_IP_);
-			local_irq_disable();
-			c = this_cpu_ptr(s->cpu_slab);
+	/* Start new detached freelist */
+	set_freepointer(s, object, NULL);
+	df->page = virt_to_head_page(object);
+	df->tail = object;
+	df->freelist = object;
+	p[size] = NULL; /* mark object processed */
+	df->cnt = 1;
+
+	while (size) {
+		object = p[--size];
+		if (!object)
+			continue; /* Skip processed objects */
+
+		/* df->page is always set at this point */
+		if (df->page == virt_to_head_page(object)) {
+			/* Opportunity build freelist */
+			set_freepointer(s, object, df->freelist);
+			df->freelist = object;
+			df->cnt++;
+			p[size] = NULL; /* mark object processed */
+
+			continue;
 		}
+
+		/* Limit look ahead search */
+		if (!--lookahead)
+			break;
+
+		if (!first_skipped_index)
+			first_skipped_index = size + 1;
 	}
-exit:
-	c->tid = next_tid(c->tid);
-	local_irq_enable();
+
+	return first_skipped_index;
+}
+
+
+/* Note that interrupts must be enabled when calling this function. */
+void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p)
+{
+	if (WARN_ON(!size))
+		return;
+
+	do {
+		struct detached_freelist df;
+		struct kmem_cache *s;
+
+		/* Support for memcg */
+		s = cache_from_obj(orig_s, p[size - 1]);
+
+		size = build_detached_freelist(s, size, p, &df);
+		if (unlikely(!df.page))
+			continue;
+
+		slab_free(s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_);
+	} while (likely(size));
 }
 EXPORT_SYMBOL(kmem_cache_free_bulk);
 
 /* Note that interrupts must be enabled when calling this function. */
-bool kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
-			   void **p)
+int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
+			  void **p)
 {
 	struct kmem_cache_cpu *c;
 	int i;
 
+	/* memcg and kmem_cache debug support */
+	s = slab_pre_alloc_hook(s, flags);
+	if (unlikely(!s))
+		return false;
 	/*
 	 * Drain objects in the per cpu slab, while disabling local
 	 * IRQs, which protects against PREEMPT and interrupts
@@ -2804,36 +2931,20 @@
 		void *object = c->freelist;
 
 		if (unlikely(!object)) {
-			local_irq_enable();
 			/*
 			 * Invoking slow path likely have side-effect
 			 * of re-populating per CPU c->freelist
 			 */
-			p[i] = __slab_alloc(s, flags, NUMA_NO_NODE,
+			p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
 					    _RET_IP_, c);
-			if (unlikely(!p[i])) {
-				__kmem_cache_free_bulk(s, i, p);
-				return false;
-			}
-			local_irq_disable();
+			if (unlikely(!p[i]))
+				goto error;
+
 			c = this_cpu_ptr(s->cpu_slab);
 			continue; /* goto for-loop */
 		}
-
-		/* kmem_cache debug support */
-		s = slab_pre_alloc_hook(s, flags);
-		if (unlikely(!s)) {
-			__kmem_cache_free_bulk(s, i, p);
-			c->tid = next_tid(c->tid);
-			local_irq_enable();
-			return false;
-		}
-
 		c->freelist = get_freepointer(s, object);
 		p[i] = object;
-
-		/* kmem_cache debug support */
-		slab_post_alloc_hook(s, flags, object);
 	}
 	c->tid = next_tid(c->tid);
 	local_irq_enable();
@@ -2846,7 +2957,14 @@
 			memset(p[j], 0, s->object_size);
 	}
 
-	return true;
+	/* memcg and kmem_cache debug support */
+	slab_post_alloc_hook(s, flags, size, p);
+	return i;
+error:
+	local_irq_enable();
+	slab_post_alloc_hook(s, flags, i, p);
+	__kmem_cache_free_bulk(s, i, p);
+	return 0;
 }
 EXPORT_SYMBOL(kmem_cache_alloc_bulk);
 
@@ -3511,7 +3629,7 @@
 		__free_kmem_pages(page, compound_order(page));
 		return;
 	}
-	slab_free(page->slab_cache, page, object, _RET_IP_);
+	slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);
 }
 EXPORT_SYMBOL(kfree);
 
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index d045634..8e3c9c5 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1443,7 +1443,6 @@
 		vmap_debug_free_range(va->va_start, va->va_end);
 		kasan_free_shadow(vm);
 		free_unmap_vmap_area(va);
-		vm->size -= PAGE_SIZE;
 
 		return vm;
 	}
@@ -1468,8 +1467,8 @@
 		return;
 	}
 
-	debug_check_no_locks_freed(addr, area->size);
-	debug_check_no_obj_freed(addr, area->size);
+	debug_check_no_locks_freed(addr, get_vm_area_size(area));
+	debug_check_no_obj_freed(addr, get_vm_area_size(area));
 
 	if (deallocate_pages) {
 		int i;
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 496b275..e2ed698 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -30,7 +30,9 @@
 			skb->pkt_type = PACKET_HOST;
 	}
 
-	if (!(vlan_dev_priv(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR)) {
+	if (!(vlan_dev_priv(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR) &&
+	    !netif_is_macvlan_port(vlan_dev) &&
+	    !netif_is_bridge_port(vlan_dev)) {
 		unsigned int offset = skb->data - skb_mac_header(skb);
 
 		/*
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index a3bffd1..70306cc 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -271,11 +271,11 @@
 		if (signal_pending(current) || !timeo)
 			break;
 
-		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 		release_sock(sk);
 		timeo = schedule_timeout(timeo);
 		lock_sock(sk);
-		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 	}
 
 	__set_current_state(TASK_RUNNING);
@@ -441,7 +441,7 @@
 	if (!test_bit(BT_SK_SUSPEND, &bt_sk(sk)->flags) && sock_writeable(sk))
 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
 	else
-		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 
 	return mask;
 }
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index c913538..ffed8a1 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -3027,8 +3027,13 @@
 
 	BT_DBG("chan %p", chan);
 
+	/* No need to call l2cap_chan_hold() here since we already own
+	 * the reference taken in smp_new_conn_cb(). This is just the
+	 * first time that we tie it to a specific pointer. The code in
+	 * l2cap_core.c ensures that there's no risk this function wont
+	 * get called if smp_new_conn_cb was previously called.
+	 */
 	conn->smp = chan;
-	l2cap_chan_hold(chan);
 
 	if (hcon->type == ACL_LINK && test_bit(HCI_CONN_ENCRYPT, &hcon->flags))
 		bredr_pairing(chan);
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index f7e8dee..5f3f645 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -48,7 +48,7 @@
 
 	p->state = state;
 	err = switchdev_port_attr_set(p->dev, &attr);
-	if (err)
+	if (err && err != -EOPNOTSUPP)
 		br_warn(p->br, "error setting offload STP state on port %u(%s)\n",
 				(unsigned int) p->port_no, p->dev->name);
 }
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index fa53d7a..5396ff08 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -50,7 +50,7 @@
 	p->config_pending = 0;
 
 	err = switchdev_port_attr_set(p->dev, &attr);
-	if (err)
+	if (err && err != -EOPNOTSUPP)
 		netdev_err(p->dev, "failed to set HW ageing time\n");
 }
 
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index cc85891..aa209b1 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -323,7 +323,7 @@
 			!timeo)
 			break;
 
-		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 		release_sock(sk);
 		timeo = schedule_timeout(timeo);
 		lock_sock(sk);
@@ -331,7 +331,7 @@
 		if (sock_flag(sk, SOCK_DEAD))
 			break;
 
-		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 	}
 
 	finish_wait(sk_sleep(sk), &wait);
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 617088a..d62af69 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -785,7 +785,7 @@
 	if (sock_writeable(sk))
 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
 	else
-		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 
 	return mask;
 }
diff --git a/net/core/dev.c b/net/core/dev.c
index ab9b8d0..ae00b89 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2403,17 +2403,20 @@
 {
 	static const netdev_features_t null_features = 0;
 	struct net_device *dev = skb->dev;
-	const char *driver = "";
+	const char *name = "";
 
 	if (!net_ratelimit())
 		return;
 
-	if (dev && dev->dev.parent)
-		driver = dev_driver_string(dev->dev.parent);
-
+	if (dev) {
+		if (dev->dev.parent)
+			name = dev_driver_string(dev->dev.parent);
+		else
+			name = netdev_name(dev);
+	}
 	WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d "
 	     "gso_type=%d ip_summed=%d\n",
-	     driver, dev ? &dev->features : &null_features,
+	     name, dev ? &dev->features : &null_features,
 	     skb->sk ? &skb->sk->sk_route_caps : &null_features,
 	     skb->len, skb->data_len, skb_shinfo(skb)->gso_size,
 	     skb_shinfo(skb)->gso_type, skb->ip_summed);
@@ -6426,11 +6429,16 @@
 
 	if (dev->netdev_ops->ndo_set_features)
 		err = dev->netdev_ops->ndo_set_features(dev, features);
+	else
+		err = 0;
 
 	if (unlikely(err < 0)) {
 		netdev_err(dev,
 			"set_features() failed (%d); wanted %pNF, left %pNF\n",
 			err, &features, &dev->features);
+		/* return non-0 since some features might have changed and
+		 * it's better to fire a spurious notification than miss it
+		 */
 		return -1;
 	}
 
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 1aa8437..f18ae91 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -857,7 +857,7 @@
 	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
 	/* keep skb alive even if arp_queue overflows */
 	if (skb)
-		skb = skb_copy(skb, GFP_ATOMIC);
+		skb = skb_clone(skb, GFP_ATOMIC);
 	write_unlock(&neigh->lock);
 	neigh->ops->solicit(neigh, skb);
 	atomic_inc(&neigh->probes);
@@ -2215,7 +2215,7 @@
 	ndm->ndm_pad2    = 0;
 	ndm->ndm_flags	 = pn->flags | NTF_PROXY;
 	ndm->ndm_type	 = RTN_UNICAST;
-	ndm->ndm_ifindex = pn->dev->ifindex;
+	ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
 	ndm->ndm_state	 = NUD_NONE;
 
 	if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
@@ -2333,7 +2333,7 @@
 		if (h > s_h)
 			s_idx = 0;
 		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
-			if (dev_net(n->dev) != net)
+			if (pneigh_net(n) != net)
 				continue;
 			if (idx < s_idx)
 				goto next;
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 6441f47..2e4df84 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -56,7 +56,7 @@
 	kfree(css_cls_state(css));
 }
 
-static int update_classid(const void *v, struct file *file, unsigned n)
+static int update_classid_sock(const void *v, struct file *file, unsigned n)
 {
 	int err;
 	struct socket *sock = sock_from_file(file, &err);
@@ -67,18 +67,25 @@
 	return 0;
 }
 
+static void update_classid(struct cgroup_subsys_state *css, void *v)
+{
+	struct css_task_iter it;
+	struct task_struct *p;
+
+	css_task_iter_start(css, &it);
+	while ((p = css_task_iter_next(&it))) {
+		task_lock(p);
+		iterate_fd(p->files, 0, update_classid_sock, v);
+		task_unlock(p);
+	}
+	css_task_iter_end(&it);
+}
+
 static void cgrp_attach(struct cgroup_subsys_state *css,
 			struct cgroup_taskset *tset)
 {
-	struct cgroup_cls_state *cs = css_cls_state(css);
-	void *v = (void *)(unsigned long)cs->classid;
-	struct task_struct *p;
-
-	cgroup_taskset_for_each(p, tset) {
-		task_lock(p);
-		iterate_fd(p->files, 0, update_classid, v);
-		task_unlock(p);
-	}
+	update_classid(css,
+		       (void *)(unsigned long)css_cls_state(css)->classid);
 }
 
 static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft)
@@ -89,8 +96,11 @@
 static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
 			 u64 value)
 {
-	css_cls_state(css)->classid = (u32) value;
+	struct cgroup_cls_state *cs = css_cls_state(css);
 
+	cs->classid = (u32)value;
+
+	update_classid(css, (void *)(unsigned long)cs->classid);
 	return 0;
 }
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 504bd17..34ba7a0 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1045,15 +1045,156 @@
 	return 0;
 }
 
+static noinline_for_stack int rtnl_fill_stats(struct sk_buff *skb,
+					      struct net_device *dev)
+{
+	const struct rtnl_link_stats64 *stats;
+	struct rtnl_link_stats64 temp;
+	struct nlattr *attr;
+
+	stats = dev_get_stats(dev, &temp);
+
+	attr = nla_reserve(skb, IFLA_STATS,
+			   sizeof(struct rtnl_link_stats));
+	if (!attr)
+		return -EMSGSIZE;
+
+	copy_rtnl_link_stats(nla_data(attr), stats);
+
+	attr = nla_reserve(skb, IFLA_STATS64,
+			   sizeof(struct rtnl_link_stats64));
+	if (!attr)
+		return -EMSGSIZE;
+
+	copy_rtnl_link_stats64(nla_data(attr), stats);
+
+	return 0;
+}
+
+static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
+					       struct net_device *dev,
+					       int vfs_num,
+					       struct nlattr *vfinfo)
+{
+	struct ifla_vf_rss_query_en vf_rss_query_en;
+	struct ifla_vf_link_state vf_linkstate;
+	struct ifla_vf_spoofchk vf_spoofchk;
+	struct ifla_vf_tx_rate vf_tx_rate;
+	struct ifla_vf_stats vf_stats;
+	struct ifla_vf_trust vf_trust;
+	struct ifla_vf_vlan vf_vlan;
+	struct ifla_vf_rate vf_rate;
+	struct nlattr *vf, *vfstats;
+	struct ifla_vf_mac vf_mac;
+	struct ifla_vf_info ivi;
+
+	/* Not all SR-IOV capable drivers support the
+	 * spoofcheck and "RSS query enable" query.  Preset to
+	 * -1 so the user space tool can detect that the driver
+	 * didn't report anything.
+	 */
+	ivi.spoofchk = -1;
+	ivi.rss_query_en = -1;
+	ivi.trusted = -1;
+	memset(ivi.mac, 0, sizeof(ivi.mac));
+	/* The default value for VF link state is "auto"
+	 * IFLA_VF_LINK_STATE_AUTO which equals zero
+	 */
+	ivi.linkstate = 0;
+	if (dev->netdev_ops->ndo_get_vf_config(dev, vfs_num, &ivi))
+		return 0;
+
+	vf_mac.vf =
+		vf_vlan.vf =
+		vf_rate.vf =
+		vf_tx_rate.vf =
+		vf_spoofchk.vf =
+		vf_linkstate.vf =
+		vf_rss_query_en.vf =
+		vf_trust.vf = ivi.vf;
+
+	memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
+	vf_vlan.vlan = ivi.vlan;
+	vf_vlan.qos = ivi.qos;
+	vf_tx_rate.rate = ivi.max_tx_rate;
+	vf_rate.min_tx_rate = ivi.min_tx_rate;
+	vf_rate.max_tx_rate = ivi.max_tx_rate;
+	vf_spoofchk.setting = ivi.spoofchk;
+	vf_linkstate.link_state = ivi.linkstate;
+	vf_rss_query_en.setting = ivi.rss_query_en;
+	vf_trust.setting = ivi.trusted;
+	vf = nla_nest_start(skb, IFLA_VF_INFO);
+	if (!vf) {
+		nla_nest_cancel(skb, vfinfo);
+		return -EMSGSIZE;
+	}
+	if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) ||
+	    nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) ||
+	    nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate),
+		    &vf_rate) ||
+	    nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate),
+		    &vf_tx_rate) ||
+	    nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk),
+		    &vf_spoofchk) ||
+	    nla_put(skb, IFLA_VF_LINK_STATE, sizeof(vf_linkstate),
+		    &vf_linkstate) ||
+	    nla_put(skb, IFLA_VF_RSS_QUERY_EN,
+		    sizeof(vf_rss_query_en),
+		    &vf_rss_query_en) ||
+	    nla_put(skb, IFLA_VF_TRUST,
+		    sizeof(vf_trust), &vf_trust))
+		return -EMSGSIZE;
+	memset(&vf_stats, 0, sizeof(vf_stats));
+	if (dev->netdev_ops->ndo_get_vf_stats)
+		dev->netdev_ops->ndo_get_vf_stats(dev, vfs_num,
+						&vf_stats);
+	vfstats = nla_nest_start(skb, IFLA_VF_STATS);
+	if (!vfstats) {
+		nla_nest_cancel(skb, vf);
+		nla_nest_cancel(skb, vfinfo);
+		return -EMSGSIZE;
+	}
+	if (nla_put_u64(skb, IFLA_VF_STATS_RX_PACKETS,
+			vf_stats.rx_packets) ||
+	    nla_put_u64(skb, IFLA_VF_STATS_TX_PACKETS,
+			vf_stats.tx_packets) ||
+	    nla_put_u64(skb, IFLA_VF_STATS_RX_BYTES,
+			vf_stats.rx_bytes) ||
+	    nla_put_u64(skb, IFLA_VF_STATS_TX_BYTES,
+			vf_stats.tx_bytes) ||
+	    nla_put_u64(skb, IFLA_VF_STATS_BROADCAST,
+			vf_stats.broadcast) ||
+	    nla_put_u64(skb, IFLA_VF_STATS_MULTICAST,
+			vf_stats.multicast))
+		return -EMSGSIZE;
+	nla_nest_end(skb, vfstats);
+	nla_nest_end(skb, vf);
+	return 0;
+}
+
+static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
+{
+	struct rtnl_link_ifmap map = {
+		.mem_start   = dev->mem_start,
+		.mem_end     = dev->mem_end,
+		.base_addr   = dev->base_addr,
+		.irq         = dev->irq,
+		.dma         = dev->dma,
+		.port        = dev->if_port,
+	};
+	if (nla_put(skb, IFLA_MAP, sizeof(map), &map))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
 static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			    int type, u32 pid, u32 seq, u32 change,
 			    unsigned int flags, u32 ext_filter_mask)
 {
 	struct ifinfomsg *ifm;
 	struct nlmsghdr *nlh;
-	struct rtnl_link_stats64 temp;
-	const struct rtnl_link_stats64 *stats;
-	struct nlattr *attr, *af_spec;
+	struct nlattr *af_spec;
 	struct rtnl_af_ops *af_ops;
 	struct net_device *upper_dev = netdev_master_upper_dev_get(dev);
 
@@ -1096,18 +1237,8 @@
 	    nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down))
 		goto nla_put_failure;
 
-	if (1) {
-		struct rtnl_link_ifmap map = {
-			.mem_start   = dev->mem_start,
-			.mem_end     = dev->mem_end,
-			.base_addr   = dev->base_addr,
-			.irq         = dev->irq,
-			.dma         = dev->dma,
-			.port        = dev->if_port,
-		};
-		if (nla_put(skb, IFLA_MAP, sizeof(map), &map))
-			goto nla_put_failure;
-	}
+	if (rtnl_fill_link_ifmap(skb, dev))
+		goto nla_put_failure;
 
 	if (dev->addr_len) {
 		if (nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr) ||
@@ -1124,128 +1255,27 @@
 	if (rtnl_phys_switch_id_fill(skb, dev))
 		goto nla_put_failure;
 
-	attr = nla_reserve(skb, IFLA_STATS,
-			sizeof(struct rtnl_link_stats));
-	if (attr == NULL)
+	if (rtnl_fill_stats(skb, dev))
 		goto nla_put_failure;
 
-	stats = dev_get_stats(dev, &temp);
-	copy_rtnl_link_stats(nla_data(attr), stats);
-
-	attr = nla_reserve(skb, IFLA_STATS64,
-			sizeof(struct rtnl_link_stats64));
-	if (attr == NULL)
-		goto nla_put_failure;
-	copy_rtnl_link_stats64(nla_data(attr), stats);
-
 	if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF) &&
 	    nla_put_u32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent)))
 		goto nla_put_failure;
 
-	if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent
-	    && (ext_filter_mask & RTEXT_FILTER_VF)) {
+	if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent &&
+	    ext_filter_mask & RTEXT_FILTER_VF) {
 		int i;
-
-		struct nlattr *vfinfo, *vf, *vfstats;
+		struct nlattr *vfinfo;
 		int num_vfs = dev_num_vf(dev->dev.parent);
 
 		vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
 		if (!vfinfo)
 			goto nla_put_failure;
 		for (i = 0; i < num_vfs; i++) {
-			struct ifla_vf_info ivi;
-			struct ifla_vf_mac vf_mac;
-			struct ifla_vf_vlan vf_vlan;
-			struct ifla_vf_rate vf_rate;
-			struct ifla_vf_tx_rate vf_tx_rate;
-			struct ifla_vf_spoofchk vf_spoofchk;
-			struct ifla_vf_link_state vf_linkstate;
-			struct ifla_vf_rss_query_en vf_rss_query_en;
-			struct ifla_vf_stats vf_stats;
-			struct ifla_vf_trust vf_trust;
-
-			/*
-			 * Not all SR-IOV capable drivers support the
-			 * spoofcheck and "RSS query enable" query.  Preset to
-			 * -1 so the user space tool can detect that the driver
-			 * didn't report anything.
-			 */
-			ivi.spoofchk = -1;
-			ivi.rss_query_en = -1;
-			ivi.trusted = -1;
-			memset(ivi.mac, 0, sizeof(ivi.mac));
-			/* The default value for VF link state is "auto"
-			 * IFLA_VF_LINK_STATE_AUTO which equals zero
-			 */
-			ivi.linkstate = 0;
-			if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi))
-				break;
-			vf_mac.vf =
-				vf_vlan.vf =
-				vf_rate.vf =
-				vf_tx_rate.vf =
-				vf_spoofchk.vf =
-				vf_linkstate.vf =
-				vf_rss_query_en.vf =
-				vf_trust.vf = ivi.vf;
-
-			memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
-			vf_vlan.vlan = ivi.vlan;
-			vf_vlan.qos = ivi.qos;
-			vf_tx_rate.rate = ivi.max_tx_rate;
-			vf_rate.min_tx_rate = ivi.min_tx_rate;
-			vf_rate.max_tx_rate = ivi.max_tx_rate;
-			vf_spoofchk.setting = ivi.spoofchk;
-			vf_linkstate.link_state = ivi.linkstate;
-			vf_rss_query_en.setting = ivi.rss_query_en;
-			vf_trust.setting = ivi.trusted;
-			vf = nla_nest_start(skb, IFLA_VF_INFO);
-			if (!vf) {
-				nla_nest_cancel(skb, vfinfo);
+			if (rtnl_fill_vfinfo(skb, dev, i, vfinfo))
 				goto nla_put_failure;
-			}
-			if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) ||
-			    nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) ||
-			    nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate),
-				    &vf_rate) ||
-			    nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate),
-				    &vf_tx_rate) ||
-			    nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk),
-				    &vf_spoofchk) ||
-			    nla_put(skb, IFLA_VF_LINK_STATE, sizeof(vf_linkstate),
-				    &vf_linkstate) ||
-			    nla_put(skb, IFLA_VF_RSS_QUERY_EN,
-				    sizeof(vf_rss_query_en),
-				    &vf_rss_query_en) ||
-			    nla_put(skb, IFLA_VF_TRUST,
-				    sizeof(vf_trust), &vf_trust))
-				goto nla_put_failure;
-			memset(&vf_stats, 0, sizeof(vf_stats));
-			if (dev->netdev_ops->ndo_get_vf_stats)
-				dev->netdev_ops->ndo_get_vf_stats(dev, i,
-								  &vf_stats);
-			vfstats = nla_nest_start(skb, IFLA_VF_STATS);
-			if (!vfstats) {
-				nla_nest_cancel(skb, vf);
-				nla_nest_cancel(skb, vfinfo);
-				goto nla_put_failure;
-			}
-			if (nla_put_u64(skb, IFLA_VF_STATS_RX_PACKETS,
-					vf_stats.rx_packets) ||
-			    nla_put_u64(skb, IFLA_VF_STATS_TX_PACKETS,
-					vf_stats.tx_packets) ||
-			    nla_put_u64(skb, IFLA_VF_STATS_RX_BYTES,
-					vf_stats.rx_bytes) ||
-			    nla_put_u64(skb, IFLA_VF_STATS_TX_BYTES,
-					vf_stats.tx_bytes) ||
-			    nla_put_u64(skb, IFLA_VF_STATS_BROADCAST,
-					vf_stats.broadcast) ||
-			    nla_put_u64(skb, IFLA_VF_STATS_MULTICAST,
-					vf_stats.multicast))
-				goto nla_put_failure;
-			nla_nest_end(skb, vfstats);
-			nla_nest_end(skb, vf);
 		}
+
 		nla_nest_end(skb, vfinfo);
 	}
 
diff --git a/net/core/scm.c b/net/core/scm.c
index 3b6899b..8a1741b 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -305,6 +305,8 @@
 			err = put_user(cmlen, &cm->cmsg_len);
 		if (!err) {
 			cmlen = CMSG_SPACE(i*sizeof(int));
+			if (msg->msg_controllen < cmlen)
+				cmlen = msg->msg_controllen;
 			msg->msg_control += cmlen;
 			msg->msg_controllen -= cmlen;
 		}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index aa41e6d..152b9c7 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4268,7 +4268,8 @@
 		return NULL;
 	}
 
-	memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN);
+	memmove(skb->data - ETH_HLEN, skb->data - skb->mac_len,
+		2 * ETH_ALEN);
 	skb->mac_header += VLAN_HLEN;
 	return skb;
 }
diff --git a/net/core/sock.c b/net/core/sock.c
index 1e4dd54..e31dfce 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1530,7 +1530,6 @@
 		skb_queue_head_init(&newsk->sk_receive_queue);
 		skb_queue_head_init(&newsk->sk_write_queue);
 
-		spin_lock_init(&newsk->sk_dst_lock);
 		rwlock_init(&newsk->sk_callback_lock);
 		lockdep_set_class_and_name(&newsk->sk_callback_lock,
 				af_callback_keys + newsk->sk_family,
@@ -1607,7 +1606,7 @@
 {
 	u32 max_segs = 1;
 
-	__sk_dst_set(sk, dst);
+	sk_dst_set(sk, dst);
 	sk->sk_route_caps = dst->dev->features;
 	if (sk->sk_route_caps & NETIF_F_GSO)
 		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
@@ -1815,7 +1814,7 @@
 {
 	DEFINE_WAIT(wait);
 
-	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 	for (;;) {
 		if (!timeo)
 			break;
@@ -1861,7 +1860,7 @@
 		if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf)
 			break;
 
-		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 		err = -EAGAIN;
 		if (!timeo)
@@ -2048,9 +2047,9 @@
 	DEFINE_WAIT(wait);
 
 	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 	rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb);
-	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 	finish_wait(sk_sleep(sk), &wait);
 	return rc;
 }
@@ -2388,7 +2387,6 @@
 	} else
 		sk->sk_wq	=	NULL;
 
-	spin_lock_init(&sk->sk_dst_lock);
 	rwlock_init(&sk->sk_callback_lock);
 	lockdep_set_class_and_name(&sk->sk_callback_lock,
 			af_callback_keys + sk->sk_family,
diff --git a/net/core/stream.c b/net/core/stream.c
index d70f77a..b96f7a7 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -39,7 +39,7 @@
 			wake_up_interruptible_poll(&wq->wait, POLLOUT |
 						POLLWRNORM | POLLWRBAND);
 		if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
-			sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT);
+			sock_wake_async(wq, SOCK_WAKE_SPACE, POLL_OUT);
 		rcu_read_unlock();
 	}
 }
@@ -126,7 +126,7 @@
 		current_timeo = vm_wait = (prandom_u32() % (HZ / 5)) + 2;
 
 	while (1) {
-		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 
 		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 
@@ -139,7 +139,7 @@
 		}
 		if (signal_pending(current))
 			goto do_interrupted;
-		clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+		sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 		if (sk_stream_memory_free(sk) && !vm_wait)
 			break;
 
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index db5fc24..9c6d050 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -202,7 +202,9 @@
 	security_req_classify_flow(req, flowi6_to_flowi(&fl6));
 
 
-	final_p = fl6_update_dst(&fl6, np->opt, &final);
+	rcu_read_lock();
+	final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final);
+	rcu_read_unlock();
 
 	dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
 	if (IS_ERR(dst)) {
@@ -219,7 +221,10 @@
 							 &ireq->ir_v6_loc_addr,
 							 &ireq->ir_v6_rmt_addr);
 		fl6.daddr = ireq->ir_v6_rmt_addr;
-		err = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
+		rcu_read_lock();
+		err = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt),
+			       np->tclass);
+		rcu_read_unlock();
 		err = net_xmit_eval(err);
 	}
 
@@ -387,6 +392,7 @@
 	struct inet_request_sock *ireq = inet_rsk(req);
 	struct ipv6_pinfo *newnp;
 	const struct ipv6_pinfo *np = inet6_sk(sk);
+	struct ipv6_txoptions *opt;
 	struct inet_sock *newinet;
 	struct dccp6_sock *newdp6;
 	struct sock *newsk;
@@ -453,7 +459,7 @@
 	 * comment in that function for the gory details. -acme
 	 */
 
-	__ip6_dst_store(newsk, dst, NULL, NULL);
+	ip6_dst_store(newsk, dst, NULL, NULL);
 	newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM |
 						      NETIF_F_TSO);
 	newdp6 = (struct dccp6_sock *)newsk;
@@ -488,13 +494,15 @@
 	 * Yes, keeping reference count would be much more clever, but we make
 	 * one more one thing there: reattach optmem to newsk.
 	 */
-	if (np->opt != NULL)
-		newnp->opt = ipv6_dup_options(newsk, np->opt);
-
+	opt = rcu_dereference(np->opt);
+	if (opt) {
+		opt = ipv6_dup_options(newsk, opt);
+		RCU_INIT_POINTER(newnp->opt, opt);
+	}
 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
-	if (newnp->opt != NULL)
-		inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
-						     newnp->opt->opt_flen);
+	if (opt)
+		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
+						    opt->opt_flen;
 
 	dccp_sync_mss(newsk, dst_mtu(dst));
 
@@ -757,6 +765,7 @@
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct in6_addr *saddr = NULL, *final_p, final;
+	struct ipv6_txoptions *opt;
 	struct flowi6 fl6;
 	struct dst_entry *dst;
 	int addr_type;
@@ -856,7 +865,8 @@
 	fl6.fl6_sport = inet->inet_sport;
 	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
 
-	final_p = fl6_update_dst(&fl6, np->opt, &final);
+	opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+	final_p = fl6_update_dst(&fl6, opt, &final);
 
 	dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
 	if (IS_ERR(dst)) {
@@ -873,12 +883,11 @@
 	np->saddr = *saddr;
 	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
 
-	__ip6_dst_store(sk, dst, NULL, NULL);
+	ip6_dst_store(sk, dst, NULL, NULL);
 
 	icsk->icsk_ext_hdr_len = 0;
-	if (np->opt != NULL)
-		icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
-					  np->opt->opt_nflen);
+	if (opt)
+		icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen;
 
 	inet->inet_dport = usin->sin6_port;
 
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index b5cf13a..41e6580 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -339,8 +339,7 @@
 			if (sk_stream_is_writeable(sk)) {
 				mask |= POLLOUT | POLLWRNORM;
 			} else {  /* send SIGIO later */
-				set_bit(SOCK_ASYNC_NOSPACE,
-					&sk->sk_socket->flags);
+				sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 
 				/* Race breaker. If space is freed after
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 675cf94..eebf5ac 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1747,9 +1747,9 @@
 		}
 
 		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 		sk_wait_event(sk, &timeo, dn_data_ready(sk, queue, flags, target));
-		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 		finish_wait(sk_sleep(sk), &wait);
 	}
 
@@ -2004,10 +2004,10 @@
 			}
 
 			prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-			set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+			sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 			sk_wait_event(sk, &timeo,
 				      !dn_queue_too_long(scp, queue, flags));
-			clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+			sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 			finish_wait(sk_sleep(sk), &wait);
 			continue;
 		}
diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c
index 4677b6f..ecc28cf 100644
--- a/net/dns_resolver/dns_query.c
+++ b/net/dns_resolver/dns_query.c
@@ -67,7 +67,7 @@
  * Returns the size of the result on success, -ve error code otherwise.
  */
 int dns_query(const char *type, const char *name, size_t namelen,
-	      const char *options, char **_result, time_t *_expiry)
+	      const char *options, char **_result, time64_t *_expiry)
 {
 	struct key *rkey;
 	const struct user_key_payload *upayload;
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 35a9788..c7d1adc 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -312,7 +312,7 @@
 	return;
 
 out:
-	WARN_ON_ONCE("HSR: Could not send supervision frame\n");
+	WARN_ONCE(1, "HSR: Could not send supervision frame\n");
 	kfree_skb(skb);
 }
 
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 6baf36e..05e4cba 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2126,7 +2126,7 @@
 	ASSERT_RTNL();
 
 	in_dev = ip_mc_find_dev(net, imr);
-	if (!in_dev) {
+	if (!imr->imr_ifindex && !imr->imr_address.s_addr && !in_dev) {
 		ret = -ENODEV;
 		goto out;
 	}
@@ -2147,7 +2147,8 @@
 
 		*imlp = iml->next_rcu;
 
-		ip_mc_dec_group(in_dev, group);
+		if (in_dev)
+			ip_mc_dec_group(in_dev, group);
 
 		/* decrease mem now to avoid the memleak warning */
 		atomic_sub(sizeof(*iml), &sk->sk_omem_alloc);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 1feb15f..46b9c88 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -563,7 +563,7 @@
 	int max_retries, thresh;
 	u8 defer_accept;
 
-	if (sk_listener->sk_state != TCP_LISTEN)
+	if (sk_state_load(sk_listener) != TCP_LISTEN)
 		goto drop;
 
 	max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
@@ -749,7 +749,7 @@
 	 * It is OK, because this socket enters to hash table only
 	 * after validation is complete.
 	 */
-	sk->sk_state = TCP_LISTEN;
+	sk_state_store(sk, TCP_LISTEN);
 	if (!sk->sk_prot->get_port(sk, inet->inet_num)) {
 		inet->inet_sport = htons(inet->inet_num);
 
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 92dd4b7..c3a3835 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -134,7 +134,7 @@
 			      struct mfc_cache *c, struct rtmsg *rtm);
 static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
 				 int cmd);
-static void mroute_clean_tables(struct mr_table *mrt);
+static void mroute_clean_tables(struct mr_table *mrt, bool all);
 static void ipmr_expire_process(unsigned long arg);
 
 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
@@ -350,7 +350,7 @@
 static void ipmr_free_table(struct mr_table *mrt)
 {
 	del_timer_sync(&mrt->ipmr_expire_timer);
-	mroute_clean_tables(mrt);
+	mroute_clean_tables(mrt, true);
 	kfree(mrt);
 }
 
@@ -441,10 +441,6 @@
 	return dev;
 
 failure:
-	/* allow the register to be completed before unregistering. */
-	rtnl_unlock();
-	rtnl_lock();
-
 	unregister_netdevice(dev);
 	return NULL;
 }
@@ -540,10 +536,6 @@
 	return dev;
 
 failure:
-	/* allow the register to be completed before unregistering. */
-	rtnl_unlock();
-	rtnl_lock();
-
 	unregister_netdevice(dev);
 	return NULL;
 }
@@ -1208,7 +1200,7 @@
  *	Close the multicast socket, and clear the vif tables etc
  */
 
-static void mroute_clean_tables(struct mr_table *mrt)
+static void mroute_clean_tables(struct mr_table *mrt, bool all)
 {
 	int i;
 	LIST_HEAD(list);
@@ -1217,8 +1209,9 @@
 	/* Shut down all active vif entries */
 
 	for (i = 0; i < mrt->maxvif; i++) {
-		if (!(mrt->vif_table[i].flags & VIFF_STATIC))
-			vif_delete(mrt, i, 0, &list);
+		if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
+			continue;
+		vif_delete(mrt, i, 0, &list);
 	}
 	unregister_netdevice_many(&list);
 
@@ -1226,7 +1219,7 @@
 
 	for (i = 0; i < MFC_LINES; i++) {
 		list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
-			if (c->mfc_flags & MFC_STATIC)
+			if (!all && (c->mfc_flags & MFC_STATIC))
 				continue;
 			list_del_rcu(&c->list);
 			mroute_netlink_event(mrt, c, RTM_DELROUTE);
@@ -1261,7 +1254,7 @@
 						    NETCONFA_IFINDEX_ALL,
 						    net->ipv4.devconf_all);
 			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
-			mroute_clean_tables(mrt);
+			mroute_clean_tables(mrt, false);
 		}
 	}
 	rtnl_unlock();
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 657d230..b3ca21b 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -45,7 +45,7 @@
 	struct net *net = nf_ct_net(ct);
 	const struct nf_conn *master = ct->master;
 	struct nf_conntrack_expect *other_exp;
-	struct nf_conntrack_tuple t;
+	struct nf_conntrack_tuple t = {};
 	const struct nf_ct_pptp_master *ct_pptp_info;
 	const struct nf_nat_pptp *nat_pptp_info;
 	struct nf_nat_range range;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 8c0d0bd..63e5be0 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -406,10 +406,12 @@
 			ip_select_ident(net, skb, NULL);
 
 		iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
+		skb->transport_header += iphlen;
+		if (iph->protocol == IPPROTO_ICMP &&
+		    length >= iphlen + sizeof(struct icmphdr))
+			icmp_out_count(net, ((struct icmphdr *)
+				skb_transport_header(skb))->type);
 	}
-	if (iph->protocol == IPPROTO_ICMP)
-		icmp_out_count(net, ((struct icmphdr *)
-			skb_transport_header(skb))->type);
 
 	err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
 		      net, sk, skb, NULL, rt->dst.dev,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0cfa7c0..c82cca1 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -451,11 +451,14 @@
 	unsigned int mask;
 	struct sock *sk = sock->sk;
 	const struct tcp_sock *tp = tcp_sk(sk);
+	int state;
 
 	sock_rps_record_flow(sk);
 
 	sock_poll_wait(file, sk_sleep(sk), wait);
-	if (sk->sk_state == TCP_LISTEN)
+
+	state = sk_state_load(sk);
+	if (state == TCP_LISTEN)
 		return inet_csk_listen_poll(sk);
 
 	/* Socket is not locked. We are protected from async events
@@ -492,14 +495,14 @@
 	 * NOTE. Check for TCP_CLOSE is added. The goal is to prevent
 	 * blocking on fresh not-connected or disconnected socket. --ANK
 	 */
-	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == TCP_CLOSE)
+	if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
 		mask |= POLLHUP;
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
 		mask |= POLLIN | POLLRDNORM | POLLRDHUP;
 
 	/* Connected or passive Fast Open socket? */
-	if (sk->sk_state != TCP_SYN_SENT &&
-	    (sk->sk_state != TCP_SYN_RECV || tp->fastopen_rsk)) {
+	if (state != TCP_SYN_SENT &&
+	    (state != TCP_SYN_RECV || tp->fastopen_rsk)) {
 		int target = sock_rcvlowat(sk, 0, INT_MAX);
 
 		if (tp->urg_seq == tp->copied_seq &&
@@ -507,9 +510,6 @@
 		    tp->urg_data)
 			target++;
 
-		/* Potential race condition. If read of tp below will
-		 * escape above sk->sk_state, we can be illegally awaken
-		 * in SYN_* states. */
 		if (tp->rcv_nxt - tp->copied_seq >= target)
 			mask |= POLLIN | POLLRDNORM;
 
@@ -517,8 +517,7 @@
 			if (sk_stream_is_writeable(sk)) {
 				mask |= POLLOUT | POLLWRNORM;
 			} else {  /* send SIGIO later */
-				set_bit(SOCK_ASYNC_NOSPACE,
-					&sk->sk_socket->flags);
+				sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 
 				/* Race breaker. If space is freed after
@@ -906,7 +905,7 @@
 			goto out_err;
 	}
 
-	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 
 	mss_now = tcp_send_mss(sk, &size_goal, flags);
 	copied = 0;
@@ -1134,7 +1133,7 @@
 	}
 
 	/* This should be in poll */
-	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 
 	mss_now = tcp_send_mss(sk, &size_goal, flags);
 
@@ -1934,7 +1933,7 @@
 	/* Change state AFTER socket is unhashed to avoid closed
 	 * socket sitting in hash tables.
 	 */
-	sk->sk_state = state;
+	sk_state_store(sk, state);
 
 #ifdef STATE_TRACE
 	SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]);
@@ -2644,7 +2643,8 @@
 	if (sk->sk_type != SOCK_STREAM)
 		return;
 
-	info->tcpi_state = sk->sk_state;
+	info->tcpi_state = sk_state_load(sk);
+
 	info->tcpi_ca_state = icsk->icsk_ca_state;
 	info->tcpi_retransmits = icsk->icsk_retransmits;
 	info->tcpi_probes = icsk->icsk_probes_out;
@@ -2672,7 +2672,7 @@
 	info->tcpi_snd_mss = tp->mss_cache;
 	info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss;
 
-	if (sk->sk_state == TCP_LISTEN) {
+	if (info->tcpi_state == TCP_LISTEN) {
 		info->tcpi_unacked = sk->sk_ack_backlog;
 		info->tcpi_sacked = sk->sk_max_ack_backlog;
 	} else {
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 479f349..b316040 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -21,7 +21,7 @@
 {
 	struct tcp_info *info = _info;
 
-	if (sk->sk_state == TCP_LISTEN) {
+	if (sk_state_load(sk) == TCP_LISTEN) {
 		r->idiag_rqueue = sk->sk_ack_backlog;
 		r->idiag_wqueue = sk->sk_max_ack_backlog;
 	} else if (sk->sk_type == SOCK_STREAM) {
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index fdd88c3..2d656ee 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4481,19 +4481,34 @@
 int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
 {
 	struct sk_buff *skb;
+	int err = -ENOMEM;
+	int data_len = 0;
 	bool fragstolen;
 
 	if (size == 0)
 		return 0;
 
-	skb = alloc_skb(size, sk->sk_allocation);
+	if (size > PAGE_SIZE) {
+		int npages = min_t(size_t, size >> PAGE_SHIFT, MAX_SKB_FRAGS);
+
+		data_len = npages << PAGE_SHIFT;
+		size = data_len + (size & ~PAGE_MASK);
+	}
+	skb = alloc_skb_with_frags(size - data_len, data_len,
+				   PAGE_ALLOC_COSTLY_ORDER,
+				   &err, sk->sk_allocation);
 	if (!skb)
 		goto err;
 
+	skb_put(skb, size - data_len);
+	skb->data_len = data_len;
+	skb->len = size;
+
 	if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
 		goto err_free;
 
-	if (memcpy_from_msg(skb_put(skb, size), msg, size))
+	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
+	if (err)
 		goto err_free;
 
 	TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
@@ -4509,7 +4524,8 @@
 err_free:
 	kfree_skb(skb);
 err:
-	return -ENOMEM;
+	return err;
+
 }
 
 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
@@ -5667,6 +5683,7 @@
 		}
 
 		tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
+		tp->copied_seq = tp->rcv_nxt;
 		tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
 
 		/* RFC1323: The window in SYN & SYN/ACK segments is
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 950e28c..db00343 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -921,7 +921,8 @@
 	}
 
 	md5sig = rcu_dereference_protected(tp->md5sig_info,
-					   sock_owned_by_user(sk));
+					   sock_owned_by_user(sk) ||
+					   lockdep_is_held(&sk->sk_lock.slock));
 	if (!md5sig) {
 		md5sig = kmalloc(sizeof(*md5sig), gfp);
 		if (!md5sig)
@@ -2158,6 +2159,7 @@
 	__u16 destp = ntohs(inet->inet_dport);
 	__u16 srcp = ntohs(inet->inet_sport);
 	int rx_queue;
+	int state;
 
 	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
 	    icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
@@ -2175,17 +2177,18 @@
 		timer_expires = jiffies;
 	}
 
-	if (sk->sk_state == TCP_LISTEN)
+	state = sk_state_load(sk);
+	if (state == TCP_LISTEN)
 		rx_queue = sk->sk_ack_backlog;
 	else
-		/*
-		 * because we dont lock socket, we might find a transient negative value
+		/* Because we don't lock the socket,
+		 * we might find a transient negative value.
 		 */
 		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
 
 	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
 			"%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
-		i, src, srcp, dest, destp, sk->sk_state,
+		i, src, srcp, dest, destp, state,
 		tp->write_seq - tp->snd_una,
 		rx_queue,
 		timer_active,
@@ -2199,8 +2202,8 @@
 		jiffies_to_clock_t(icsk->icsk_ack.ato),
 		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
 		tp->snd_cwnd,
-		sk->sk_state == TCP_LISTEN ?
-		    (fastopenq ? fastopenq->max_qlen : 0) :
+		state == TCP_LISTEN ?
+		    fastopenq->max_qlen :
 		    (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
 }
 
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index c9c716a..193ba1f 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -168,7 +168,7 @@
 			dst_negative_advice(sk);
 			if (tp->syn_fastopen || tp->syn_data)
 				tcp_fastopen_cache_set(sk, 0, NULL, true, 0);
-			if (tp->syn_data)
+			if (tp->syn_data && icsk->icsk_retransmits == 1)
 				NET_INC_STATS_BH(sock_net(sk),
 						 LINUX_MIB_TCPFASTOPENACTIVEFAIL);
 		}
@@ -176,6 +176,18 @@
 		syn_set = true;
 	} else {
 		if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0, 0)) {
+			/* Some middle-boxes may black-hole Fast Open _after_
+			 * the handshake. Therefore we conservatively disable
+			 * Fast Open on this path on recurring timeouts with
+			 * few or zero bytes acked after Fast Open.
+			 */
+			if (tp->syn_data_acked &&
+			    tp->bytes_acked <= tp->rx_opt.mss_clamp) {
+				tcp_fastopen_cache_set(sk, 0, NULL, true, 0);
+				if (icsk->icsk_retransmits == sysctl_tcp_retries1)
+					NET_INC_STATS_BH(sock_net(sk),
+							 LINUX_MIB_TCPFASTOPENACTIVEFAIL);
+			}
 			/* Black hole detection */
 			tcp_mtu_probing(icsk, sk);
 
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 24ec14f9..0c7b0e6 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -100,7 +100,6 @@
 #include <linux/slab.h>
 #include <net/tcp_states.h>
 #include <linux/skbuff.h>
-#include <linux/netdevice.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <net/net_namespace.h>
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index d84742f..61f2685 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3642,7 +3642,7 @@
 
 	/* send a neighbour solicitation for our addr */
 	addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
-	ndisc_send_ns(ifp->idev->dev, &ifp->addr, &mcaddr, &in6addr_any, NULL);
+	ndisc_send_ns(ifp->idev->dev, &ifp->addr, &mcaddr, &in6addr_any);
 out:
 	in6_ifa_put(ifp);
 	rtnl_unlock();
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 44bb66b..8ec0df7 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -428,9 +428,11 @@
 
 	/* Free tx options */
 
-	opt = xchg(&np->opt, NULL);
-	if (opt)
-		sock_kfree_s(sk, opt, opt->tot_len);
+	opt = xchg((__force struct ipv6_txoptions **)&np->opt, NULL);
+	if (opt) {
+		atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+		txopt_put(opt);
+	}
 }
 EXPORT_SYMBOL_GPL(inet6_destroy_sock);
 
@@ -659,7 +661,10 @@
 		fl6.fl6_sport = inet->inet_sport;
 		security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
 
-		final_p = fl6_update_dst(&fl6, np->opt, &final);
+		rcu_read_lock();
+		final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt),
+					 &final);
+		rcu_read_unlock();
 
 		dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
 		if (IS_ERR(dst)) {
@@ -668,7 +673,7 @@
 			return PTR_ERR(dst);
 		}
 
-		__ip6_dst_store(sk, dst, NULL, NULL);
+		ip6_dst_store(sk, dst, NULL, NULL);
 	}
 
 	return 0;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index d70b023..517c55b 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -167,8 +167,10 @@
 
 	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
 
-	opt = flowlabel ? flowlabel->opt : np->opt;
+	rcu_read_lock();
+	opt = flowlabel ? flowlabel->opt : rcu_dereference(np->opt);
 	final_p = fl6_update_dst(&fl6, opt, &final);
+	rcu_read_unlock();
 
 	dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
 	err = 0;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index ce203b0..ea7c4d6 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -727,6 +727,7 @@
 			*((char **)&opt2->dst1opt) += dif;
 		if (opt2->srcrt)
 			*((char **)&opt2->srcrt) += dif;
+		atomic_set(&opt2->refcnt, 1);
 	}
 	return opt2;
 }
@@ -790,7 +791,7 @@
 		return ERR_PTR(-ENOBUFS);
 
 	memset(opt2, 0, tot_len);
-
+	atomic_set(&opt2->refcnt, 1);
 	opt2->tot_len = tot_len;
 	p = (char *)(opt2 + 1);
 
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 36c5a98..0a37ddc 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -834,11 +834,6 @@
 	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
 }
 
-/*
- * Special lock-class for __icmpv6_sk:
- */
-static struct lock_class_key icmpv6_socket_sk_dst_lock_key;
-
 static int __net_init icmpv6_sk_init(struct net *net)
 {
 	struct sock *sk;
@@ -860,15 +855,6 @@
 
 		net->ipv6.icmp_sk[i] = sk;
 
-		/*
-		 * Split off their lock-class, because sk->sk_dst_lock
-		 * gets used from softirqs, which is safe for
-		 * __icmpv6_sk (because those never get directly used
-		 * via userspace syscalls), but unsafe for normal sockets.
-		 */
-		lockdep_set_class(&sk->sk_dst_lock,
-				  &icmpv6_socket_sk_dst_lock_key);
-
 		/* Enough space for 2 64K ICMP packets, including
 		 * sk_buff struct overhead.
 		 */
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 5d1c7ce..a7ca2cd 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -78,7 +78,9 @@
 	memset(fl6, 0, sizeof(*fl6));
 	fl6->flowi6_proto = proto;
 	fl6->daddr = ireq->ir_v6_rmt_addr;
-	final_p = fl6_update_dst(fl6, np->opt, &final);
+	rcu_read_lock();
+	final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
+	rcu_read_unlock();
 	fl6->saddr = ireq->ir_v6_loc_addr;
 	fl6->flowi6_oif = ireq->ir_iif;
 	fl6->flowi6_mark = ireq->ir_mark;
@@ -109,14 +111,6 @@
 EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr);
 
 static inline
-void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst,
-			   const struct in6_addr *daddr,
-			   const struct in6_addr *saddr)
-{
-	__ip6_dst_store(sk, dst, daddr, saddr);
-}
-
-static inline
 struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie)
 {
 	return __sk_dst_check(sk, cookie);
@@ -142,14 +136,16 @@
 	fl6->fl6_dport = inet->inet_dport;
 	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
 
-	final_p = fl6_update_dst(fl6, np->opt, &final);
+	rcu_read_lock();
+	final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
+	rcu_read_unlock();
 
 	dst = __inet6_csk_dst_check(sk, np->dst_cookie);
 	if (!dst) {
 		dst = ip6_dst_lookup_flow(sk, fl6, final_p);
 
 		if (!IS_ERR(dst))
-			__inet6_csk_dst_store(sk, dst, NULL, NULL);
+			ip6_dst_store(sk, dst, NULL, NULL);
 	}
 	return dst;
 }
@@ -175,7 +171,8 @@
 	/* Restore final destination back after routing done */
 	fl6.daddr = sk->sk_v6_daddr;
 
-	res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
+	res = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt),
+		       np->tclass);
 	rcu_read_unlock();
 	return res;
 }
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index eabffbb..137fca4 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -177,7 +177,7 @@
 	int i;
 
 	for_each_possible_cpu(i)
-		ip6_tnl_per_cpu_dst_set(raw_cpu_ptr(t->dst_cache), NULL);
+		ip6_tnl_per_cpu_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
 }
 EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset);
 
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index ad19136..a10e771 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -118,7 +118,7 @@
 			      int cmd);
 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
 			       struct netlink_callback *cb);
-static void mroute_clean_tables(struct mr6_table *mrt);
+static void mroute_clean_tables(struct mr6_table *mrt, bool all);
 static void ipmr_expire_process(unsigned long arg);
 
 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
@@ -334,7 +334,7 @@
 static void ip6mr_free_table(struct mr6_table *mrt)
 {
 	del_timer_sync(&mrt->ipmr_expire_timer);
-	mroute_clean_tables(mrt);
+	mroute_clean_tables(mrt, true);
 	kfree(mrt);
 }
 
@@ -765,10 +765,6 @@
 	return dev;
 
 failure:
-	/* allow the register to be completed before unregistering. */
-	rtnl_unlock();
-	rtnl_lock();
-
 	unregister_netdevice(dev);
 	return NULL;
 }
@@ -1542,7 +1538,7 @@
  *	Close the multicast socket, and clear the vif tables etc
  */
 
-static void mroute_clean_tables(struct mr6_table *mrt)
+static void mroute_clean_tables(struct mr6_table *mrt, bool all)
 {
 	int i;
 	LIST_HEAD(list);
@@ -1552,8 +1548,9 @@
 	 *	Shut down all active vif entries
 	 */
 	for (i = 0; i < mrt->maxvif; i++) {
-		if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
-			mif6_delete(mrt, i, &list);
+		if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
+			continue;
+		mif6_delete(mrt, i, &list);
 	}
 	unregister_netdevice_many(&list);
 
@@ -1562,7 +1559,7 @@
 	 */
 	for (i = 0; i < MFC6_LINES; i++) {
 		list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
-			if (c->mfc_flags & MFC_STATIC)
+			if (!all && (c->mfc_flags & MFC_STATIC))
 				continue;
 			write_lock_bh(&mrt_lock);
 			list_del(&c->list);
@@ -1625,7 +1622,7 @@
 						     net->ipv6.devconf_all);
 			write_unlock_bh(&mrt_lock);
 
-			mroute_clean_tables(mrt);
+			mroute_clean_tables(mrt, false);
 			err = 0;
 			break;
 		}
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 63e6956..4449ad1 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -111,7 +111,8 @@
 			icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
 		}
 	}
-	opt = xchg(&inet6_sk(sk)->opt, opt);
+	opt = xchg((__force struct ipv6_txoptions **)&inet6_sk(sk)->opt,
+		   opt);
 	sk_dst_reset(sk);
 
 	return opt;
@@ -231,9 +232,12 @@
 				sk->sk_socket->ops = &inet_dgram_ops;
 				sk->sk_family = PF_INET;
 			}
-			opt = xchg(&np->opt, NULL);
-			if (opt)
-				sock_kfree_s(sk, opt, opt->tot_len);
+			opt = xchg((__force struct ipv6_txoptions **)&np->opt,
+				   NULL);
+			if (opt) {
+				atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+				txopt_put(opt);
+			}
 			pktopt = xchg(&np->pktoptions, NULL);
 			kfree_skb(pktopt);
 
@@ -403,7 +407,8 @@
 		if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW))
 			break;
 
-		opt = ipv6_renew_options(sk, np->opt, optname,
+		opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+		opt = ipv6_renew_options(sk, opt, optname,
 					 (struct ipv6_opt_hdr __user *)optval,
 					 optlen);
 		if (IS_ERR(opt)) {
@@ -432,8 +437,10 @@
 		retv = 0;
 		opt = ipv6_update_options(sk, opt);
 sticky_done:
-		if (opt)
-			sock_kfree_s(sk, opt, opt->tot_len);
+		if (opt) {
+			atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+			txopt_put(opt);
+		}
 		break;
 	}
 
@@ -486,6 +493,7 @@
 			break;
 
 		memset(opt, 0, sizeof(*opt));
+		atomic_set(&opt->refcnt, 1);
 		opt->tot_len = sizeof(*opt) + optlen;
 		retv = -EFAULT;
 		if (copy_from_user(opt+1, optval, optlen))
@@ -502,8 +510,10 @@
 		retv = 0;
 		opt = ipv6_update_options(sk, opt);
 done:
-		if (opt)
-			sock_kfree_s(sk, opt, opt->tot_len);
+		if (opt) {
+			atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+			txopt_put(opt);
+		}
 		break;
 	}
 	case IPV6_UNICAST_HOPS:
@@ -1110,10 +1120,11 @@
 	case IPV6_RTHDR:
 	case IPV6_DSTOPTS:
 	{
+		struct ipv6_txoptions *opt;
 
 		lock_sock(sk);
-		len = ipv6_getsockopt_sticky(sk, np->opt,
-					     optname, optval, len);
+		opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+		len = ipv6_getsockopt_sticky(sk, opt, optname, optval, len);
 		release_sock(sk);
 		/* check if ipv6_getsockopt_sticky() returns err code */
 		if (len < 0)
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 124338a..5ee56d0 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1651,7 +1651,6 @@
 	if (!err) {
 		ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT);
 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
-		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, payload_len);
 	} else {
 		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
 	}
@@ -2015,7 +2014,6 @@
 	if (!err) {
 		ICMP6MSGOUT_INC_STATS(net, idev, type);
 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
-		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, full_len);
 	} else
 		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
 
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 3e0f855..d6161e1 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -556,8 +556,7 @@
 }
 
 void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
-		   const struct in6_addr *daddr, const struct in6_addr *saddr,
-		   struct sk_buff *oskb)
+		   const struct in6_addr *daddr, const struct in6_addr *saddr)
 {
 	struct sk_buff *skb;
 	struct in6_addr addr_buf;
@@ -593,9 +592,6 @@
 		ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR,
 				       dev->dev_addr);
 
-	if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE) && oskb)
-		skb_dst_copy(skb, oskb);
-
 	ndisc_send_skb(skb, daddr, saddr);
 }
 
@@ -682,12 +678,12 @@
 				  "%s: trying to ucast probe in NUD_INVALID: %pI6\n",
 				  __func__, target);
 		}
-		ndisc_send_ns(dev, target, target, saddr, skb);
+		ndisc_send_ns(dev, target, target, saddr);
 	} else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) {
 		neigh_app_ns(neigh);
 	} else {
 		addrconf_addr_solict_mult(target, &mcaddr);
-		ndisc_send_ns(dev, target, &mcaddr, saddr, skb);
+		ndisc_send_ns(dev, target, &mcaddr, saddr);
 	}
 }
 
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index d5efeb8..bab4441 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -190,7 +190,7 @@
 /* Creation primitives. */
 static inline struct frag_queue *fq_find(struct net *net, __be32 id,
 					 u32 user, struct in6_addr *src,
-					 struct in6_addr *dst, u8 ecn)
+					 struct in6_addr *dst, int iif, u8 ecn)
 {
 	struct inet_frag_queue *q;
 	struct ip6_create_arg arg;
@@ -200,6 +200,7 @@
 	arg.user = user;
 	arg.src = src;
 	arg.dst = dst;
+	arg.iif = iif;
 	arg.ecn = ecn;
 
 	local_bh_disable();
@@ -601,7 +602,7 @@
 	fhdr = (struct frag_hdr *)skb_transport_header(clone);
 
 	fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
-		     ip6_frag_ecn(hdr));
+		     skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
 	if (fq == NULL) {
 		pr_debug("Can't find and can't create new queue\n");
 		goto ret_orig;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index dc65ec1..9914098 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -733,6 +733,7 @@
 
 static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 {
+	struct ipv6_txoptions *opt_to_free = NULL;
 	struct ipv6_txoptions opt_space;
 	DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
 	struct in6_addr *daddr, *final_p, final;
@@ -839,8 +840,10 @@
 		if (!(opt->opt_nflen|opt->opt_flen))
 			opt = NULL;
 	}
-	if (!opt)
-		opt = np->opt;
+	if (!opt) {
+		opt = txopt_get(np);
+		opt_to_free = opt;
+		}
 	if (flowlabel)
 		opt = fl6_merge_options(&opt_space, flowlabel, opt);
 	opt = ipv6_fixup_options(&opt_space, opt);
@@ -906,6 +909,7 @@
 	dst_release(dst);
 out:
 	fl6_sock_release(flowlabel);
+	txopt_put(opt_to_free);
 	return err < 0 ? err : len;
 do_confirm:
 	dst_confirm(dst);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 44e21a0..45f5ae5 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -108,7 +108,10 @@
 	return	fq->id == arg->id &&
 		fq->user == arg->user &&
 		ipv6_addr_equal(&fq->saddr, arg->src) &&
-		ipv6_addr_equal(&fq->daddr, arg->dst);
+		ipv6_addr_equal(&fq->daddr, arg->dst) &&
+		(arg->iif == fq->iif ||
+		 !(ipv6_addr_type(arg->dst) & (IPV6_ADDR_MULTICAST |
+					       IPV6_ADDR_LINKLOCAL)));
 }
 EXPORT_SYMBOL(ip6_frag_match);
 
@@ -180,7 +183,7 @@
 
 static struct frag_queue *
 fq_find(struct net *net, __be32 id, const struct in6_addr *src,
-	const struct in6_addr *dst, u8 ecn)
+	const struct in6_addr *dst, int iif, u8 ecn)
 {
 	struct inet_frag_queue *q;
 	struct ip6_create_arg arg;
@@ -190,6 +193,7 @@
 	arg.user = IP6_DEFRAG_LOCAL_DELIVER;
 	arg.src = src;
 	arg.dst = dst;
+	arg.iif = iif;
 	arg.ecn = ecn;
 
 	hash = inet6_hash_frag(id, src, dst);
@@ -551,7 +555,7 @@
 	}
 
 	fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr,
-		     ip6_frag_ecn(hdr));
+		     skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
 	if (fq) {
 		int ret;
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c8bc9b4..826e6aa 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -404,6 +404,14 @@
 	}
 }
 
+static bool __rt6_check_expired(const struct rt6_info *rt)
+{
+	if (rt->rt6i_flags & RTF_EXPIRES)
+		return time_after(jiffies, rt->dst.expires);
+	else
+		return false;
+}
+
 static bool rt6_check_expired(const struct rt6_info *rt)
 {
 	if (rt->rt6i_flags & RTF_EXPIRES) {
@@ -515,7 +523,7 @@
 		container_of(w, struct __rt6_probe_work, work);
 
 	addrconf_addr_solict_mult(&work->target, &mcaddr);
-	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, NULL);
+	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL);
 	dev_put(work->dev);
 	kfree(work);
 }
@@ -1252,7 +1260,8 @@
 
 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
 {
-	if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
+	if (!__rt6_check_expired(rt) &&
+	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
 	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
 		return &rt->dst;
 	else
@@ -1272,7 +1281,8 @@
 
 	rt6_dst_from_metrics_check(rt);
 
-	if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE))
+	if (rt->rt6i_flags & RTF_PCPU ||
+	    (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
 		return rt6_dst_from_check(rt, cookie);
 	else
 		return rt6_check(rt, cookie);
@@ -1322,6 +1332,12 @@
 	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
 }
 
+static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
+{
+	return !(rt->rt6i_flags & RTF_CACHE) &&
+		(rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
+}
+
 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
 				 const struct ipv6hdr *iph, u32 mtu)
 {
@@ -1335,7 +1351,7 @@
 	if (mtu >= dst_mtu(dst))
 		return;
 
-	if (rt6->rt6i_flags & RTF_CACHE) {
+	if (!rt6_cache_allowed_for_pmtu(rt6)) {
 		rt6_do_update_pmtu(rt6, mtu);
 	} else {
 		const struct in6_addr *daddr, *saddr;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index bb8f2fa..eaf7ac4 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -222,7 +222,7 @@
 		memset(&fl6, 0, sizeof(fl6));
 		fl6.flowi6_proto = IPPROTO_TCP;
 		fl6.daddr = ireq->ir_v6_rmt_addr;
-		final_p = fl6_update_dst(&fl6, np->opt, &final);
+		final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final);
 		fl6.saddr = ireq->ir_v6_loc_addr;
 		fl6.flowi6_oif = sk->sk_bound_dev_if;
 		fl6.flowi6_mark = ireq->ir_mark;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5baa8e7..e7aab56 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -120,6 +120,7 @@
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct in6_addr *saddr = NULL, *final_p, final;
+	struct ipv6_txoptions *opt;
 	struct flowi6 fl6;
 	struct dst_entry *dst;
 	int addr_type;
@@ -235,7 +236,8 @@
 	fl6.fl6_dport = usin->sin6_port;
 	fl6.fl6_sport = inet->inet_sport;
 
-	final_p = fl6_update_dst(&fl6, np->opt, &final);
+	opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+	final_p = fl6_update_dst(&fl6, opt, &final);
 
 	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
 
@@ -255,7 +257,7 @@
 	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
 
 	sk->sk_gso_type = SKB_GSO_TCPV6;
-	__ip6_dst_store(sk, dst, NULL, NULL);
+	ip6_dst_store(sk, dst, NULL, NULL);
 
 	if (tcp_death_row.sysctl_tw_recycle &&
 	    !tp->rx_opt.ts_recent_stamp &&
@@ -263,9 +265,9 @@
 		tcp_fetch_timewait_stamp(sk, dst);
 
 	icsk->icsk_ext_hdr_len = 0;
-	if (np->opt)
-		icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
-					  np->opt->opt_nflen);
+	if (opt)
+		icsk->icsk_ext_hdr_len = opt->opt_flen +
+					 opt->opt_nflen;
 
 	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
 
@@ -461,7 +463,8 @@
 		if (np->repflow && ireq->pktopts)
 			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
 
-		err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
+		err = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt),
+			       np->tclass);
 		err = net_xmit_eval(err);
 	}
 
@@ -972,6 +975,7 @@
 	struct inet_request_sock *ireq;
 	struct ipv6_pinfo *newnp;
 	const struct ipv6_pinfo *np = inet6_sk(sk);
+	struct ipv6_txoptions *opt;
 	struct tcp6_sock *newtcp6sk;
 	struct inet_sock *newinet;
 	struct tcp_sock *newtp;
@@ -1056,7 +1060,7 @@
 	 */
 
 	newsk->sk_gso_type = SKB_GSO_TCPV6;
-	__ip6_dst_store(newsk, dst, NULL, NULL);
+	ip6_dst_store(newsk, dst, NULL, NULL);
 	inet6_sk_rx_dst_set(newsk, skb);
 
 	newtcp6sk = (struct tcp6_sock *)newsk;
@@ -1098,13 +1102,15 @@
 	   but we make one more one thing there: reattach optmem
 	   to newsk.
 	 */
-	if (np->opt)
-		newnp->opt = ipv6_dup_options(newsk, np->opt);
-
+	opt = rcu_dereference(np->opt);
+	if (opt) {
+		opt = ipv6_dup_options(newsk, opt);
+		RCU_INIT_POINTER(newnp->opt, opt);
+	}
 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
-	if (newnp->opt)
-		inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
-						     newnp->opt->opt_flen);
+	if (opt)
+		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
+						    opt->opt_flen;
 
 	tcp_ca_openreq_child(newsk, dst);
 
@@ -1690,6 +1696,8 @@
 	const struct tcp_sock *tp = tcp_sk(sp);
 	const struct inet_connection_sock *icsk = inet_csk(sp);
 	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
+	int rx_queue;
+	int state;
 
 	dest  = &sp->sk_v6_daddr;
 	src   = &sp->sk_v6_rcv_saddr;
@@ -1710,6 +1718,15 @@
 		timer_expires = jiffies;
 	}
 
+	state = sk_state_load(sp);
+	if (state == TCP_LISTEN)
+		rx_queue = sp->sk_ack_backlog;
+	else
+		/* Because we don't lock the socket,
+		 * we might find a transient negative value.
+		 */
+		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
+
 	seq_printf(seq,
 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
@@ -1718,9 +1735,9 @@
 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
 		   dest->s6_addr32[0], dest->s6_addr32[1],
 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
-		   sp->sk_state,
-		   tp->write_seq-tp->snd_una,
-		   (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
+		   state,
+		   tp->write_seq - tp->snd_una,
+		   rx_queue,
 		   timer_active,
 		   jiffies_delta_to_clock_t(timer_expires - jiffies),
 		   icsk->icsk_retransmits,
@@ -1732,7 +1749,7 @@
 		   jiffies_to_clock_t(icsk->icsk_ack.ato),
 		   (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
 		   tp->snd_cwnd,
-		   sp->sk_state == TCP_LISTEN ?
+		   state == TCP_LISTEN ?
 			fastopenq->max_qlen :
 			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
 		   );
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 01bcb49..9da3287 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1110,6 +1110,7 @@
 	DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
 	struct in6_addr *daddr, *final_p, final;
 	struct ipv6_txoptions *opt = NULL;
+	struct ipv6_txoptions *opt_to_free = NULL;
 	struct ip6_flowlabel *flowlabel = NULL;
 	struct flowi6 fl6;
 	struct dst_entry *dst;
@@ -1263,8 +1264,10 @@
 			opt = NULL;
 		connected = 0;
 	}
-	if (!opt)
-		opt = np->opt;
+	if (!opt) {
+		opt = txopt_get(np);
+		opt_to_free = opt;
+	}
 	if (flowlabel)
 		opt = fl6_merge_options(&opt_space, flowlabel, opt);
 	opt = ipv6_fixup_options(&opt_space, opt);
@@ -1373,6 +1376,7 @@
 out:
 	dst_release(dst);
 	fl6_sock_release(flowlabel);
+	txopt_put(opt_to_free);
 	if (!err)
 		return len;
 	/*
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index fcb2752..435608c 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1483,7 +1483,7 @@
 	if (sock_writeable(sk) && iucv_below_msglim(sk))
 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
 	else
-		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 
 	return mask;
 }
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index aca38d8..a2c8747 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -486,6 +486,7 @@
 	DECLARE_SOCKADDR(struct sockaddr_l2tpip6 *, lsa, msg->msg_name);
 	struct in6_addr *daddr, *final_p, final;
 	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct ipv6_txoptions *opt_to_free = NULL;
 	struct ipv6_txoptions *opt = NULL;
 	struct ip6_flowlabel *flowlabel = NULL;
 	struct dst_entry *dst = NULL;
@@ -575,8 +576,10 @@
 			opt = NULL;
 	}
 
-	if (opt == NULL)
-		opt = np->opt;
+	if (!opt) {
+		opt = txopt_get(np);
+		opt_to_free = opt;
+	}
 	if (flowlabel)
 		opt = fl6_merge_options(&opt_space, flowlabel, opt);
 	opt = ipv6_fixup_options(&opt_space, opt);
@@ -631,6 +634,7 @@
 	dst_release(dst);
 out:
 	fl6_sock_release(flowlabel);
+	txopt_put(opt_to_free);
 
 	return err < 0 ? err : len;
 
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index a758eb84..ff75718 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -500,7 +500,7 @@
 	/* send AddBA request */
 	ieee80211_send_addba_request(sdata, sta->sta.addr, tid,
 				     tid_tx->dialog_token, start_seq_num,
-				     local->hw.max_tx_aggregation_subframes,
+				     IEEE80211_MAX_AMPDU_BUF,
 				     tid_tx->timeout);
 }
 
@@ -926,6 +926,7 @@
 	amsdu = capab & IEEE80211_ADDBA_PARAM_AMSDU_MASK;
 	tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
 	buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6;
+	buf_size = min(buf_size, local->hw.max_tx_aggregation_subframes);
 
 	mutex_lock(&sta->ampdu_mlme.mtx);
 
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index c2bd1b6..da471ee 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -3454,8 +3454,12 @@
 			goto out_unlock;
 		}
 	} else {
-		/* for cookie below */
-		ack_skb = skb;
+		/* Assign a dummy non-zero cookie, it's not sent to
+		 * userspace in this case but we rely on its value
+		 * internally in the need_offchan case to distinguish
+		 * mgmt-tx from remain-on-channel.
+		 */
+		*cookie = 0xffffffff;
 	}
 
 	if (!need_offchan) {
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index d0dc1bf..c9e325d 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -76,7 +76,8 @@
 void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata,
 			      bool update_bss)
 {
-	if (__ieee80211_recalc_txpower(sdata) || update_bss)
+	if (__ieee80211_recalc_txpower(sdata) ||
+	    (update_bss && ieee80211_sdata_running(sdata)))
 		ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_TXPOWER);
 }
 
@@ -1861,6 +1862,7 @@
 		unregister_netdevice(sdata->dev);
 	} else {
 		cfg80211_unregister_wdev(&sdata->wdev);
+		ieee80211_teardown_sdata(sdata);
 		kfree(sdata);
 	}
 }
@@ -1870,7 +1872,6 @@
 	if (WARN_ON_ONCE(!test_bit(SDATA_STATE_RUNNING, &sdata->state)))
 		return;
 	ieee80211_do_stop(sdata, true);
-	ieee80211_teardown_sdata(sdata);
 }
 
 void ieee80211_remove_interfaces(struct ieee80211_local *local)
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 858f6b1..175ffcf 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -541,8 +541,7 @@
 			   NL80211_FEATURE_HT_IBSS |
 			   NL80211_FEATURE_VIF_TXPOWER |
 			   NL80211_FEATURE_MAC_ON_CREATE |
-			   NL80211_FEATURE_USERSPACE_MPM |
-			   NL80211_FEATURE_FULL_AP_CLIENT_STATE;
+			   NL80211_FEATURE_USERSPACE_MPM;
 
 	if (!ops->hw_scan)
 		wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN |
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index b890e22..b3b44a5 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -779,10 +779,8 @@
 static void mesh_path_node_reclaim(struct rcu_head *rp)
 {
 	struct mpath_node *node = container_of(rp, struct mpath_node, rcu);
-	struct ieee80211_sub_if_data *sdata = node->mpath->sdata;
 
 	del_timer_sync(&node->mpath->timer);
-	atomic_dec(&sdata->u.mesh.mpaths);
 	kfree(node->mpath);
 	kfree(node);
 }
@@ -790,8 +788,9 @@
 /* needs to be called with the corresponding hashwlock taken */
 static void __mesh_path_del(struct mesh_table *tbl, struct mpath_node *node)
 {
-	struct mesh_path *mpath;
-	mpath = node->mpath;
+	struct mesh_path *mpath = node->mpath;
+	struct ieee80211_sub_if_data *sdata = node->mpath->sdata;
+
 	spin_lock(&mpath->state_lock);
 	mpath->flags |= MESH_PATH_RESOLVING;
 	if (mpath->is_gate)
@@ -799,6 +798,7 @@
 	hlist_del_rcu(&node->list);
 	call_rcu(&node->rcu, mesh_path_node_reclaim);
 	spin_unlock(&mpath->state_lock);
+	atomic_dec(&sdata->u.mesh.mpaths);
 	atomic_dec(&tbl->entries);
 }
 
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 4aeca4b..a413e52 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -597,8 +597,8 @@
 		/* We need to ensure power level is at max for scanning. */
 		ieee80211_hw_config(local, 0);
 
-		if ((req->channels[0]->flags &
-		     IEEE80211_CHAN_NO_IR) ||
+		if ((req->channels[0]->flags & (IEEE80211_CHAN_NO_IR |
+						IEEE80211_CHAN_RADAR)) ||
 		    !req->n_ssids) {
 			next_delay = IEEE80211_PASSIVE_CHANNEL_TIME;
 		} else {
@@ -645,7 +645,7 @@
 	 * TODO: channel switching also consumes quite some time,
 	 * add that delay as well to get a better estimation
 	 */
-	if (chan->flags & IEEE80211_CHAN_NO_IR)
+	if (chan->flags & (IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_RADAR))
 		return IEEE80211_PASSIVE_CHANNEL_TIME;
 	return IEEE80211_PROBE_DELAY + IEEE80211_CHANNEL_TIME;
 }
@@ -777,7 +777,8 @@
 	 *
 	 * In any case, it is not necessary for a passive scan.
 	 */
-	if (chan->flags & IEEE80211_CHAN_NO_IR || !scan_req->n_ssids) {
+	if ((chan->flags & (IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_RADAR)) ||
+	    !scan_req->n_ssids) {
 		*next_delay = IEEE80211_PASSIVE_CHANNEL_TIME;
 		local->next_scan_state = SCAN_DECISION;
 		return;
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index e22349e..4692782 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -869,7 +869,7 @@
 	depends on IPV6 || IPV6=n
 	depends on !NF_CONNTRACK || NF_CONNTRACK
 	select NF_DUP_IPV4
-	select NF_DUP_IPV6 if IP6_NF_IPTABLES
+	select NF_DUP_IPV6 if IP6_NF_IPTABLES != n
 	---help---
 	This option adds a "TEE" target with which a packet can be cloned and
 	this clone be rerouted to another nexthop.
@@ -882,7 +882,7 @@
 	depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
 	depends on IP_NF_MANGLE
 	select NF_DEFRAG_IPV4
-	select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
+	select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n
 	help
 	  This option adds a `TPROXY' target, which is somewhat similar to
 	  REDIRECT.  It can only be used in the mangle table and is useful
@@ -1375,7 +1375,7 @@
 	depends on IPV6 || IPV6=n
 	depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
 	select NF_DEFRAG_IPV4
-	select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
+	select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n
 	help
 	  This option adds a `socket' match, which can be used to match
 	  packets for which a TCP or UDP socket lookup finds a valid socket.
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index d05e759..b0bc475 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -33,7 +33,7 @@
 #define mtype_gc		IPSET_TOKEN(MTYPE, _gc)
 #define mtype			MTYPE
 
-#define get_ext(set, map, id)	((map)->extensions + (set)->dsize * (id))
+#define get_ext(set, map, id)	((map)->extensions + ((set)->dsize * (id)))
 
 static void
 mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
@@ -67,12 +67,9 @@
 		del_timer_sync(&map->gc);
 
 	ip_set_free(map->members);
-	if (set->dsize) {
-		if (set->extensions & IPSET_EXT_DESTROY)
-			mtype_ext_cleanup(set);
-		ip_set_free(map->extensions);
-	}
-	kfree(map);
+	if (set->dsize && set->extensions & IPSET_EXT_DESTROY)
+		mtype_ext_cleanup(set);
+	ip_set_free(map);
 
 	set->data = NULL;
 }
@@ -92,16 +89,14 @@
 {
 	const struct mtype *map = set->data;
 	struct nlattr *nested;
+	size_t memsize = sizeof(*map) + map->memsize;
 
 	nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
 	if (!nested)
 		goto nla_put_failure;
 	if (mtype_do_head(skb, map) ||
 	    nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
-	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE,
-			  htonl(sizeof(*map) +
-				map->memsize +
-				set->dsize * map->elements)))
+	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)))
 		goto nla_put_failure;
 	if (unlikely(ip_set_put_flags(skb, set)))
 		goto nla_put_failure;
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 64a5643..4783eff 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -41,7 +41,6 @@
 /* Type structure */
 struct bitmap_ip {
 	void *members;		/* the set members */
-	void *extensions;	/* data extensions */
 	u32 first_ip;		/* host byte order, included in range */
 	u32 last_ip;		/* host byte order, included in range */
 	u32 elements;		/* number of max elements in the set */
@@ -49,6 +48,8 @@
 	size_t memsize;		/* members size */
 	u8 netmask;		/* subnet netmask */
 	struct timer_list gc;	/* garbage collection */
+	unsigned char extensions[0]	/* data extensions */
+		__aligned(__alignof__(u64));
 };
 
 /* ADT structure for generic function args */
@@ -224,13 +225,6 @@
 	map->members = ip_set_alloc(map->memsize);
 	if (!map->members)
 		return false;
-	if (set->dsize) {
-		map->extensions = ip_set_alloc(set->dsize * elements);
-		if (!map->extensions) {
-			kfree(map->members);
-			return false;
-		}
-	}
 	map->first_ip = first_ip;
 	map->last_ip = last_ip;
 	map->elements = elements;
@@ -316,13 +310,13 @@
 	pr_debug("hosts %u, elements %llu\n",
 		 hosts, (unsigned long long)elements);
 
-	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	set->dsize = ip_set_elem_len(set, tb, 0, 0);
+	map = ip_set_alloc(sizeof(*map) + elements * set->dsize);
 	if (!map)
 		return -ENOMEM;
 
 	map->memsize = bitmap_bytes(0, elements - 1);
 	set->variant = &bitmap_ip;
-	set->dsize = ip_set_elem_len(set, tb, 0);
 	if (!init_map_ip(set, map, first_ip, last_ip,
 			 elements, hosts, netmask)) {
 		kfree(map);
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 1430535..29dde20 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -47,24 +47,26 @@
 /* Type structure */
 struct bitmap_ipmac {
 	void *members;		/* the set members */
-	void *extensions;	/* MAC + data extensions */
 	u32 first_ip;		/* host byte order, included in range */
 	u32 last_ip;		/* host byte order, included in range */
 	u32 elements;		/* number of max elements in the set */
 	size_t memsize;		/* members size */
 	struct timer_list gc;	/* garbage collector */
+	unsigned char extensions[0]	/* MAC + data extensions */
+		__aligned(__alignof__(u64));
 };
 
 /* ADT structure for generic function args */
 struct bitmap_ipmac_adt_elem {
+	unsigned char ether[ETH_ALEN] __aligned(2);
 	u16 id;
-	unsigned char *ether;
+	u16 add_mac;
 };
 
 struct bitmap_ipmac_elem {
 	unsigned char ether[ETH_ALEN];
 	unsigned char filled;
-} __attribute__ ((aligned));
+} __aligned(__alignof__(u64));
 
 static inline u32
 ip_to_id(const struct bitmap_ipmac *m, u32 ip)
@@ -72,11 +74,11 @@
 	return ip - m->first_ip;
 }
 
-static inline struct bitmap_ipmac_elem *
-get_elem(void *extensions, u16 id, size_t dsize)
-{
-	return (struct bitmap_ipmac_elem *)(extensions + id * dsize);
-}
+#define get_elem(extensions, id, dsize)		\
+	(struct bitmap_ipmac_elem *)(extensions + (id) * (dsize))
+
+#define get_const_elem(extensions, id, dsize)	\
+	(const struct bitmap_ipmac_elem *)(extensions + (id) * (dsize))
 
 /* Common functions */
 
@@ -88,10 +90,9 @@
 
 	if (!test_bit(e->id, map->members))
 		return 0;
-	elem = get_elem(map->extensions, e->id, dsize);
-	if (elem->filled == MAC_FILLED)
-		return !e->ether ||
-		       ether_addr_equal(e->ether, elem->ether);
+	elem = get_const_elem(map->extensions, e->id, dsize);
+	if (e->add_mac && elem->filled == MAC_FILLED)
+		return ether_addr_equal(e->ether, elem->ether);
 	/* Trigger kernel to fill out the ethernet address */
 	return -EAGAIN;
 }
@@ -103,7 +104,7 @@
 
 	if (!test_bit(id, map->members))
 		return 0;
-	elem = get_elem(map->extensions, id, dsize);
+	elem = get_const_elem(map->extensions, id, dsize);
 	/* Timer not started for the incomplete elements */
 	return elem->filled == MAC_FILLED;
 }
@@ -133,7 +134,7 @@
 		 * and we can reuse it later when MAC is filled out,
 		 * possibly by the kernel
 		 */
-		if (e->ether)
+		if (e->add_mac)
 			ip_set_timeout_set(timeout, t);
 		else
 			*timeout = t;
@@ -150,7 +151,7 @@
 	elem = get_elem(map->extensions, e->id, dsize);
 	if (test_bit(e->id, map->members)) {
 		if (elem->filled == MAC_FILLED) {
-			if (e->ether &&
+			if (e->add_mac &&
 			    (flags & IPSET_FLAG_EXIST) &&
 			    !ether_addr_equal(e->ether, elem->ether)) {
 				/* memcpy isn't atomic */
@@ -159,7 +160,7 @@
 				ether_addr_copy(elem->ether, e->ether);
 			}
 			return IPSET_ADD_FAILED;
-		} else if (!e->ether)
+		} else if (!e->add_mac)
 			/* Already added without ethernet address */
 			return IPSET_ADD_FAILED;
 		/* Fill the MAC address and trigger the timer activation */
@@ -168,7 +169,7 @@
 		ether_addr_copy(elem->ether, e->ether);
 		elem->filled = MAC_FILLED;
 		return IPSET_ADD_START_STORED_TIMEOUT;
-	} else if (e->ether) {
+	} else if (e->add_mac) {
 		/* We can store MAC too */
 		ether_addr_copy(elem->ether, e->ether);
 		elem->filled = MAC_FILLED;
@@ -191,7 +192,7 @@
 		     u32 id, size_t dsize)
 {
 	const struct bitmap_ipmac_elem *elem =
-		get_elem(map->extensions, id, dsize);
+		get_const_elem(map->extensions, id, dsize);
 
 	return nla_put_ipaddr4(skb, IPSET_ATTR_IP,
 			       htonl(map->first_ip + id)) ||
@@ -213,7 +214,7 @@
 {
 	struct bitmap_ipmac *map = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
-	struct bitmap_ipmac_adt_elem e = { .id = 0 };
+	struct bitmap_ipmac_adt_elem e = { .id = 0, .add_mac = 1 };
 	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
 	u32 ip;
 
@@ -231,7 +232,7 @@
 		return -EINVAL;
 
 	e.id = ip_to_id(map, ip);
-	e.ether = eth_hdr(skb)->h_source;
+	memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN);
 
 	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
 }
@@ -265,11 +266,10 @@
 		return -IPSET_ERR_BITMAP_RANGE;
 
 	e.id = ip_to_id(map, ip);
-	if (tb[IPSET_ATTR_ETHER])
-		e.ether = nla_data(tb[IPSET_ATTR_ETHER]);
-	else
-		e.ether = NULL;
-
+	if (tb[IPSET_ATTR_ETHER]) {
+		memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN);
+		e.add_mac = 1;
+	}
 	ret = adtfn(set, &e, &ext, &ext, flags);
 
 	return ip_set_eexist(ret, flags) ? 0 : ret;
@@ -300,13 +300,6 @@
 	map->members = ip_set_alloc(map->memsize);
 	if (!map->members)
 		return false;
-	if (set->dsize) {
-		map->extensions = ip_set_alloc(set->dsize * elements);
-		if (!map->extensions) {
-			kfree(map->members);
-			return false;
-		}
-	}
 	map->first_ip = first_ip;
 	map->last_ip = last_ip;
 	map->elements = elements;
@@ -361,14 +354,15 @@
 	if (elements > IPSET_BITMAP_MAX_RANGE + 1)
 		return -IPSET_ERR_BITMAP_RANGE_SIZE;
 
-	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	set->dsize = ip_set_elem_len(set, tb,
+				     sizeof(struct bitmap_ipmac_elem),
+				     __alignof__(struct bitmap_ipmac_elem));
+	map = ip_set_alloc(sizeof(*map) + elements * set->dsize);
 	if (!map)
 		return -ENOMEM;
 
 	map->memsize = bitmap_bytes(0, elements - 1);
 	set->variant = &bitmap_ipmac;
-	set->dsize = ip_set_elem_len(set, tb,
-				     sizeof(struct bitmap_ipmac_elem));
 	if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
 		kfree(map);
 		return -ENOMEM;
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 5338ccd..7f0c733 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -35,12 +35,13 @@
 /* Type structure */
 struct bitmap_port {
 	void *members;		/* the set members */
-	void *extensions;	/* data extensions */
 	u16 first_port;		/* host byte order, included in range */
 	u16 last_port;		/* host byte order, included in range */
 	u32 elements;		/* number of max elements in the set */
 	size_t memsize;		/* members size */
 	struct timer_list gc;	/* garbage collection */
+	unsigned char extensions[0]	/* data extensions */
+		__aligned(__alignof__(u64));
 };
 
 /* ADT structure for generic function args */
@@ -209,13 +210,6 @@
 	map->members = ip_set_alloc(map->memsize);
 	if (!map->members)
 		return false;
-	if (set->dsize) {
-		map->extensions = ip_set_alloc(set->dsize * map->elements);
-		if (!map->extensions) {
-			kfree(map->members);
-			return false;
-		}
-	}
 	map->first_port = first_port;
 	map->last_port = last_port;
 	set->timeout = IPSET_NO_TIMEOUT;
@@ -232,6 +226,7 @@
 {
 	struct bitmap_port *map;
 	u16 first_port, last_port;
+	u32 elements;
 
 	if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
 		     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT_TO) ||
@@ -248,14 +243,15 @@
 		last_port = tmp;
 	}
 
-	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	elements = last_port - first_port + 1;
+	set->dsize = ip_set_elem_len(set, tb, 0, 0);
+	map = ip_set_alloc(sizeof(*map) + elements * set->dsize);
 	if (!map)
 		return -ENOMEM;
 
-	map->elements = last_port - first_port + 1;
+	map->elements = elements;
 	map->memsize = bitmap_bytes(0, map->elements);
 	set->variant = &bitmap_port;
-	set->dsize = ip_set_elem_len(set, tb, 0);
 	if (!init_map_port(set, map, first_port, last_port)) {
 		kfree(map);
 		return -ENOMEM;
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 69ab9c26..54f3d7c 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -364,25 +364,27 @@
 }
 
 size_t
-ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len)
+ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len,
+		size_t align)
 {
 	enum ip_set_ext_id id;
-	size_t offset = len;
 	u32 cadt_flags = 0;
 
 	if (tb[IPSET_ATTR_CADT_FLAGS])
 		cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
 	if (cadt_flags & IPSET_FLAG_WITH_FORCEADD)
 		set->flags |= IPSET_CREATE_FLAG_FORCEADD;
+	if (!align)
+		align = 1;
 	for (id = 0; id < IPSET_EXT_ID_MAX; id++) {
 		if (!add_extension(id, cadt_flags, tb))
 			continue;
-		offset = ALIGN(offset, ip_set_extensions[id].align);
-		set->offset[id] = offset;
+		len = ALIGN(len, ip_set_extensions[id].align);
+		set->offset[id] = len;
 		set->extensions |= ip_set_extensions[id].type;
-		offset += ip_set_extensions[id].len;
+		len += ip_set_extensions[id].len;
 	}
-	return offset;
+	return ALIGN(len, align);
 }
 EXPORT_SYMBOL_GPL(ip_set_elem_len);
 
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 691b54f..e5336ab 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -72,8 +72,9 @@
 	DECLARE_BITMAP(used, AHASH_MAX_TUNED);
 	u8 size;		/* size of the array */
 	u8 pos;			/* position of the first free entry */
-	unsigned char value[0];	/* the array of the values */
-} __attribute__ ((aligned));
+	unsigned char value[0]	/* the array of the values */
+		__aligned(__alignof__(u64));
+};
 
 /* The hash table: the table size stored here in order to make resizing easy */
 struct htable {
@@ -475,7 +476,7 @@
 mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
 {
 	struct htable *t;
-	struct hbucket *n;
+	struct hbucket *n, *tmp;
 	struct mtype_elem *data;
 	u32 i, j, d;
 #ifdef IP_SET_HASH_WITH_NETS
@@ -510,9 +511,14 @@
 			}
 		}
 		if (d >= AHASH_INIT_SIZE) {
-			struct hbucket *tmp = kzalloc(sizeof(*tmp) +
-					(n->size - AHASH_INIT_SIZE) * dsize,
-					GFP_ATOMIC);
+			if (d >= n->size) {
+				rcu_assign_pointer(hbucket(t, i), NULL);
+				kfree_rcu(n, rcu);
+				continue;
+			}
+			tmp = kzalloc(sizeof(*tmp) +
+				      (n->size - AHASH_INIT_SIZE) * dsize,
+				      GFP_ATOMIC);
 			if (!tmp)
 				/* Still try to delete expired elements */
 				continue;
@@ -522,7 +528,7 @@
 					continue;
 				data = ahash_data(n, j, dsize);
 				memcpy(tmp->value + d * dsize, data, dsize);
-				set_bit(j, tmp->used);
+				set_bit(d, tmp->used);
 				d++;
 			}
 			tmp->pos = d;
@@ -1323,12 +1329,14 @@
 #endif
 		set->variant = &IPSET_TOKEN(HTYPE, 4_variant);
 		set->dsize = ip_set_elem_len(set, tb,
-				sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)));
+			sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)),
+			__alignof__(struct IPSET_TOKEN(HTYPE, 4_elem)));
 #ifndef IP_SET_PROTO_UNDEF
 	} else {
 		set->variant = &IPSET_TOKEN(HTYPE, 6_variant);
 		set->dsize = ip_set_elem_len(set, tb,
-				sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)));
+			sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)),
+			__alignof__(struct IPSET_TOKEN(HTYPE, 6_elem)));
 	}
 #endif
 	if (tb[IPSET_ATTR_TIMEOUT]) {
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 5a30ce6..bbede95 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -31,7 +31,7 @@
 	struct rcu_head rcu;
 	struct list_head list;
 	ip_set_id_t id;
-};
+} __aligned(__alignof__(u64));
 
 struct set_adt_elem {
 	ip_set_id_t id;
@@ -618,7 +618,8 @@
 		size = IP_SET_LIST_MIN_SIZE;
 
 	set->variant = &set_variant;
-	set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem));
+	set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem),
+				     __alignof__(struct set_elem));
 	if (!init_list_set(net, set, size))
 		return -ENOMEM;
 	if (tb[IPSET_ATTR_TIMEOUT]) {
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 1e24fff..f57b4dc 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1176,6 +1176,7 @@
 	struct ip_vs_protocol *pp;
 	struct ip_vs_proto_data *pd;
 	struct ip_vs_conn *cp;
+	struct sock *sk;
 
 	EnterFunction(11);
 
@@ -1183,13 +1184,12 @@
 	if (skb->ipvs_property)
 		return NF_ACCEPT;
 
+	sk = skb_to_full_sk(skb);
 	/* Bad... Do not break raw sockets */
-	if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
+	if (unlikely(sk && hooknum == NF_INET_LOCAL_OUT &&
 		     af == AF_INET)) {
-		struct sock *sk = skb->sk;
-		struct inet_sock *inet = inet_sk(skb->sk);
 
-		if (inet && sk->sk_family == PF_INET && inet->nodefrag)
+		if (sk->sk_family == PF_INET && inet_sk(sk)->nodefrag)
 			return NF_ACCEPT;
 	}
 
@@ -1681,6 +1681,7 @@
 	struct ip_vs_conn *cp;
 	int ret, pkts;
 	int conn_reuse_mode;
+	struct sock *sk;
 
 	/* Already marked as IPVS request or reply? */
 	if (skb->ipvs_property)
@@ -1708,12 +1709,11 @@
 	ip_vs_fill_iph_skb(af, skb, false, &iph);
 
 	/* Bad... Do not break raw sockets */
-	if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
+	sk = skb_to_full_sk(skb);
+	if (unlikely(sk && hooknum == NF_INET_LOCAL_OUT &&
 		     af == AF_INET)) {
-		struct sock *sk = skb->sk;
-		struct inet_sock *inet = inet_sk(skb->sk);
 
-		if (inet && sk->sk_family == PF_INET && inet->nodefrag)
+		if (sk->sk_family == PF_INET && inet_sk(sk)->nodefrag)
 			return NF_ACCEPT;
 	}
 
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 06eb48f..740cce4 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -825,7 +825,7 @@
 	struct net *net = sock_net(ctnl);
 	struct nfnl_log_net *log = nfnl_log_pernet(net);
 	int ret = 0;
-	u16 flags;
+	u16 flags = 0;
 
 	if (nfula[NFULA_CFG_CMD]) {
 		u_int8_t pf = nfmsg->nfgen_family;
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index 1067fb4..c7808fc 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -47,27 +47,34 @@
 	local_bh_enable();
 }
 
-static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr)
+static void nft_counter_fetch(const struct nft_counter_percpu __percpu *counter,
+			      struct nft_counter *total)
 {
-	struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
-	struct nft_counter_percpu *cpu_stats;
-	struct nft_counter total;
+	const struct nft_counter_percpu *cpu_stats;
 	u64 bytes, packets;
 	unsigned int seq;
 	int cpu;
 
-	memset(&total, 0, sizeof(total));
+	memset(total, 0, sizeof(*total));
 	for_each_possible_cpu(cpu) {
-		cpu_stats = per_cpu_ptr(priv->counter, cpu);
+		cpu_stats = per_cpu_ptr(counter, cpu);
 		do {
 			seq	= u64_stats_fetch_begin_irq(&cpu_stats->syncp);
 			bytes	= cpu_stats->counter.bytes;
 			packets	= cpu_stats->counter.packets;
 		} while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq));
 
-		total.packets += packets;
-		total.bytes += bytes;
+		total->packets += packets;
+		total->bytes += bytes;
 	}
+}
+
+static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
+	struct nft_counter total;
+
+	nft_counter_fetch(priv->counter, &total);
 
 	if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes)) ||
 	    nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.packets)))
@@ -118,6 +125,31 @@
 	free_percpu(priv->counter);
 }
 
+static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
+{
+	struct nft_counter_percpu_priv *priv = nft_expr_priv(src);
+	struct nft_counter_percpu_priv *priv_clone = nft_expr_priv(dst);
+	struct nft_counter_percpu __percpu *cpu_stats;
+	struct nft_counter_percpu *this_cpu;
+	struct nft_counter total;
+
+	nft_counter_fetch(priv->counter, &total);
+
+	cpu_stats = __netdev_alloc_pcpu_stats(struct nft_counter_percpu,
+					      GFP_ATOMIC);
+	if (cpu_stats == NULL)
+		return ENOMEM;
+
+	preempt_disable();
+	this_cpu = this_cpu_ptr(cpu_stats);
+	this_cpu->counter.packets = total.packets;
+	this_cpu->counter.bytes = total.bytes;
+	preempt_enable();
+
+	priv_clone->counter = cpu_stats;
+	return 0;
+}
+
 static struct nft_expr_type nft_counter_type;
 static const struct nft_expr_ops nft_counter_ops = {
 	.type		= &nft_counter_type,
@@ -126,6 +158,7 @@
 	.init		= nft_counter_init,
 	.destroy	= nft_counter_destroy,
 	.dump		= nft_counter_dump,
+	.clone		= nft_counter_clone,
 };
 
 static struct nft_expr_type nft_counter_type __read_mostly = {
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 513a8ef..9dec3bd 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -50,8 +50,9 @@
 	}
 
 	ext = nft_set_elem_ext(set, elem);
-	if (priv->expr != NULL)
-		nft_expr_clone(nft_set_ext_expr(ext), priv->expr);
+	if (priv->expr != NULL &&
+	    nft_expr_clone(nft_set_ext_expr(ext), priv->expr) < 0)
+		return NULL;
 
 	return elem;
 }
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index b7de0da..ecf0a01 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -572,7 +572,7 @@
 	if (sock_writeable(sk) && sk->sk_state == LLCP_CONNECTED)
 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
 	else
-		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 
 	pr_debug("mask 0x%x\n", mask);
 
diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
index a7a80a6..653d073 100644
--- a/net/openvswitch/dp_notify.c
+++ b/net/openvswitch/dp_notify.c
@@ -58,7 +58,7 @@
 			struct hlist_node *n;
 
 			hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node) {
-				if (vport->ops->type != OVS_VPORT_TYPE_NETDEV)
+				if (vport->ops->type == OVS_VPORT_TYPE_INTERNAL)
 					continue;
 
 				if (!(vport->dev->priv_flags & IFF_OVS_DATAPATH))
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
index efb736b..e41cd12 100644
--- a/net/openvswitch/vport-geneve.c
+++ b/net/openvswitch/vport-geneve.c
@@ -117,7 +117,6 @@
 	.destroy	= ovs_netdev_tunnel_destroy,
 	.get_options	= geneve_get_options,
 	.send		= dev_queue_xmit,
-	.owner          = THIS_MODULE,
 };
 
 static int __init ovs_geneve_tnl_init(void)
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index c3257d7..7f8897f 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -89,7 +89,6 @@
 	.create		= gre_create,
 	.send		= dev_queue_xmit,
 	.destroy	= ovs_netdev_tunnel_destroy,
-	.owner		= THIS_MODULE,
 };
 
 static int __init ovs_gre_tnl_init(void)
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index b327368..6b0190b 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -180,9 +180,13 @@
 	if (vport->dev->priv_flags & IFF_OVS_DATAPATH)
 		ovs_netdev_detach_dev(vport);
 
-	/* Early release so we can unregister the device */
+	/* We can be invoked by both explicit vport deletion and
+	 * underlying netdev deregistration; delete the link only
+	 * if it's not already shutting down.
+	 */
+	if (vport->dev->reg_state == NETREG_REGISTERED)
+		rtnl_delete_link(vport->dev);
 	dev_put(vport->dev);
-	rtnl_delete_link(vport->dev);
 	vport->dev = NULL;
 	rtnl_unlock();
 
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 0ac0fd0..31cbc8c 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -71,7 +71,7 @@
 	return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)];
 }
 
-int ovs_vport_ops_register(struct vport_ops *ops)
+int __ovs_vport_ops_register(struct vport_ops *ops)
 {
 	int err = -EEXIST;
 	struct vport_ops *o;
@@ -87,7 +87,7 @@
 	ovs_unlock();
 	return err;
 }
-EXPORT_SYMBOL_GPL(ovs_vport_ops_register);
+EXPORT_SYMBOL_GPL(__ovs_vport_ops_register);
 
 void ovs_vport_ops_unregister(struct vport_ops *ops)
 {
@@ -256,8 +256,8 @@
  *
  * @vport: vport to delete.
  *
- * Detaches @vport from its datapath and destroys it.  It is possible to fail
- * for reasons such as lack of memory.  ovs_mutex must be held.
+ * Detaches @vport from its datapath and destroys it.  ovs_mutex must
+ * be held.
  */
 void ovs_vport_del(struct vport *vport)
 {
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index bdfd82a..8ea3a96 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -196,7 +196,13 @@
 	return vport->dev->name;
 }
 
-int ovs_vport_ops_register(struct vport_ops *ops);
+int __ovs_vport_ops_register(struct vport_ops *ops);
+#define ovs_vport_ops_register(ops)		\
+	({					\
+		(ops)->owner = THIS_MODULE;	\
+		__ovs_vport_ops_register(ops);	\
+	})
+
 void ovs_vport_ops_unregister(struct vport_ops *ops);
 
 static inline struct rtable *ovs_tunnel_route_lookup(struct net *net,
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index af399ca..992396a 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1741,6 +1741,20 @@
 		kfree_rcu(po->rollover, rcu);
 }
 
+static bool packet_extra_vlan_len_allowed(const struct net_device *dev,
+					  struct sk_buff *skb)
+{
+	/* Earlier code assumed this would be a VLAN pkt, double-check
+	 * this now that we have the actual packet in hand. We can only
+	 * do this check on Ethernet devices.
+	 */
+	if (unlikely(dev->type != ARPHRD_ETHER))
+		return false;
+
+	skb_reset_mac_header(skb);
+	return likely(eth_hdr(skb)->h_proto == htons(ETH_P_8021Q));
+}
+
 static const struct proto_ops packet_ops;
 
 static const struct proto_ops packet_ops_spkt;
@@ -1902,18 +1916,10 @@
 		goto retry;
 	}
 
-	if (len > (dev->mtu + dev->hard_header_len + extra_len)) {
-		/* Earlier code assumed this would be a VLAN pkt,
-		 * double-check this now that we have the actual
-		 * packet in hand.
-		 */
-		struct ethhdr *ehdr;
-		skb_reset_mac_header(skb);
-		ehdr = eth_hdr(skb);
-		if (ehdr->h_proto != htons(ETH_P_8021Q)) {
-			err = -EMSGSIZE;
-			goto out_unlock;
-		}
+	if (len > (dev->mtu + dev->hard_header_len + extra_len) &&
+	    !packet_extra_vlan_len_allowed(dev, skb)) {
+		err = -EMSGSIZE;
+		goto out_unlock;
 	}
 
 	skb->protocol = proto;
@@ -2323,8 +2329,8 @@
 static bool ll_header_truncated(const struct net_device *dev, int len)
 {
 	/* net device doesn't like empty head */
-	if (unlikely(len <= dev->hard_header_len)) {
-		net_warn_ratelimited("%s: packet size is too short (%d <= %d)\n",
+	if (unlikely(len < dev->hard_header_len)) {
+		net_warn_ratelimited("%s: packet size is too short (%d < %d)\n",
 				     current->comm, len, dev->hard_header_len);
 		return true;
 	}
@@ -2332,6 +2338,15 @@
 	return false;
 }
 
+static void tpacket_set_protocol(const struct net_device *dev,
+				 struct sk_buff *skb)
+{
+	if (dev->type == ARPHRD_ETHER) {
+		skb_reset_mac_header(skb);
+		skb->protocol = eth_hdr(skb)->h_proto;
+	}
+}
+
 static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 		void *frame, struct net_device *dev, int size_max,
 		__be16 proto, unsigned char *addr, int hlen)
@@ -2368,8 +2383,6 @@
 	skb_reserve(skb, hlen);
 	skb_reset_network_header(skb);
 
-	if (!packet_use_direct_xmit(po))
-		skb_probe_transport_header(skb, 0);
 	if (unlikely(po->tp_tx_has_off)) {
 		int off_min, off_max, off;
 		off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
@@ -2415,6 +2428,8 @@
 				dev->hard_header_len);
 		if (unlikely(err))
 			return err;
+		if (!skb->protocol)
+			tpacket_set_protocol(dev, skb);
 
 		data += dev->hard_header_len;
 		to_write -= dev->hard_header_len;
@@ -2449,6 +2464,8 @@
 		len = ((to_write > len_max) ? len_max : to_write);
 	}
 
+	skb_probe_transport_header(skb, 0);
+
 	return tp_len;
 }
 
@@ -2493,12 +2510,13 @@
 	if (unlikely(!(dev->flags & IFF_UP)))
 		goto out_put;
 
-	reserve = dev->hard_header_len + VLAN_HLEN;
+	if (po->sk.sk_socket->type == SOCK_RAW)
+		reserve = dev->hard_header_len;
 	size_max = po->tx_ring.frame_size
 		- (po->tp_hdrlen - sizeof(struct sockaddr_ll));
 
-	if (size_max > dev->mtu + reserve)
-		size_max = dev->mtu + reserve;
+	if (size_max > dev->mtu + reserve + VLAN_HLEN)
+		size_max = dev->mtu + reserve + VLAN_HLEN;
 
 	do {
 		ph = packet_current_frame(po, &po->tx_ring,
@@ -2525,18 +2543,10 @@
 		tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
 					  addr, hlen);
 		if (likely(tp_len >= 0) &&
-		    tp_len > dev->mtu + dev->hard_header_len) {
-			struct ethhdr *ehdr;
-			/* Earlier code assumed this would be a VLAN pkt,
-			 * double-check this now that we have the actual
-			 * packet in hand.
-			 */
+		    tp_len > dev->mtu + reserve &&
+		    !packet_extra_vlan_len_allowed(dev, skb))
+			tp_len = -EMSGSIZE;
 
-			skb_reset_mac_header(skb);
-			ehdr = eth_hdr(skb);
-			if (ehdr->h_proto != htons(ETH_P_8021Q))
-				tp_len = -EMSGSIZE;
-		}
 		if (unlikely(tp_len < 0)) {
 			if (po->tp_loss) {
 				__packet_set_status(po, ph,
@@ -2765,18 +2775,10 @@
 
 	sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
 
-	if (!gso_type && (len > dev->mtu + reserve + extra_len)) {
-		/* Earlier code assumed this would be a VLAN pkt,
-		 * double-check this now that we have the actual
-		 * packet in hand.
-		 */
-		struct ethhdr *ehdr;
-		skb_reset_mac_header(skb);
-		ehdr = eth_hdr(skb);
-		if (ehdr->h_proto != htons(ETH_P_8021Q)) {
-			err = -EMSGSIZE;
-			goto out_free;
-		}
+	if (!gso_type && (len > dev->mtu + reserve + extra_len) &&
+	    !packet_extra_vlan_len_allowed(dev, skb)) {
+		err = -EMSGSIZE;
+		goto out_free;
 	}
 
 	skb->protocol = proto;
@@ -2807,8 +2809,8 @@
 		len += vnet_hdr_len;
 	}
 
-	if (!packet_use_direct_xmit(po))
-		skb_probe_transport_header(skb, reserve);
+	skb_probe_transport_header(skb, reserve);
+
 	if (unlikely(extra_len == 4))
 		skb->no_fcs = 1;
 
@@ -4107,7 +4109,7 @@
 		err = -EINVAL;
 		if (unlikely((int)req->tp_block_size <= 0))
 			goto out;
-		if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
+		if (unlikely(!PAGE_ALIGNED(req->tp_block_size)))
 			goto out;
 		if (po->tp_version >= TPACKET_V3 &&
 		    (int)(req->tp_block_size -
@@ -4119,8 +4121,8 @@
 		if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
 			goto out;
 
-		rb->frames_per_block = req->tp_block_size/req->tp_frame_size;
-		if (unlikely(rb->frames_per_block <= 0))
+		rb->frames_per_block = req->tp_block_size / req->tp_frame_size;
+		if (unlikely(rb->frames_per_block == 0))
 			goto out;
 		if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
 					req->tp_frame_nr))
diff --git a/net/rds/connection.c b/net/rds/connection.c
index d456403..e3b118c 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -186,12 +186,6 @@
 		}
 	}
 
-	if (trans == NULL) {
-		kmem_cache_free(rds_conn_slab, conn);
-		conn = ERR_PTR(-ENODEV);
-		goto out;
-	}
-
 	conn->c_trans = trans;
 
 	ret = trans->conn_alloc(conn, gfp);
diff --git a/net/rds/send.c b/net/rds/send.c
index 827155c..c9cdb35 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1013,11 +1013,13 @@
 		release_sock(sk);
 	}
 
-	/* racing with another thread binding seems ok here */
+	lock_sock(sk);
 	if (daddr == 0 || rs->rs_bound_addr == 0) {
+		release_sock(sk);
 		ret = -ENOTCONN; /* XXX not a great errno */
 		goto out;
 	}
+	release_sock(sk);
 
 	if (payload_len > rds_sk_sndbuf(rs)) {
 		ret = -EMSGSIZE;
diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c
index e0547f5..adc555e 100644
--- a/net/rxrpc/ar-ack.c
+++ b/net/rxrpc/ar-ack.c
@@ -723,8 +723,10 @@
 
 			if ((call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY ||
 			     call->state == RXRPC_CALL_SERVER_AWAIT_ACK) &&
-			    hard > tx)
+			    hard > tx) {
+				call->acks_hard = tx;
 				goto all_acked;
+			}
 
 			smp_rmb();
 			rxrpc_rotate_tx_window(call, hard - 1);
diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c
index a40d3af..14c4e12 100644
--- a/net/rxrpc/ar-output.c
+++ b/net/rxrpc/ar-output.c
@@ -531,7 +531,7 @@
 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
 
 	/* this should be in poll */
-	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 
 	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
 		return -EPIPE;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index f43c8f3..7ec667d 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -253,7 +253,8 @@
 }
 
 /* We know handle. Find qdisc among all qdisc's attached to device
-   (root qdisc, all its children, children of children etc.)
+ * (root qdisc, all its children, children of children etc.)
+ * Note: caller either uses rtnl or rcu_read_lock()
  */
 
 static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
@@ -264,7 +265,7 @@
 	    root->handle == handle)
 		return root;
 
-	list_for_each_entry(q, &root->list, list) {
+	list_for_each_entry_rcu(q, &root->list, list) {
 		if (q->handle == handle)
 			return q;
 	}
@@ -277,15 +278,18 @@
 		struct Qdisc *root = qdisc_dev(q)->qdisc;
 
 		WARN_ON_ONCE(root == &noop_qdisc);
-		list_add_tail(&q->list, &root->list);
+		ASSERT_RTNL();
+		list_add_tail_rcu(&q->list, &root->list);
 	}
 }
 EXPORT_SYMBOL(qdisc_list_add);
 
 void qdisc_list_del(struct Qdisc *q)
 {
-	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
-		list_del(&q->list);
+	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
+		ASSERT_RTNL();
+		list_del_rcu(&q->list);
+	}
 }
 EXPORT_SYMBOL(qdisc_list_del);
 
@@ -750,14 +754,18 @@
 	if (n == 0)
 		return;
 	drops = max_t(int, n, 0);
+	rcu_read_lock();
 	while ((parentid = sch->parent)) {
 		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
-			return;
+			break;
 
+		if (sch->flags & TCQ_F_NOPARENT)
+			break;
+		/* TODO: perform the search on a per txq basis */
 		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
 		if (sch == NULL) {
-			WARN_ON(parentid != TC_H_ROOT);
-			return;
+			WARN_ON_ONCE(parentid != TC_H_ROOT);
+			break;
 		}
 		cops = sch->ops->cl_ops;
 		if (cops->qlen_notify) {
@@ -768,6 +776,7 @@
 		sch->q.qlen -= n;
 		__qdisc_qstats_drop(sch, drops);
 	}
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
 
@@ -941,7 +950,7 @@
 		}
 		lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
 		if (!netif_is_multiqueue(dev))
-			sch->flags |= TCQ_F_ONETXQUEUE;
+			sch->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
 	}
 
 	sch->handle = handle;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index cb5d4ad..e82a1ad 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -737,7 +737,7 @@
 		return;
 	}
 	if (!netif_is_multiqueue(dev))
-		qdisc->flags |= TCQ_F_ONETXQUEUE;
+		qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
 	dev_queue->qdisc_sleeping = qdisc;
 }
 
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index f3cbaec..3e82f04 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -63,7 +63,7 @@
 		if (qdisc == NULL)
 			goto err;
 		priv->qdiscs[ntx] = qdisc;
-		qdisc->flags |= TCQ_F_ONETXQUEUE;
+		qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
 	}
 
 	sch->flags |= TCQ_F_MQROOT;
@@ -156,7 +156,7 @@
 
 	*old = dev_graft_qdisc(dev_queue, new);
 	if (new)
-		new->flags |= TCQ_F_ONETXQUEUE;
+		new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
 	if (dev->flags & IFF_UP)
 		dev_activate(dev);
 	return 0;
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 3811a74..ad70ecf 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -132,7 +132,7 @@
 			goto err;
 		}
 		priv->qdiscs[i] = qdisc;
-		qdisc->flags |= TCQ_F_ONETXQUEUE;
+		qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
 	}
 
 	/* If the mqprio options indicate that hardware should own
@@ -209,7 +209,7 @@
 	*old = dev_graft_qdisc(dev_queue, new);
 
 	if (new)
-		new->flags |= TCQ_F_ONETXQUEUE;
+		new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
 
 	if (dev->flags & IFF_UP)
 		dev_activate(dev);
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 4f15b7d..1543e39 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -809,8 +809,8 @@
 	if (!has_sha1)
 		return -EINVAL;
 
-	memcpy(ep->auth_hmacs_list->hmac_ids, &hmacs->shmac_idents[0],
-		hmacs->shmac_num_idents * sizeof(__u16));
+	for (i = 0; i < hmacs->shmac_num_idents; i++)
+		ep->auth_hmacs_list->hmac_ids[i] = htons(hmacs->shmac_idents[i]);
 	ep->auth_hmacs_list->param_hdr.length = htons(sizeof(sctp_paramhdr_t) +
 				hmacs->shmac_num_idents * sizeof(__u16));
 	return 0;
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index e917d27..acb45b8 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -209,6 +209,7 @@
 	struct sock *sk = skb->sk;
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct flowi6 *fl6 = &transport->fl.u.ip6;
+	int res;
 
 	pr_debug("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n", __func__, skb,
 		 skb->len, &fl6->saddr, &fl6->daddr);
@@ -220,7 +221,10 @@
 
 	SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS);
 
-	return ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
+	rcu_read_lock();
+	res = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt), np->tclass);
+	rcu_read_unlock();
+	return res;
 }
 
 /* Returns the dst cache entry for the given source and destination ip
@@ -262,7 +266,10 @@
 		pr_debug("src=%pI6 - ", &fl6->saddr);
 	}
 
-	final_p = fl6_update_dst(fl6, np->opt, &final);
+	rcu_read_lock();
+	final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
+	rcu_read_unlock();
+
 	dst = ip6_dst_lookup_flow(sk, fl6, final_p);
 	if (!asoc || saddr)
 		goto out;
@@ -321,7 +328,7 @@
 	if (baddr) {
 		fl6->saddr = baddr->v6.sin6_addr;
 		fl6->fl6_sport = baddr->v6.sin6_port;
-		final_p = fl6_update_dst(fl6, np->opt, &final);
+		final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
 		dst = ip6_dst_lookup_flow(sk, fl6, final_p);
 	}
 
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 897c01c..03c8256 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -972,7 +972,7 @@
 		return -EFAULT;
 
 	/* Alloc space for the address array in kernel memory.  */
-	kaddrs = kmalloc(addrs_size, GFP_KERNEL);
+	kaddrs = kmalloc(addrs_size, GFP_USER | __GFP_NOWARN);
 	if (unlikely(!kaddrs))
 		return -ENOMEM;
 
@@ -4928,7 +4928,7 @@
 	to = optval + offsetof(struct sctp_getaddrs, addrs);
 	space_left = len - offsetof(struct sctp_getaddrs, addrs);
 
-	addrs = kmalloc(space_left, GFP_KERNEL);
+	addrs = kmalloc(space_left, GFP_USER | __GFP_NOWARN);
 	if (!addrs)
 		return -ENOMEM;
 
@@ -6458,7 +6458,7 @@
 	if (sctp_writeable(sk)) {
 		mask |= POLLOUT | POLLWRNORM;
 	} else {
-		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 		/*
 		 * Since the socket is not locked, the buffer
 		 * might be made available after the writeable check and
@@ -6801,26 +6801,30 @@
 static void __sctp_write_space(struct sctp_association *asoc)
 {
 	struct sock *sk = asoc->base.sk;
-	struct socket *sock = sk->sk_socket;
 
-	if ((sctp_wspace(asoc) > 0) && sock) {
-		if (waitqueue_active(&asoc->wait))
-			wake_up_interruptible(&asoc->wait);
+	if (sctp_wspace(asoc) <= 0)
+		return;
 
-		if (sctp_writeable(sk)) {
-			wait_queue_head_t *wq = sk_sleep(sk);
+	if (waitqueue_active(&asoc->wait))
+		wake_up_interruptible(&asoc->wait);
 
-			if (wq && waitqueue_active(wq))
-				wake_up_interruptible(wq);
+	if (sctp_writeable(sk)) {
+		struct socket_wq *wq;
+
+		rcu_read_lock();
+		wq = rcu_dereference(sk->sk_wq);
+		if (wq) {
+			if (waitqueue_active(&wq->wait))
+				wake_up_interruptible(&wq->wait);
 
 			/* Note that we try to include the Async I/O support
 			 * here by modeling from the current TCP/UDP code.
 			 * We have not tested with it yet.
 			 */
 			if (!(sk->sk_shutdown & SEND_SHUTDOWN))
-				sock_wake_async(sock,
-						SOCK_WAKE_SPACE, POLL_OUT);
+				sock_wake_async(wq, SOCK_WAKE_SPACE, POLL_OUT);
 		}
+		rcu_read_unlock();
 	}
 }
 
@@ -7375,6 +7379,13 @@
 
 #if IS_ENABLED(CONFIG_IPV6)
 
+#include <net/transp_v6.h>
+static void sctp_v6_destroy_sock(struct sock *sk)
+{
+	sctp_destroy_sock(sk);
+	inet6_destroy_sock(sk);
+}
+
 struct proto sctpv6_prot = {
 	.name		= "SCTPv6",
 	.owner		= THIS_MODULE,
@@ -7384,7 +7395,7 @@
 	.accept		= sctp_accept,
 	.ioctl		= sctp_ioctl,
 	.init		= sctp_init_sock,
-	.destroy	= sctp_destroy_sock,
+	.destroy	= sctp_v6_destroy_sock,
 	.shutdown	= sctp_shutdown,
 	.setsockopt	= sctp_setsockopt,
 	.getsockopt	= sctp_getsockopt,
diff --git a/net/socket.c b/net/socket.c
index dd2c247..456fadb 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1056,27 +1056,20 @@
 	return 0;
 }
 
-/* This function may be called only under socket lock or callback_lock or rcu_lock */
+/* This function may be called only under rcu_lock */
 
-int sock_wake_async(struct socket *sock, int how, int band)
+int sock_wake_async(struct socket_wq *wq, int how, int band)
 {
-	struct socket_wq *wq;
+	if (!wq || !wq->fasync_list)
+		return -1;
 
-	if (!sock)
-		return -1;
-	rcu_read_lock();
-	wq = rcu_dereference(sock->wq);
-	if (!wq || !wq->fasync_list) {
-		rcu_read_unlock();
-		return -1;
-	}
 	switch (how) {
 	case SOCK_WAKE_WAITD:
-		if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
+		if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
 			break;
 		goto call_kill;
 	case SOCK_WAKE_SPACE:
-		if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
+		if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
 			break;
 		/* fall through */
 	case SOCK_WAKE_IO:
@@ -1086,7 +1079,7 @@
 	case SOCK_WAKE_URG:
 		kill_fasync(&wq->fasync_list, SIGURG, band);
 	}
-	rcu_read_unlock();
+
 	return 0;
 }
 EXPORT_SYMBOL(sock_wake_async);
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index 229956b..95f82d8 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -353,12 +353,20 @@
 {
 	struct rpc_xprt *xprt = req->rq_xprt;
 	struct svc_serv *bc_serv = xprt->bc_serv;
+	struct xdr_buf *rq_rcv_buf = &req->rq_rcv_buf;
 
 	spin_lock(&xprt->bc_pa_lock);
 	list_del(&req->rq_bc_pa_list);
 	xprt_dec_alloc_count(xprt, 1);
 	spin_unlock(&xprt->bc_pa_lock);
 
+	if (copied <= rq_rcv_buf->head[0].iov_len) {
+		rq_rcv_buf->head[0].iov_len = copied;
+		rq_rcv_buf->page_len = 0;
+	} else {
+		rq_rcv_buf->page_len = copied - rq_rcv_buf->head[0].iov_len;
+	}
+
 	req->rq_private_buf.len = copied;
 	set_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);
 
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index bc5b7b5..7fccf96 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1363,6 +1363,7 @@
 	memcpy(&rqstp->rq_addr, &req->rq_xprt->addr, rqstp->rq_addrlen);
 	memcpy(&rqstp->rq_arg, &req->rq_rcv_buf, sizeof(rqstp->rq_arg));
 	memcpy(&rqstp->rq_res, &req->rq_snd_buf, sizeof(rqstp->rq_res));
+	rqstp->rq_arg.len = req->rq_private_buf.len;
 
 	/* reset result send buffer "put" position */
 	resv->iov_len = 0;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 1d1a704..2ffaf6a 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -398,7 +398,7 @@
 	if (unlikely(!sock))
 		return -ENOTSOCK;
 
-	clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
+	clear_bit(SOCKWQ_ASYNC_NOSPACE, &sock->flags);
 	if (base != 0) {
 		addr = NULL;
 		addrlen = 0;
@@ -442,7 +442,7 @@
 	struct sock_xprt *transport = container_of(task->tk_rqstp->rq_xprt, struct sock_xprt, xprt);
 
 	transport->inet->sk_write_pending--;
-	clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
+	clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags);
 }
 
 /**
@@ -467,7 +467,7 @@
 
 	/* Don't race with disconnect */
 	if (xprt_connected(xprt)) {
-		if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) {
+		if (test_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags)) {
 			/*
 			 * Notify TCP that we're limited by the application
 			 * window size
@@ -478,7 +478,7 @@
 			xprt_wait_for_buffer_space(task, xs_nospace_callback);
 		}
 	} else {
-		clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
+		clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags);
 		ret = -ENOTCONN;
 	}
 
@@ -626,7 +626,7 @@
 	case -EPERM:
 		/* When the server has died, an ICMP port unreachable message
 		 * prompts ECONNREFUSED. */
-		clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
+		clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags);
 	}
 
 	return status;
@@ -715,7 +715,7 @@
 	case -EADDRINUSE:
 	case -ENOBUFS:
 	case -EPIPE:
-		clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
+		clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags);
 	}
 
 	return status;
@@ -1618,7 +1618,7 @@
 
 	if (unlikely(!(xprt = xprt_from_sock(sk))))
 		return;
-	if (test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags) == 0)
+	if (test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sock->flags) == 0)
 		return;
 
 	xprt_write_space(xprt);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 9efbdbd..91aea07 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -191,6 +191,7 @@
 
 	snd_l->ackers++;
 	rcv_l->acked = snd_l->snd_nxt - 1;
+	snd_l->state = LINK_ESTABLISHED;
 	tipc_link_build_bc_init_msg(uc_l, xmitq);
 }
 
@@ -206,6 +207,7 @@
 	rcv_l->state = LINK_RESET;
 	if (!snd_l->ackers) {
 		tipc_link_reset(snd_l);
+		snd_l->state = LINK_RESET;
 		__skb_queue_purge(xmitq);
 	}
 }
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 552dbab..b53246f 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -105,6 +105,7 @@
 static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb);
 static void tipc_data_ready(struct sock *sk);
 static void tipc_write_space(struct sock *sk);
+static void tipc_sock_destruct(struct sock *sk);
 static int tipc_release(struct socket *sock);
 static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags);
 static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p);
@@ -381,6 +382,7 @@
 	sk->sk_rcvbuf = sysctl_tipc_rmem[1];
 	sk->sk_data_ready = tipc_data_ready;
 	sk->sk_write_space = tipc_write_space;
+	sk->sk_destruct = tipc_sock_destruct;
 	tsk->conn_timeout = CONN_TIMEOUT_DEFAULT;
 	tsk->sent_unacked = 0;
 	atomic_set(&tsk->dupl_rcvcnt, 0);
@@ -470,9 +472,6 @@
 		tipc_node_remove_conn(net, dnode, tsk->portid);
 	}
 
-	/* Discard any remaining (connection-based) messages in receive queue */
-	__skb_queue_purge(&sk->sk_receive_queue);
-
 	/* Reject any messages that accumulated in backlog queue */
 	sock->state = SS_DISCONNECTING;
 	release_sock(sk);
@@ -1515,6 +1514,11 @@
 	rcu_read_unlock();
 }
 
+static void tipc_sock_destruct(struct sock *sk)
+{
+	__skb_queue_purge(&sk->sk_receive_queue);
+}
+
 /**
  * filter_connect - Handle all incoming messages for a connection-based socket
  * @tsk: TIPC socket
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index ad2719a..70c0327 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -158,8 +158,11 @@
 	struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value;
 	struct rtable *rt;
 
-	if (skb_headroom(skb) < UDP_MIN_HEADROOM)
-		pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC);
+	if (skb_headroom(skb) < UDP_MIN_HEADROOM) {
+		err = pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC);
+		if (err)
+			goto tx_error;
+	}
 
 	skb_set_inner_protocol(skb, htons(ETH_P_TIPC));
 	ub = rcu_dereference_rtnl(b->media_ptr);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index aaa0b58..45aebd9 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -326,6 +326,118 @@
 	return s;
 }
 
+/* Support code for asymmetrically connected dgram sockets
+ *
+ * If a datagram socket is connected to a socket not itself connected
+ * to the first socket (eg, /dev/log), clients may only enqueue more
+ * messages if the present receive queue of the server socket is not
+ * "too large". This means there's a second writeability condition
+ * poll and sendmsg need to test. The dgram recv code will do a wake
+ * up on the peer_wait wait queue of a socket upon reception of a
+ * datagram which needs to be propagated to sleeping would-be writers
+ * since these might not have sent anything so far. This can't be
+ * accomplished via poll_wait because the lifetime of the server
+ * socket might be less than that of its clients if these break their
+ * association with it or if the server socket is closed while clients
+ * are still connected to it and there's no way to inform "a polling
+ * implementation" that it should let go of a certain wait queue
+ *
+ * In order to propagate a wake up, a wait_queue_t of the client
+ * socket is enqueued on the peer_wait queue of the server socket
+ * whose wake function does a wake_up on the ordinary client socket
+ * wait queue. This connection is established whenever a write (or
+ * poll for write) hit the flow control condition and broken when the
+ * association to the server socket is dissolved or after a wake up
+ * was relayed.
+ */
+
+static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
+				      void *key)
+{
+	struct unix_sock *u;
+	wait_queue_head_t *u_sleep;
+
+	u = container_of(q, struct unix_sock, peer_wake);
+
+	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
+			    q);
+	u->peer_wake.private = NULL;
+
+	/* relaying can only happen while the wq still exists */
+	u_sleep = sk_sleep(&u->sk);
+	if (u_sleep)
+		wake_up_interruptible_poll(u_sleep, key);
+
+	return 0;
+}
+
+static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
+{
+	struct unix_sock *u, *u_other;
+	int rc;
+
+	u = unix_sk(sk);
+	u_other = unix_sk(other);
+	rc = 0;
+	spin_lock(&u_other->peer_wait.lock);
+
+	if (!u->peer_wake.private) {
+		u->peer_wake.private = other;
+		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);
+
+		rc = 1;
+	}
+
+	spin_unlock(&u_other->peer_wait.lock);
+	return rc;
+}
+
+static void unix_dgram_peer_wake_disconnect(struct sock *sk,
+					    struct sock *other)
+{
+	struct unix_sock *u, *u_other;
+
+	u = unix_sk(sk);
+	u_other = unix_sk(other);
+	spin_lock(&u_other->peer_wait.lock);
+
+	if (u->peer_wake.private == other) {
+		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
+		u->peer_wake.private = NULL;
+	}
+
+	spin_unlock(&u_other->peer_wait.lock);
+}
+
+static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
+						   struct sock *other)
+{
+	unix_dgram_peer_wake_disconnect(sk, other);
+	wake_up_interruptible_poll(sk_sleep(sk),
+				   POLLOUT |
+				   POLLWRNORM |
+				   POLLWRBAND);
+}
+
+/* preconditions:
+ *	- unix_peer(sk) == other
+ *	- association is stable
+ */
+static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
+{
+	int connected;
+
+	connected = unix_dgram_peer_wake_connect(sk, other);
+
+	if (unix_recvq_full(other))
+		return 1;
+
+	if (connected)
+		unix_dgram_peer_wake_disconnect(sk, other);
+
+	return 0;
+}
+
 static int unix_writable(const struct sock *sk)
 {
 	return sk->sk_state != TCP_LISTEN &&
@@ -431,6 +543,8 @@
 			skpair->sk_state_change(skpair);
 			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
 		}
+
+		unix_dgram_peer_wake_disconnect(sk, skpair);
 		sock_put(skpair); /* It may now die */
 		unix_peer(sk) = NULL;
 	}
@@ -441,6 +555,7 @@
 		if (state == TCP_LISTEN)
 			unix_release_sock(skb->sk, 1);
 		/* passed fds are erased in the kfree_skb hook	      */
+		UNIXCB(skb).consumed = skb->len;
 		kfree_skb(skb);
 	}
 
@@ -665,6 +780,7 @@
 	INIT_LIST_HEAD(&u->link);
 	mutex_init(&u->readlock); /* single task reading lock */
 	init_waitqueue_head(&u->peer_wait);
+	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
 	unix_insert_socket(unix_sockets_unbound(sk), sk);
 out:
 	if (sk == NULL)
@@ -1032,6 +1148,8 @@
 	if (unix_peer(sk)) {
 		struct sock *old_peer = unix_peer(sk);
 		unix_peer(sk) = other;
+		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
+
 		unix_state_double_unlock(sk, other);
 
 		if (other != old_peer)
@@ -1433,6 +1551,14 @@
 	return err;
 }
 
+static bool unix_passcred_enabled(const struct socket *sock,
+				  const struct sock *other)
+{
+	return test_bit(SOCK_PASSCRED, &sock->flags) ||
+	       !other->sk_socket ||
+	       test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
+}
+
 /*
  * Some apps rely on write() giving SCM_CREDENTIALS
  * We include credentials if source or destination socket
@@ -1443,14 +1569,41 @@
 {
 	if (UNIXCB(skb).pid)
 		return;
-	if (test_bit(SOCK_PASSCRED, &sock->flags) ||
-	    !other->sk_socket ||
-	    test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
+	if (unix_passcred_enabled(sock, other)) {
 		UNIXCB(skb).pid  = get_pid(task_tgid(current));
 		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
 	}
 }
 
+static int maybe_init_creds(struct scm_cookie *scm,
+			    struct socket *socket,
+			    const struct sock *other)
+{
+	int err;
+	struct msghdr msg = { .msg_controllen = 0 };
+
+	err = scm_send(socket, &msg, scm, false);
+	if (err)
+		return err;
+
+	if (unix_passcred_enabled(socket, other)) {
+		scm->pid = get_pid(task_tgid(current));
+		current_uid_gid(&scm->creds.uid, &scm->creds.gid);
+	}
+	return err;
+}
+
+static bool unix_skb_scm_eq(struct sk_buff *skb,
+			    struct scm_cookie *scm)
+{
+	const struct unix_skb_parms *u = &UNIXCB(skb);
+
+	return u->pid == scm->pid &&
+	       uid_eq(u->uid, scm->creds.uid) &&
+	       gid_eq(u->gid, scm->creds.gid) &&
+	       unix_secdata_eq(scm, skb);
+}
+
 /*
  *	Send AF_UNIX data.
  */
@@ -1471,6 +1624,7 @@
 	struct scm_cookie scm;
 	int max_level;
 	int data_len = 0;
+	int sk_locked;
 
 	wait_for_unix_gc();
 	err = scm_send(sock, msg, &scm, false);
@@ -1549,12 +1703,14 @@
 		goto out_free;
 	}
 
+	sk_locked = 0;
 	unix_state_lock(other);
+restart_locked:
 	err = -EPERM;
 	if (!unix_may_send(sk, other))
 		goto out_unlock;
 
-	if (sock_flag(other, SOCK_DEAD)) {
+	if (unlikely(sock_flag(other, SOCK_DEAD))) {
 		/*
 		 *	Check with 1003.1g - what should
 		 *	datagram error
@@ -1562,10 +1718,14 @@
 		unix_state_unlock(other);
 		sock_put(other);
 
+		if (!sk_locked)
+			unix_state_lock(sk);
+
 		err = 0;
-		unix_state_lock(sk);
 		if (unix_peer(sk) == other) {
 			unix_peer(sk) = NULL;
+			unix_dgram_peer_wake_disconnect_wakeup(sk, other);
+
 			unix_state_unlock(sk);
 
 			unix_dgram_disconnected(sk, other);
@@ -1591,21 +1751,38 @@
 			goto out_unlock;
 	}
 
-	if (unix_peer(other) != sk && unix_recvq_full(other)) {
-		if (!timeo) {
+	if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
+		if (timeo) {
+			timeo = unix_wait_for_peer(other, timeo);
+
+			err = sock_intr_errno(timeo);
+			if (signal_pending(current))
+				goto out_free;
+
+			goto restart;
+		}
+
+		if (!sk_locked) {
+			unix_state_unlock(other);
+			unix_state_double_lock(sk, other);
+		}
+
+		if (unix_peer(sk) != other ||
+		    unix_dgram_peer_wake_me(sk, other)) {
 			err = -EAGAIN;
+			sk_locked = 1;
 			goto out_unlock;
 		}
 
-		timeo = unix_wait_for_peer(other, timeo);
-
-		err = sock_intr_errno(timeo);
-		if (signal_pending(current))
-			goto out_free;
-
-		goto restart;
+		if (!sk_locked) {
+			sk_locked = 1;
+			goto restart_locked;
+		}
 	}
 
+	if (unlikely(sk_locked))
+		unix_state_unlock(sk);
+
 	if (sock_flag(other, SOCK_RCVTSTAMP))
 		__net_timestamp(skb);
 	maybe_add_creds(skb, sock, other);
@@ -1619,6 +1796,8 @@
 	return len;
 
 out_unlock:
+	if (sk_locked)
+		unix_state_unlock(sk);
 	unix_state_unlock(other);
 out_free:
 	kfree_skb(skb);
@@ -1740,8 +1919,10 @@
 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
 				    int offset, size_t size, int flags)
 {
-	int err = 0;
-	bool send_sigpipe = true;
+	int err;
+	bool send_sigpipe = false;
+	bool init_scm = true;
+	struct scm_cookie scm;
 	struct sock *other, *sk = socket->sk;
 	struct sk_buff *skb, *newskb = NULL, *tail = NULL;
 
@@ -1759,7 +1940,7 @@
 		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
 					      &err, 0);
 		if (!newskb)
-			return err;
+			goto err;
 	}
 
 	/* we must acquire readlock as we modify already present
@@ -1768,12 +1949,12 @@
 	err = mutex_lock_interruptible(&unix_sk(other)->readlock);
 	if (err) {
 		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
-		send_sigpipe = false;
 		goto err;
 	}
 
 	if (sk->sk_shutdown & SEND_SHUTDOWN) {
 		err = -EPIPE;
+		send_sigpipe = true;
 		goto err_unlock;
 	}
 
@@ -1782,23 +1963,34 @@
 	if (sock_flag(other, SOCK_DEAD) ||
 	    other->sk_shutdown & RCV_SHUTDOWN) {
 		err = -EPIPE;
+		send_sigpipe = true;
 		goto err_state_unlock;
 	}
 
+	if (init_scm) {
+		err = maybe_init_creds(&scm, socket, other);
+		if (err)
+			goto err_state_unlock;
+		init_scm = false;
+	}
+
 	skb = skb_peek_tail(&other->sk_receive_queue);
 	if (tail && tail == skb) {
 		skb = newskb;
-	} else if (!skb) {
-		if (newskb)
+	} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
+		if (newskb) {
 			skb = newskb;
-		else
+		} else {
+			tail = skb;
 			goto alloc_skb;
+		}
 	} else if (newskb) {
 		/* this is fast path, we don't necessarily need to
 		 * call to kfree_skb even though with newskb == NULL
 		 * this - does no harm
 		 */
 		consume_skb(newskb);
+		newskb = NULL;
 	}
 
 	if (skb_append_pagefrags(skb, page, offset, size)) {
@@ -1811,14 +2003,20 @@
 	skb->truesize += size;
 	atomic_add(size, &sk->sk_wmem_alloc);
 
-	if (newskb)
+	if (newskb) {
+		err = unix_scm_to_skb(&scm, skb, false);
+		if (err)
+			goto err_state_unlock;
+		spin_lock(&other->sk_receive_queue.lock);
 		__skb_queue_tail(&other->sk_receive_queue, newskb);
+		spin_unlock(&other->sk_receive_queue.lock);
+	}
 
 	unix_state_unlock(other);
 	mutex_unlock(&unix_sk(other)->readlock);
 
 	other->sk_data_ready(other);
-
+	scm_destroy(&scm);
 	return size;
 
 err_state_unlock:
@@ -1829,6 +2027,8 @@
 	kfree_skb(newskb);
 	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
 		send_sig(SIGPIPE, current, 0);
+	if (!init_scm)
+		scm_destroy(&scm);
 	return err;
 }
 
@@ -1991,7 +2191,7 @@
 		    !timeo)
 			break;
 
-		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 		unix_state_unlock(sk);
 		timeo = freezable_schedule_timeout(timeo);
 		unix_state_lock(sk);
@@ -1999,7 +2199,7 @@
 		if (sock_flag(sk, SOCK_DEAD))
 			break;
 
-		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 	}
 
 	finish_wait(sk_sleep(sk), &wait);
@@ -2072,6 +2272,7 @@
 
 	do {
 		int chunk;
+		bool drop_skb;
 		struct sk_buff *skb, *last;
 
 		unix_state_lock(sk);
@@ -2131,10 +2332,7 @@
 
 		if (check_creds) {
 			/* Never glue messages from different writers */
-			if ((UNIXCB(skb).pid  != scm.pid) ||
-			    !uid_eq(UNIXCB(skb).uid, scm.creds.uid) ||
-			    !gid_eq(UNIXCB(skb).gid, scm.creds.gid) ||
-			    !unix_secdata_eq(&scm, skb))
+			if (!unix_skb_scm_eq(skb, &scm))
 				break;
 		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
 			/* Copy credentials */
@@ -2152,7 +2350,11 @@
 		}
 
 		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
+		skb_get(skb);
 		chunk = state->recv_actor(skb, skip, chunk, state);
+		drop_skb = !unix_skb_len(skb);
+		/* skb is only safe to use if !drop_skb */
+		consume_skb(skb);
 		if (chunk < 0) {
 			if (copied == 0)
 				copied = -EFAULT;
@@ -2161,6 +2363,18 @@
 		copied += chunk;
 		size -= chunk;
 
+		if (drop_skb) {
+			/* the skb was touched by a concurrent reader;
+			 * we should not expect anything from this skb
+			 * anymore and assume it invalid - we can be
+			 * sure it was dropped from the socket queue
+			 *
+			 * let's report a short read
+			 */
+			err = 0;
+			break;
+		}
+
 		/* Mark read part of skb as used */
 		if (!(flags & MSG_PEEK)) {
 			UNIXCB(skb).consumed += chunk;
@@ -2454,20 +2668,22 @@
 		return mask;
 
 	writable = unix_writable(sk);
-	other = unix_peer_get(sk);
-	if (other) {
-		if (unix_peer(other) != sk) {
-			sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
-			if (unix_recvq_full(other))
-				writable = 0;
-		}
-		sock_put(other);
+	if (writable) {
+		unix_state_lock(sk);
+
+		other = unix_peer(sk);
+		if (other && unix_peer(other) != sk &&
+		    unix_recvq_full(other) &&
+		    unix_dgram_peer_wake_me(sk, other))
+			writable = 0;
+
+		unix_state_unlock(sk);
 	}
 
 	if (writable)
 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
 	else
-		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 
 	return mask;
 }
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 79b4596..edd638b 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -67,10 +67,13 @@
 # point this to your LLVM backend with bpf support
 LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc
 
+# asm/sysreg.h inline assmbly used by it is incompatible with llvm.
+# But, ehere is not easy way to fix it, so just exclude it since it is
+# useless for BPF samples.
 $(obj)/%.o: $(src)/%.c
 	clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \
-		-D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \
+		-D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \
 		-O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
 	clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \
-		-D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \
+		-D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \
 		-O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=asm -o $@.s
diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index 125b906..638a38e 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -2711,7 +2711,7 @@
 
 # generate a sequence of code that will splice in highlighting information
 # using the s// operator.
-foreach my $k (keys @highlights) {
+for (my $k = 0; $k < @highlights; $k++) {
     my $pattern = $highlights[$k][0];
     my $result = $highlights[$k][1];
 #   print STDERR "scanning pattern:$pattern, highlight:($result)\n";
diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c
index 927db9f..696ccfa 100644
--- a/security/keys/encrypted-keys/encrypted.c
+++ b/security/keys/encrypted-keys/encrypted.c
@@ -845,6 +845,8 @@
 	size_t datalen = prep->datalen;
 	int ret = 0;
 
+	if (test_bit(KEY_FLAG_NEGATIVE, &key->flags))
+		return -ENOKEY;
 	if (datalen <= 0 || datalen > 32767 || !prep->data)
 		return -EINVAL;
 
diff --git a/security/keys/trusted.c b/security/keys/trusted.c
index 903dace..16dec53 100644
--- a/security/keys/trusted.c
+++ b/security/keys/trusted.c
@@ -1007,13 +1007,16 @@
  */
 static int trusted_update(struct key *key, struct key_preparsed_payload *prep)
 {
-	struct trusted_key_payload *p = key->payload.data[0];
+	struct trusted_key_payload *p;
 	struct trusted_key_payload *new_p;
 	struct trusted_key_options *new_o;
 	size_t datalen = prep->datalen;
 	char *datablob;
 	int ret = 0;
 
+	if (test_bit(KEY_FLAG_NEGATIVE, &key->flags))
+		return -ENOKEY;
+	p = key->payload.data[0];
 	if (!p->migratable)
 		return -EPERM;
 	if (datalen <= 0 || datalen > 32767 || !prep->data)
diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c
index 28cb30f..8705d79 100644
--- a/security/keys/user_defined.c
+++ b/security/keys/user_defined.c
@@ -120,7 +120,10 @@
 
 	if (ret == 0) {
 		/* attach the new data, displacing the old */
-		zap = key->payload.data[0];
+		if (!test_bit(KEY_FLAG_NEGATIVE, &key->flags))
+			zap = key->payload.data[0];
+		else
+			zap = NULL;
 		rcu_assign_keypointer(key, upayload);
 		key->expiry = 0;
 	}
diff --git a/security/selinux/ss/conditional.c b/security/selinux/ss/conditional.c
index 18643bf..456e1a9 100644
--- a/security/selinux/ss/conditional.c
+++ b/security/selinux/ss/conditional.c
@@ -638,7 +638,7 @@
 {
 	struct avtab_node *node;
 
-	if (!ctab || !key || !avd || !xperms)
+	if (!ctab || !key || !avd)
 		return;
 
 	for (node = avtab_search_node(ctab, key); node;
@@ -657,7 +657,7 @@
 		if ((u16)(AVTAB_AUDITALLOW|AVTAB_ENABLED) ==
 		    (node->key.specified & (AVTAB_AUDITALLOW|AVTAB_ENABLED)))
 			avd->auditallow |= node->datum.u.data;
-		if ((node->key.specified & AVTAB_ENABLED) &&
+		if (xperms && (node->key.specified & AVTAB_ENABLED) &&
 				(node->key.specified & AVTAB_XPERMS))
 			services_compute_xperms_drivers(xperms, node);
 	}
diff --git a/sound/firewire/dice/dice.c b/sound/firewire/dice/dice.c
index 5d99436..0cda05c 100644
--- a/sound/firewire/dice/dice.c
+++ b/sound/firewire/dice/dice.c
@@ -12,9 +12,11 @@
 MODULE_LICENSE("GPL v2");
 
 #define OUI_WEISS		0x001c6a
+#define OUI_LOUD		0x000ff2
 
 #define DICE_CATEGORY_ID	0x04
 #define WEISS_CATEGORY_ID	0x00
+#define LOUD_CATEGORY_ID	0x10
 
 static int dice_interface_check(struct fw_unit *unit)
 {
@@ -57,6 +59,8 @@
 	}
 	if (vendor == OUI_WEISS)
 		category = WEISS_CATEGORY_ID;
+	else if (vendor == OUI_LOUD)
+		category = LOUD_CATEGORY_ID;
 	else
 		category = DICE_CATEGORY_ID;
 	if (device->config_rom[3] != ((vendor << 8) | category) ||
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 8a7fbdc..963f824 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -312,6 +312,10 @@
 	(AZX_DCAPS_INTEL_PCH | AZX_DCAPS_SEPARATE_STREAM_TAG |\
 	 AZX_DCAPS_I915_POWERWELL)
 
+#define AZX_DCAPS_INTEL_BROXTON \
+	(AZX_DCAPS_INTEL_PCH | AZX_DCAPS_SEPARATE_STREAM_TAG |\
+	 AZX_DCAPS_I915_POWERWELL)
+
 /* quirks for ATI SB / AMD Hudson */
 #define AZX_DCAPS_PRESET_ATI_SB \
 	(AZX_DCAPS_NO_TCSEL | AZX_DCAPS_SYNC_WRITE | AZX_DCAPS_POSFIX_LPIB |\
@@ -2124,6 +2128,9 @@
 	/* Sunrise Point-LP */
 	{ PCI_DEVICE(0x8086, 0x9d70),
 	  .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE },
+	/* Broxton-P(Apollolake) */
+	{ PCI_DEVICE(0x8086, 0x5a98),
+	  .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_BROXTON },
 	/* Haswell */
 	{ PCI_DEVICE(0x8086, 0x0a0c),
 	  .driver_data = AZX_DRIVER_HDMI | AZX_DCAPS_INTEL_HASWELL },
diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index c8b8ef5..ef19890 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -955,6 +955,7 @@
  */
 
 static const struct hda_device_id snd_hda_id_conexant[] = {
+	HDA_CODEC_ENTRY(0x14f12008, "CX8200", patch_conexant_auto),
 	HDA_CODEC_ENTRY(0x14f15045, "CX20549 (Venice)", patch_conexant_auto),
 	HDA_CODEC_ENTRY(0x14f15047, "CX20551 (Waikiki)", patch_conexant_auto),
 	HDA_CODEC_ENTRY(0x14f15051, "CX20561 (Hermosa)", patch_conexant_auto),
@@ -972,9 +973,9 @@
 	HDA_CODEC_ENTRY(0x14f150ac, "CX20652", patch_conexant_auto),
 	HDA_CODEC_ENTRY(0x14f150b8, "CX20664", patch_conexant_auto),
 	HDA_CODEC_ENTRY(0x14f150b9, "CX20665", patch_conexant_auto),
-	HDA_CODEC_ENTRY(0x14f150f1, "CX20721", patch_conexant_auto),
+	HDA_CODEC_ENTRY(0x14f150f1, "CX21722", patch_conexant_auto),
 	HDA_CODEC_ENTRY(0x14f150f2, "CX20722", patch_conexant_auto),
-	HDA_CODEC_ENTRY(0x14f150f3, "CX20723", patch_conexant_auto),
+	HDA_CODEC_ENTRY(0x14f150f3, "CX21724", patch_conexant_auto),
 	HDA_CODEC_ENTRY(0x14f150f4, "CX20724", patch_conexant_auto),
 	HDA_CODEC_ENTRY(0x14f1510f, "CX20751/2", patch_conexant_auto),
 	HDA_CODEC_ENTRY(0x14f15110, "CX20751/2", patch_conexant_auto),
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index 60cd9e7..4b6fb66 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -2352,6 +2352,12 @@
 	struct hda_codec *codec = audio_ptr;
 	int pin_nid = port + 0x04;
 
+	/* skip notification during system suspend (but not in runtime PM);
+	 * the state will be updated at resume
+	 */
+	if (snd_power_get_state(codec->card) != SNDRV_CTL_POWER_D0)
+		return;
+
 	check_presence_and_report(codec, pin_nid);
 }
 
@@ -2378,7 +2384,8 @@
 	 * can cover the codec power request, and so need not set this flag.
 	 * For previous platforms, there is no such power well feature.
 	 */
-	if (is_valleyview_plus(codec) || is_skylake(codec))
+	if (is_valleyview_plus(codec) || is_skylake(codec) ||
+			is_broxton(codec))
 		codec->core.link_power_control = 1;
 
 	if (is_haswell_plus(codec) || is_valleyview_plus(codec)) {
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 2f7b065..9bedf7c 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -1759,6 +1759,7 @@
 	ALC882_FIXUP_NO_PRIMARY_HP,
 	ALC887_FIXUP_ASUS_BASS,
 	ALC887_FIXUP_BASS_CHMAP,
+	ALC882_FIXUP_DISABLE_AAMIX,
 };
 
 static void alc889_fixup_coef(struct hda_codec *codec,
@@ -1920,6 +1921,8 @@
 
 static void alc_fixup_bass_chmap(struct hda_codec *codec,
 				 const struct hda_fixup *fix, int action);
+static void alc_fixup_disable_aamix(struct hda_codec *codec,
+				    const struct hda_fixup *fix, int action);
 
 static const struct hda_fixup alc882_fixups[] = {
 	[ALC882_FIXUP_ABIT_AW9D_MAX] = {
@@ -2151,6 +2154,10 @@
 		.type = HDA_FIXUP_FUNC,
 		.v.func = alc_fixup_bass_chmap,
 	},
+	[ALC882_FIXUP_DISABLE_AAMIX] = {
+		.type = HDA_FIXUP_FUNC,
+		.v.func = alc_fixup_disable_aamix,
+	},
 };
 
 static const struct snd_pci_quirk alc882_fixup_tbl[] = {
@@ -2218,6 +2225,7 @@
 	SND_PCI_QUIRK(0x1462, 0x7350, "MSI-7350", ALC889_FIXUP_CD),
 	SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3),
 	SND_PCI_QUIRK(0x1458, 0xa002, "Gigabyte EP45-DS3/Z87X-UD3H", ALC889_FIXUP_FRONT_HP_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1458, 0xa182, "Gigabyte Z170X-UD3", ALC882_FIXUP_DISABLE_AAMIX),
 	SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX),
 	SND_PCI_QUIRK_VENDOR(0x1558, "Clevo laptop", ALC882_FIXUP_EAPD),
 	SND_PCI_QUIRK(0x161f, 0x2054, "Medion laptop", ALC883_FIXUP_EAPD),
@@ -4587,6 +4595,7 @@
 	ALC292_FIXUP_DISABLE_AAMIX,
 	ALC298_FIXUP_DELL1_MIC_NO_PRESENCE,
 	ALC275_FIXUP_DELL_XPS,
+	ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -5167,6 +5176,17 @@
 			{}
 		}
 	},
+	[ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE] = {
+		.type = HDA_FIXUP_VERBS,
+		.v.verbs = (const struct hda_verb[]) {
+			/* Disable pass-through path for FRONT 14h */
+			{0x20, AC_VERB_SET_COEF_INDEX, 0x36},
+			{0x20, AC_VERB_SET_PROC_COEF, 0x1737},
+			{}
+		},
+		.chained = true,
+		.chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE
+	},
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -5180,8 +5200,10 @@
 	SND_PCI_QUIRK(0x1025, 0x0742, "Acer AO756", ALC271_FIXUP_HP_GATE_MIC_JACK),
 	SND_PCI_QUIRK(0x1025, 0x0775, "Acer Aspire E1-572", ALC271_FIXUP_HP_GATE_MIC_JACK_E1_572),
 	SND_PCI_QUIRK(0x1025, 0x079b, "Acer Aspire V5-573G", ALC282_FIXUP_ASPIRE_V5_PINS),
+	SND_PCI_QUIRK(0x1025, 0x106d, "Acer Cloudbook 14", ALC283_FIXUP_CHROME_BOOK),
 	SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z),
 	SND_PCI_QUIRK(0x1028, 0x054b, "Dell XPS one 2710", ALC275_FIXUP_DELL_XPS),
+	SND_PCI_QUIRK(0x1028, 0x05bd, "Dell Latitude E6440", ALC292_FIXUP_DELL_E7X),
 	SND_PCI_QUIRK(0x1028, 0x05ca, "Dell Latitude E7240", ALC292_FIXUP_DELL_E7X),
 	SND_PCI_QUIRK(0x1028, 0x05cb, "Dell Latitude E7440", ALC292_FIXUP_DELL_E7X),
 	SND_PCI_QUIRK(0x1028, 0x05da, "Dell Vostro 5460", ALC290_FIXUP_SUBWOOFER),
@@ -5204,6 +5226,7 @@
 	SND_PCI_QUIRK(0x1028, 0x06de, "Dell", ALC292_FIXUP_DISABLE_AAMIX),
 	SND_PCI_QUIRK(0x1028, 0x06df, "Dell", ALC292_FIXUP_DISABLE_AAMIX),
 	SND_PCI_QUIRK(0x1028, 0x06e0, "Dell", ALC292_FIXUP_DISABLE_AAMIX),
+	SND_PCI_QUIRK(0x1028, 0x0704, "Dell XPS 13", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
 	SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2),
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index 826122d..2c7c5eb 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -3110,6 +3110,29 @@
 	spec->gpio_led = 0x08;
 }
 
+static bool is_hp_output(struct hda_codec *codec, hda_nid_t pin)
+{
+	unsigned int pin_cfg = snd_hda_codec_get_pincfg(codec, pin);
+
+	/* count line-out, too, as BIOS sets often so */
+	return get_defcfg_connect(pin_cfg) != AC_JACK_PORT_NONE &&
+		(get_defcfg_device(pin_cfg) == AC_JACK_LINE_OUT ||
+		 get_defcfg_device(pin_cfg) == AC_JACK_HP_OUT);
+}
+
+static void fixup_hp_headphone(struct hda_codec *codec, hda_nid_t pin)
+{
+	unsigned int pin_cfg = snd_hda_codec_get_pincfg(codec, pin);
+
+	/* It was changed in the BIOS to just satisfy MS DTM.
+	 * Lets turn it back into slaved HP
+	 */
+	pin_cfg = (pin_cfg & (~AC_DEFCFG_DEVICE)) |
+		(AC_JACK_HP_OUT << AC_DEFCFG_DEVICE_SHIFT);
+	pin_cfg = (pin_cfg & (~(AC_DEFCFG_DEF_ASSOC | AC_DEFCFG_SEQUENCE))) |
+		0x1f;
+	snd_hda_codec_set_pincfg(codec, pin, pin_cfg);
+}
 
 static void stac92hd71bxx_fixup_hp(struct hda_codec *codec,
 				   const struct hda_fixup *fix, int action)
@@ -3119,22 +3142,12 @@
 	if (action != HDA_FIXUP_ACT_PRE_PROBE)
 		return;
 
-	if (hp_blike_system(codec->core.subsystem_id)) {
-		unsigned int pin_cfg = snd_hda_codec_get_pincfg(codec, 0x0f);
-		if (get_defcfg_device(pin_cfg) == AC_JACK_LINE_OUT ||
-			get_defcfg_device(pin_cfg) == AC_JACK_SPEAKER  ||
-			get_defcfg_device(pin_cfg) == AC_JACK_HP_OUT) {
-			/* It was changed in the BIOS to just satisfy MS DTM.
-			 * Lets turn it back into slaved HP
-			 */
-			pin_cfg = (pin_cfg & (~AC_DEFCFG_DEVICE))
-					| (AC_JACK_HP_OUT <<
-						AC_DEFCFG_DEVICE_SHIFT);
-			pin_cfg = (pin_cfg & (~(AC_DEFCFG_DEF_ASSOC
-							| AC_DEFCFG_SEQUENCE)))
-								| 0x1f;
-			snd_hda_codec_set_pincfg(codec, 0x0f, pin_cfg);
-		}
+	/* when both output A and F are assigned, these are supposedly
+	 * dock and built-in headphones; fix both pin configs
+	 */
+	if (is_hp_output(codec, 0x0a) && is_hp_output(codec, 0x0f)) {
+		fixup_hp_headphone(codec, 0x0a);
+		fixup_hp_headphone(codec, 0x0f);
 	}
 
 	if (find_mute_led_cfg(codec, 1))
diff --git a/sound/soc/codecs/arizona.c b/sound/soc/codecs/arizona.c
index 9929efc..b3ea24d 100644
--- a/sound/soc/codecs/arizona.c
+++ b/sound/soc/codecs/arizona.c
@@ -1023,24 +1023,18 @@
 }
 EXPORT_SYMBOL_GPL(arizona_init_dvfs);
 
-static unsigned int arizona_sysclk_48k_rates[] = {
+static unsigned int arizona_opclk_ref_48k_rates[] = {
 	6144000,
 	12288000,
 	24576000,
 	49152000,
-	73728000,
-	98304000,
-	147456000,
 };
 
-static unsigned int arizona_sysclk_44k1_rates[] = {
+static unsigned int arizona_opclk_ref_44k1_rates[] = {
 	5644800,
 	11289600,
 	22579200,
 	45158400,
-	67737600,
-	90316800,
-	135475200,
 };
 
 static int arizona_set_opclk(struct snd_soc_codec *codec, unsigned int clk,
@@ -1065,11 +1059,11 @@
 	}
 
 	if (refclk % 8000)
-		rates = arizona_sysclk_44k1_rates;
+		rates = arizona_opclk_ref_44k1_rates;
 	else
-		rates = arizona_sysclk_48k_rates;
+		rates = arizona_opclk_ref_48k_rates;
 
-	for (ref = 0; ref < ARRAY_SIZE(arizona_sysclk_48k_rates) &&
+	for (ref = 0; ref < ARRAY_SIZE(arizona_opclk_ref_48k_rates) &&
 		     rates[ref] <= refclk; ref++) {
 		div = 1;
 		while (rates[ref] / div >= freq && div < 32) {
diff --git a/sound/soc/codecs/es8328.c b/sound/soc/codecs/es8328.c
index 969e337..84f5eb0 100644
--- a/sound/soc/codecs/es8328.c
+++ b/sound/soc/codecs/es8328.c
@@ -205,18 +205,18 @@
 
 /* Left Mixer */
 static const struct snd_kcontrol_new es8328_left_mixer_controls[] = {
-	SOC_DAPM_SINGLE("Playback Switch", ES8328_DACCONTROL17, 8, 1, 0),
-	SOC_DAPM_SINGLE("Left Bypass Switch", ES8328_DACCONTROL17, 7, 1, 0),
-	SOC_DAPM_SINGLE("Right Playback Switch", ES8328_DACCONTROL18, 8, 1, 0),
-	SOC_DAPM_SINGLE("Right Bypass Switch", ES8328_DACCONTROL18, 7, 1, 0),
+	SOC_DAPM_SINGLE("Playback Switch", ES8328_DACCONTROL17, 7, 1, 0),
+	SOC_DAPM_SINGLE("Left Bypass Switch", ES8328_DACCONTROL17, 6, 1, 0),
+	SOC_DAPM_SINGLE("Right Playback Switch", ES8328_DACCONTROL18, 7, 1, 0),
+	SOC_DAPM_SINGLE("Right Bypass Switch", ES8328_DACCONTROL18, 6, 1, 0),
 };
 
 /* Right Mixer */
 static const struct snd_kcontrol_new es8328_right_mixer_controls[] = {
-	SOC_DAPM_SINGLE("Left Playback Switch", ES8328_DACCONTROL19, 8, 1, 0),
-	SOC_DAPM_SINGLE("Left Bypass Switch", ES8328_DACCONTROL19, 7, 1, 0),
-	SOC_DAPM_SINGLE("Playback Switch", ES8328_DACCONTROL20, 8, 1, 0),
-	SOC_DAPM_SINGLE("Right Bypass Switch", ES8328_DACCONTROL20, 7, 1, 0),
+	SOC_DAPM_SINGLE("Left Playback Switch", ES8328_DACCONTROL19, 7, 1, 0),
+	SOC_DAPM_SINGLE("Left Bypass Switch", ES8328_DACCONTROL19, 6, 1, 0),
+	SOC_DAPM_SINGLE("Playback Switch", ES8328_DACCONTROL20, 7, 1, 0),
+	SOC_DAPM_SINGLE("Right Bypass Switch", ES8328_DACCONTROL20, 6, 1, 0),
 };
 
 static const char * const es8328_pga_sel[] = {
diff --git a/sound/soc/codecs/nau8825.c b/sound/soc/codecs/nau8825.c
index 7fc7b4e..c1b87c5 100644
--- a/sound/soc/codecs/nau8825.c
+++ b/sound/soc/codecs/nau8825.c
@@ -1271,6 +1271,36 @@
 	return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
+static int nau8825_suspend(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct nau8825 *nau8825 = dev_get_drvdata(dev);
+
+	disable_irq(client->irq);
+	regcache_cache_only(nau8825->regmap, true);
+	regcache_mark_dirty(nau8825->regmap);
+
+	return 0;
+}
+
+static int nau8825_resume(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct nau8825 *nau8825 = dev_get_drvdata(dev);
+
+	regcache_cache_only(nau8825->regmap, false);
+	regcache_sync(nau8825->regmap);
+	enable_irq(client->irq);
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops nau8825_pm = {
+	SET_SYSTEM_SLEEP_PM_OPS(nau8825_suspend, nau8825_resume)
+};
+
 static const struct i2c_device_id nau8825_i2c_ids[] = {
 	{ "nau8825", 0 },
 	{ }
@@ -1297,6 +1327,7 @@
 		.name = "nau8825",
 		.of_match_table = of_match_ptr(nau8825_of_ids),
 		.acpi_match_table = ACPI_PTR(nau8825_acpi_match),
+		.pm = &nau8825_pm,
 	},
 	.probe = nau8825_i2c_probe,
 	.remove = nau8825_i2c_remove,
diff --git a/sound/soc/codecs/rl6231.c b/sound/soc/codecs/rl6231.c
index aca479f..1dc68ab 100644
--- a/sound/soc/codecs/rl6231.c
+++ b/sound/soc/codecs/rl6231.c
@@ -80,8 +80,10 @@
 	}
 
 	for (i = 0; i < ARRAY_SIZE(div); i++) {
-		/* find divider that gives DMIC frequency below 3MHz */
-		if (3000000 * div[i] >= rate)
+		if ((div[i] % 3) == 0)
+			continue;
+		/* find divider that gives DMIC frequency below 3.072MHz */
+		if (3072000 * div[i] >= rate)
 			return i;
 	}
 
diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c
index 2813237..ef76940 100644
--- a/sound/soc/codecs/rt5645.c
+++ b/sound/soc/codecs/rt5645.c
@@ -245,7 +245,7 @@
 	struct snd_soc_jack *hp_jack;
 	struct snd_soc_jack *mic_jack;
 	struct snd_soc_jack *btn_jack;
-	struct delayed_work jack_detect_work;
+	struct delayed_work jack_detect_work, rcclock_work;
 	struct regulator_bulk_data supplies[ARRAY_SIZE(rt5645_supply_names)];
 	struct rt5645_eq_param_s *eq_param;
 
@@ -565,12 +565,33 @@
 	.put = rt5645_hweq_put \
 }
 
+static int rt5645_spk_put_volsw(struct snd_kcontrol *kcontrol,
+		struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *component = snd_kcontrol_chip(kcontrol);
+	struct rt5645_priv *rt5645 = snd_soc_component_get_drvdata(component);
+	int ret;
+
+	cancel_delayed_work_sync(&rt5645->rcclock_work);
+
+	regmap_update_bits(rt5645->regmap, RT5645_MICBIAS,
+		RT5645_PWR_CLK25M_MASK, RT5645_PWR_CLK25M_PU);
+
+	ret = snd_soc_put_volsw(kcontrol, ucontrol);
+
+	queue_delayed_work(system_power_efficient_wq, &rt5645->rcclock_work,
+		msecs_to_jiffies(200));
+
+	return ret;
+}
+
 static const struct snd_kcontrol_new rt5645_snd_controls[] = {
 	/* Speaker Output Volume */
 	SOC_DOUBLE("Speaker Channel Switch", RT5645_SPK_VOL,
 		RT5645_VOL_L_SFT, RT5645_VOL_R_SFT, 1, 1),
-	SOC_DOUBLE_TLV("Speaker Playback Volume", RT5645_SPK_VOL,
-		RT5645_L_VOL_SFT, RT5645_R_VOL_SFT, 39, 1, out_vol_tlv),
+	SOC_DOUBLE_EXT_TLV("Speaker Playback Volume", RT5645_SPK_VOL,
+		RT5645_L_VOL_SFT, RT5645_R_VOL_SFT, 39, 1, snd_soc_get_volsw,
+		rt5645_spk_put_volsw, out_vol_tlv),
 
 	/* ClassD modulator Speaker Gain Ratio */
 	SOC_SINGLE_TLV("Speaker ClassD Playback Volume", RT5645_SPO_CLSD_RATIO,
@@ -1498,7 +1519,7 @@
 				regmap_write(rt5645->regmap, RT5645_PR_BASE +
 					RT5645_MAMP_INT_REG2, 0xfc00);
 				snd_soc_write(codec, RT5645_DEPOP_M2, 0x1140);
-				msleep(40);
+				msleep(70);
 				rt5645->hp_on = true;
 			} else {
 				/* depop parameters */
@@ -3122,6 +3143,15 @@
 				SND_JACK_BTN_2 | SND_JACK_BTN_3);
 }
 
+static void rt5645_rcclock_work(struct work_struct *work)
+{
+	struct rt5645_priv *rt5645 =
+		container_of(work, struct rt5645_priv, rcclock_work.work);
+
+	regmap_update_bits(rt5645->regmap, RT5645_MICBIAS,
+		RT5645_PWR_CLK25M_MASK, RT5645_PWR_CLK25M_PD);
+}
+
 static irqreturn_t rt5645_irq(int irq, void *data)
 {
 	struct rt5645_priv *rt5645 = data;
@@ -3348,6 +3378,27 @@
 			DMI_MATCH(DMI_PRODUCT_NAME, "Reks"),
 		},
 	},
+	{
+		.ident = "Google Edgar",
+		.callback = strago_quirk_cb,
+		.matches = {
+			DMI_MATCH(DMI_PRODUCT_NAME, "Edgar"),
+		},
+	},
+	{
+		.ident = "Google Wizpig",
+		.callback = strago_quirk_cb,
+		.matches = {
+			DMI_MATCH(DMI_PRODUCT_NAME, "Wizpig"),
+		},
+	},
+	{
+		.ident = "Google Terra",
+		.callback = strago_quirk_cb,
+		.matches = {
+			DMI_MATCH(DMI_PRODUCT_NAME, "Terra"),
+		},
+	},
 	{ }
 };
 
@@ -3587,6 +3638,7 @@
 	}
 
 	INIT_DELAYED_WORK(&rt5645->jack_detect_work, rt5645_jack_detect_work);
+	INIT_DELAYED_WORK(&rt5645->rcclock_work, rt5645_rcclock_work);
 
 	if (rt5645->i2c->irq) {
 		ret = request_threaded_irq(rt5645->i2c->irq, NULL, rt5645_irq,
@@ -3621,6 +3673,7 @@
 		free_irq(i2c->irq, rt5645);
 
 	cancel_delayed_work_sync(&rt5645->jack_detect_work);
+	cancel_delayed_work_sync(&rt5645->rcclock_work);
 
 	snd_soc_unregister_codec(&i2c->dev);
 	regulator_bulk_disable(ARRAY_SIZE(rt5645->supplies), rt5645->supplies);
diff --git a/sound/soc/codecs/rt5670.h b/sound/soc/codecs/rt5670.h
index dc2b462..3f1b0f1 100644
--- a/sound/soc/codecs/rt5670.h
+++ b/sound/soc/codecs/rt5670.h
@@ -973,12 +973,12 @@
 #define RT5670_SCLK_SRC_MCLK			(0x0 << 14)
 #define RT5670_SCLK_SRC_PLL1			(0x1 << 14)
 #define RT5670_SCLK_SRC_RCCLK			(0x2 << 14) /* 15MHz */
-#define RT5670_PLL1_SRC_MASK			(0x3 << 12)
-#define RT5670_PLL1_SRC_SFT			12
-#define RT5670_PLL1_SRC_MCLK			(0x0 << 12)
-#define RT5670_PLL1_SRC_BCLK1			(0x1 << 12)
-#define RT5670_PLL1_SRC_BCLK2			(0x2 << 12)
-#define RT5670_PLL1_SRC_BCLK3			(0x3 << 12)
+#define RT5670_PLL1_SRC_MASK			(0x7 << 11)
+#define RT5670_PLL1_SRC_SFT			11
+#define RT5670_PLL1_SRC_MCLK			(0x0 << 11)
+#define RT5670_PLL1_SRC_BCLK1			(0x1 << 11)
+#define RT5670_PLL1_SRC_BCLK2			(0x2 << 11)
+#define RT5670_PLL1_SRC_BCLK3			(0x3 << 11)
 #define RT5670_PLL1_PD_MASK			(0x1 << 3)
 #define RT5670_PLL1_PD_SFT			3
 #define RT5670_PLL1_PD_1			(0x0 << 3)
diff --git a/sound/soc/codecs/rt5677.c b/sound/soc/codecs/rt5677.c
index b4cd7e3..69d987a 100644
--- a/sound/soc/codecs/rt5677.c
+++ b/sound/soc/codecs/rt5677.c
@@ -1386,90 +1386,90 @@
 };
 
 static const struct snd_kcontrol_new rt5677_sto1_dac_l_mix[] = {
-	SOC_DAPM_SINGLE("ST L Switch", RT5677_STO1_DAC_MIXER,
+	SOC_DAPM_SINGLE_AUTODISABLE("ST L Switch", RT5677_STO1_DAC_MIXER,
 			RT5677_M_ST_DAC1_L_SFT, 1, 1),
-	SOC_DAPM_SINGLE("DAC1 L Switch", RT5677_STO1_DAC_MIXER,
+	SOC_DAPM_SINGLE_AUTODISABLE("DAC1 L Switch", RT5677_STO1_DAC_MIXER,
 			RT5677_M_DAC1_L_STO_L_SFT, 1, 1),
-	SOC_DAPM_SINGLE("DAC2 L Switch", RT5677_STO1_DAC_MIXER,
+	SOC_DAPM_SINGLE_AUTODISABLE("DAC2 L Switch", RT5677_STO1_DAC_MIXER,
 			RT5677_M_DAC2_L_STO_L_SFT, 1, 1),
-	SOC_DAPM_SINGLE("DAC1 R Switch", RT5677_STO1_DAC_MIXER,
+	SOC_DAPM_SINGLE_AUTODISABLE("DAC1 R Switch", RT5677_STO1_DAC_MIXER,
 			RT5677_M_DAC1_R_STO_L_SFT, 1, 1),
 };
 
 static const struct snd_kcontrol_new rt5677_sto1_dac_r_mix[] = {
-	SOC_DAPM_SINGLE("ST R Switch", RT5677_STO1_DAC_MIXER,
+	SOC_DAPM_SINGLE_AUTODISABLE("ST R Switch", RT5677_STO1_DAC_MIXER,
 			RT5677_M_ST_DAC1_R_SFT, 1, 1),
-	SOC_DAPM_SINGLE("DAC1 R Switch", RT5677_STO1_DAC_MIXER,
+	SOC_DAPM_SINGLE_AUTODISABLE("DAC1 R Switch", RT5677_STO1_DAC_MIXER,
 			RT5677_M_DAC1_R_STO_R_SFT, 1, 1),
-	SOC_DAPM_SINGLE("DAC2 R Switch", RT5677_STO1_DAC_MIXER,
+	SOC_DAPM_SINGLE_AUTODISABLE("DAC2 R Switch", RT5677_STO1_DAC_MIXER,
 			RT5677_M_DAC2_R_STO_R_SFT, 1, 1),
-	SOC_DAPM_SINGLE("DAC1 L Switch", RT5677_STO1_DAC_MIXER,
+	SOC_DAPM_SINGLE_AUTODISABLE("DAC1 L Switch", RT5677_STO1_DAC_MIXER,
 			RT5677_M_DAC1_L_STO_R_SFT, 1, 1),
 };
 
 static const struct snd_kcontrol_new rt5677_mono_dac_l_mix[] = {
-	SOC_DAPM_SINGLE("ST L Switch", RT5677_MONO_DAC_MIXER,
+	SOC_DAPM_SINGLE_AUTODISABLE("ST L Switch", RT5677_MONO_DAC_MIXER,
 			RT5677_M_ST_DAC2_L_SFT, 1, 1),
-	SOC_DAPM_SINGLE("DAC1 L Switch", RT5677_MONO_DAC_MIXER,
+	SOC_DAPM_SINGLE_AUTODISABLE("DAC1 L Switch", RT5677_MONO_DAC_MIXER,
 			RT5677_M_DAC1_L_MONO_L_SFT, 1, 1),
-	SOC_DAPM_SINGLE("DAC2 L Switch", RT5677_MONO_DAC_MIXER,
+	SOC_DAPM_SINGLE_AUTODISABLE("DAC2 L Switch", RT5677_MONO_DAC_MIXER,
 			RT5677_M_DAC2_L_MONO_L_SFT, 1, 1),
-	SOC_DAPM_SINGLE("DAC2 R Switch", RT5677_MONO_DAC_MIXER,
+	SOC_DAPM_SINGLE_AUTODISABLE("DAC2 R Switch", RT5677_MONO_DAC_MIXER,
 			RT5677_M_DAC2_R_MONO_L_SFT, 1, 1),
 };
 
 static const struct snd_kcontrol_new rt5677_mono_dac_r_mix[] = {
-	SOC_DAPM_SINGLE("ST R Switch", RT5677_MONO_DAC_MIXER,
+	SOC_DAPM_SINGLE_AUTODISABLE("ST R Switch", RT5677_MONO_DAC_MIXER,
 			RT5677_M_ST_DAC2_R_SFT, 1, 1),
-	SOC_DAPM_SINGLE("DAC1 R Switch", RT5677_MONO_DAC_MIXER,
+	SOC_DAPM_SINGLE_AUTODISABLE("DAC1 R Switch", RT5677_MONO_DAC_MIXER,
 			RT5677_M_DAC1_R_MONO_R_SFT, 1, 1),
-	SOC_DAPM_SINGLE("DAC2 R Switch", RT5677_MONO_DAC_MIXER,
+	SOC_DAPM_SINGLE_AUTODISABLE("DAC2 R Switch", RT5677_MONO_DAC_MIXER,
 			RT5677_M_DAC2_R_MONO_R_SFT, 1, 1),
-	SOC_DAPM_SINGLE("DAC2 L Switch", RT5677_MONO_DAC_MIXER,
+	SOC_DAPM_SINGLE_AUTODISABLE("DAC2 L Switch", RT5677_MONO_DAC_MIXER,
 			RT5677_M_DAC2_L_MONO_R_SFT, 1, 1),
 };
 
 static const struct snd_kcontrol_new rt5677_dd1_l_mix[] = {
-	SOC_DAPM_SINGLE("Sto DAC Mix L Switch", RT5677_DD1_MIXER,
+	SOC_DAPM_SINGLE_AUTODISABLE("Sto DAC Mix L Switch", RT5677_DD1_MIXER,
 			RT5677_M_STO_L_DD1_L_SFT, 1, 1),
-	SOC_DAPM_SINGLE("Mono DAC Mix L Switch", RT5677_DD1_MIXER,
+	SOC_DAPM_SINGLE_AUTODISABLE("Mono DAC Mix L Switch", RT5677_DD1_MIXER,
 			RT5677_M_MONO_L_DD1_L_SFT, 1, 1),
-	SOC_DAPM_SINGLE("DAC3 L Switch", RT5677_DD1_MIXER,
+	SOC_DAPM_SINGLE_AUTODISABLE("DAC3 L Switch", RT5677_DD1_MIXER,
 			RT5677_M_DAC3_L_DD1_L_SFT, 1, 1),
-	SOC_DAPM_SINGLE("DAC3 R Switch", RT5677_DD1_MIXER,
+	SOC_DAPM_SINGLE_AUTODISABLE("DAC3 R Switch", RT5677_DD1_MIXER,
 			RT5677_M_DAC3_R_DD1_L_SFT, 1, 1),
 };
 
 static const struct snd_kcontrol_new rt5677_dd1_r_mix[] = {
-	SOC_DAPM_SINGLE("Sto DAC Mix R Switch", RT5677_DD1_MIXER,
+	SOC_DAPM_SINGLE_AUTODISABLE("Sto DAC Mix R Switch", RT5677_DD1_MIXER,
 			RT5677_M_STO_R_DD1_R_SFT, 1, 1),
-	SOC_DAPM_SINGLE("Mono DAC Mix R Switch", RT5677_DD1_MIXER,
+	SOC_DAPM_SINGLE_AUTODISABLE("Mono DAC Mix R Switch", RT5677_DD1_MIXER,
 			RT5677_M_MONO_R_DD1_R_SFT, 1, 1),
-	SOC_DAPM_SINGLE("DAC3 R Switch", RT5677_DD1_MIXER,
+	SOC_DAPM_SINGLE_AUTODISABLE("DAC3 R Switch", RT5677_DD1_MIXER,
 			RT5677_M_DAC3_R_DD1_R_SFT, 1, 1),
-	SOC_DAPM_SINGLE("DAC3 L Switch", RT5677_DD1_MIXER,
+	SOC_DAPM_SINGLE_AUTODISABLE("DAC3 L Switch", RT5677_DD1_MIXER,
 			RT5677_M_DAC3_L_DD1_R_SFT, 1, 1),
 };
 
 static const struct snd_kcontrol_new rt5677_dd2_l_mix[] = {
-	SOC_DAPM_SINGLE("Sto DAC Mix L Switch", RT5677_DD2_MIXER,
+	SOC_DAPM_SINGLE_AUTODISABLE("Sto DAC Mix L Switch", RT5677_DD2_MIXER,
 			RT5677_M_STO_L_DD2_L_SFT, 1, 1),
-	SOC_DAPM_SINGLE("Mono DAC Mix L Switch", RT5677_DD2_MIXER,
+	SOC_DAPM_SINGLE_AUTODISABLE("Mono DAC Mix L Switch", RT5677_DD2_MIXER,
 			RT5677_M_MONO_L_DD2_L_SFT, 1, 1),
-	SOC_DAPM_SINGLE("DAC4 L Switch", RT5677_DD2_MIXER,
+	SOC_DAPM_SINGLE_AUTODISABLE("DAC4 L Switch", RT5677_DD2_MIXER,
 			RT5677_M_DAC4_L_DD2_L_SFT, 1, 1),
-	SOC_DAPM_SINGLE("DAC4 R Switch", RT5677_DD2_MIXER,
+	SOC_DAPM_SINGLE_AUTODISABLE("DAC4 R Switch", RT5677_DD2_MIXER,
 			RT5677_M_DAC4_R_DD2_L_SFT, 1, 1),
 };
 
 static const struct snd_kcontrol_new rt5677_dd2_r_mix[] = {
-	SOC_DAPM_SINGLE("Sto DAC Mix R Switch", RT5677_DD2_MIXER,
+	SOC_DAPM_SINGLE_AUTODISABLE("Sto DAC Mix R Switch", RT5677_DD2_MIXER,
 			RT5677_M_STO_R_DD2_R_SFT, 1, 1),
-	SOC_DAPM_SINGLE("Mono DAC Mix R Switch", RT5677_DD2_MIXER,
+	SOC_DAPM_SINGLE_AUTODISABLE("Mono DAC Mix R Switch", RT5677_DD2_MIXER,
 			RT5677_M_MONO_R_DD2_R_SFT, 1, 1),
-	SOC_DAPM_SINGLE("DAC4 R Switch", RT5677_DD2_MIXER,
+	SOC_DAPM_SINGLE_AUTODISABLE("DAC4 R Switch", RT5677_DD2_MIXER,
 			RT5677_M_DAC4_R_DD2_R_SFT, 1, 1),
-	SOC_DAPM_SINGLE("DAC4 L Switch", RT5677_DD2_MIXER,
+	SOC_DAPM_SINGLE_AUTODISABLE("DAC4 L Switch", RT5677_DD2_MIXER,
 			RT5677_M_DAC4_L_DD2_R_SFT, 1, 1),
 };
 
@@ -2596,6 +2596,21 @@
 	return 0;
 }
 
+static int rt5677_filter_power_event(struct snd_soc_dapm_widget *w,
+	struct snd_kcontrol *kcontrol, int event)
+{
+	switch (event) {
+	case SND_SOC_DAPM_POST_PMU:
+		msleep(50);
+		break;
+
+	default:
+		return 0;
+	}
+
+	return 0;
+}
+
 static const struct snd_soc_dapm_widget rt5677_dapm_widgets[] = {
 	SND_SOC_DAPM_SUPPLY("PLL1", RT5677_PWR_ANLG2, RT5677_PWR_PLL1_BIT,
 		0, rt5677_set_pll1_event, SND_SOC_DAPM_PRE_PMU |
@@ -3072,19 +3087,26 @@
 
 	/* DAC Mixer */
 	SND_SOC_DAPM_SUPPLY("dac stereo1 filter", RT5677_PWR_DIG2,
-		RT5677_PWR_DAC_S1F_BIT, 0, NULL, 0),
+		RT5677_PWR_DAC_S1F_BIT, 0, rt5677_filter_power_event,
+		SND_SOC_DAPM_POST_PMU),
 	SND_SOC_DAPM_SUPPLY("dac mono2 left filter", RT5677_PWR_DIG2,
-		RT5677_PWR_DAC_M2F_L_BIT, 0, NULL, 0),
+		RT5677_PWR_DAC_M2F_L_BIT, 0, rt5677_filter_power_event,
+		SND_SOC_DAPM_POST_PMU),
 	SND_SOC_DAPM_SUPPLY("dac mono2 right filter", RT5677_PWR_DIG2,
-		RT5677_PWR_DAC_M2F_R_BIT, 0, NULL, 0),
+		RT5677_PWR_DAC_M2F_R_BIT, 0, rt5677_filter_power_event,
+		SND_SOC_DAPM_POST_PMU),
 	SND_SOC_DAPM_SUPPLY("dac mono3 left filter", RT5677_PWR_DIG2,
-		RT5677_PWR_DAC_M3F_L_BIT, 0, NULL, 0),
+		RT5677_PWR_DAC_M3F_L_BIT, 0, rt5677_filter_power_event,
+		SND_SOC_DAPM_POST_PMU),
 	SND_SOC_DAPM_SUPPLY("dac mono3 right filter", RT5677_PWR_DIG2,
-		RT5677_PWR_DAC_M3F_R_BIT, 0, NULL, 0),
+		RT5677_PWR_DAC_M3F_R_BIT, 0, rt5677_filter_power_event,
+		SND_SOC_DAPM_POST_PMU),
 	SND_SOC_DAPM_SUPPLY("dac mono4 left filter", RT5677_PWR_DIG2,
-		RT5677_PWR_DAC_M4F_L_BIT, 0, NULL, 0),
+		RT5677_PWR_DAC_M4F_L_BIT, 0, rt5677_filter_power_event,
+		SND_SOC_DAPM_POST_PMU),
 	SND_SOC_DAPM_SUPPLY("dac mono4 right filter", RT5677_PWR_DIG2,
-		RT5677_PWR_DAC_M4F_R_BIT, 0, NULL, 0),
+		RT5677_PWR_DAC_M4F_R_BIT, 0, rt5677_filter_power_event,
+		SND_SOC_DAPM_POST_PMU),
 
 	SND_SOC_DAPM_MIXER("Stereo DAC MIXL", SND_SOC_NOPM, 0, 0,
 		rt5677_sto1_dac_l_mix, ARRAY_SIZE(rt5677_sto1_dac_l_mix)),
diff --git a/sound/soc/codecs/wm8960.c b/sound/soc/codecs/wm8960.c
index 0563753..5380798 100644
--- a/sound/soc/codecs/wm8960.c
+++ b/sound/soc/codecs/wm8960.c
@@ -229,7 +229,7 @@
 SOC_DOUBLE_R("Capture Volume ZC Switch", WM8960_LINVOL, WM8960_RINVOL,
 	6, 1, 0),
 SOC_DOUBLE_R("Capture Switch", WM8960_LINVOL, WM8960_RINVOL,
-	7, 1, 0),
+	7, 1, 1),
 
 SOC_SINGLE_TLV("Right Input Boost Mixer RINPUT3 Volume",
 	       WM8960_INBMIX1, 4, 7, 0, lineinboost_tlv),
diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c
index 39ebd7b..a7e7978 100644
--- a/sound/soc/codecs/wm8962.c
+++ b/sound/soc/codecs/wm8962.c
@@ -365,8 +365,8 @@
 	{ 16924, 0x0059 },   /* R16924 - HDBASS_PG_1 */
 	{ 16925, 0x999A },   /* R16925 - HDBASS_PG_0 */
 
-	{ 17048, 0x0083 },   /* R17408 - HPF_C_1 */
-	{ 17049, 0x98AD },   /* R17409 - HPF_C_0 */
+	{ 17408, 0x0083 },   /* R17408 - HPF_C_1 */
+	{ 17409, 0x98AD },   /* R17409 - HPF_C_0 */
 
 	{ 17920, 0x007F },   /* R17920 - ADCL_RETUNE_C1_1 */
 	{ 17921, 0xFFFF },   /* R17921 - ADCL_RETUNE_C1_0 */
diff --git a/sound/soc/davinci/davinci-mcasp.c b/sound/soc/davinci/davinci-mcasp.c
index 4495a40..c1c9c2e 100644
--- a/sound/soc/davinci/davinci-mcasp.c
+++ b/sound/soc/davinci/davinci-mcasp.c
@@ -681,8 +681,8 @@
 	}
 
 	mcasp->tdm_slots = slots;
-	mcasp->tdm_mask[SNDRV_PCM_STREAM_PLAYBACK] = rx_mask;
-	mcasp->tdm_mask[SNDRV_PCM_STREAM_CAPTURE] = tx_mask;
+	mcasp->tdm_mask[SNDRV_PCM_STREAM_PLAYBACK] = tx_mask;
+	mcasp->tdm_mask[SNDRV_PCM_STREAM_CAPTURE] = rx_mask;
 	mcasp->slot_width = slot_width;
 
 	return davinci_mcasp_set_ch_constraints(mcasp);
@@ -908,6 +908,14 @@
 		mcasp_set_bits(mcasp, DAVINCI_MCASP_RXFMT_REG, busel | RXORD);
 		mcasp_mod_bits(mcasp, DAVINCI_MCASP_RXFMCTL_REG,
 			       FSRMOD(total_slots), FSRMOD(0x1FF));
+		/*
+		 * If McASP is set to be TX/RX synchronous and the playback is
+		 * not running already we need to configure the TX slots in
+		 * order to have correct FSX on the bus
+		 */
+		if (mcasp_is_synchronous(mcasp) && !mcasp->channels)
+			mcasp_mod_bits(mcasp, DAVINCI_MCASP_TXFMCTL_REG,
+				       FSXMOD(total_slots), FSXMOD(0x1FF));
 	}
 
 	return 0;
diff --git a/sound/soc/fsl/Kconfig b/sound/soc/fsl/Kconfig
index 19c302b..14dfdee 100644
--- a/sound/soc/fsl/Kconfig
+++ b/sound/soc/fsl/Kconfig
@@ -283,6 +283,8 @@
 config SND_SOC_FSL_ASOC_CARD
 	tristate "Generic ASoC Sound Card with ASRC support"
 	depends on OF && I2C
+	# enforce SND_SOC_FSL_ASOC_CARD=m if SND_AC97_CODEC=m:
+	depends on SND_AC97_CODEC || SND_AC97_CODEC=n
 	select SND_SOC_IMX_AUDMUX
 	select SND_SOC_IMX_PCM_DMA
 	select SND_SOC_FSL_ESAI
diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c
index a4435f5..ffd5f9a 100644
--- a/sound/soc/fsl/fsl_sai.c
+++ b/sound/soc/fsl/fsl_sai.c
@@ -454,7 +454,8 @@
 	 * Rx sync with Tx clocks: Clear SYNC for Tx, set it for Rx.
 	 * Tx sync with Rx clocks: Clear SYNC for Rx, set it for Tx.
 	 */
-	regmap_update_bits(sai->regmap, FSL_SAI_TCR2, FSL_SAI_CR2_SYNC, 0);
+	regmap_update_bits(sai->regmap, FSL_SAI_TCR2, FSL_SAI_CR2_SYNC,
+		           sai->synchronous[TX] ? FSL_SAI_CR2_SYNC : 0);
 	regmap_update_bits(sai->regmap, FSL_SAI_RCR2, FSL_SAI_CR2_SYNC,
 			   sai->synchronous[RX] ? FSL_SAI_CR2_SYNC : 0);
 
diff --git a/sound/soc/intel/Kconfig b/sound/soc/intel/Kconfig
index 7b778ab..d430ef5 100644
--- a/sound/soc/intel/Kconfig
+++ b/sound/soc/intel/Kconfig
@@ -144,7 +144,7 @@
 
 config SND_SOC_INTEL_SKL_RT286_MACH
 	tristate "ASoC Audio driver for SKL with RT286 I2S mode"
-	depends on X86 && ACPI
+	depends on X86 && ACPI && I2C
 	select SND_SOC_INTEL_SST
 	select SND_SOC_INTEL_SKYLAKE
 	select SND_SOC_RT286
diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c
index a7854c8..ffea427 100644
--- a/sound/soc/intel/skylake/skl-topology.c
+++ b/sound/soc/intel/skylake/skl-topology.c
@@ -1240,6 +1240,7 @@
 	 */
 	ret = snd_soc_tplg_component_load(&platform->component,
 					&skl_tplg_ops, fw, 0);
+	release_firmware(fw);
 	if (ret < 0) {
 		dev_err(bus->dev, "tplg component load failed%d\n", ret);
 		return -EINVAL;
diff --git a/sound/soc/rockchip/rockchip_spdif.c b/sound/soc/rockchip/rockchip_spdif.c
index a38a302..ac72ff5 100644
--- a/sound/soc/rockchip/rockchip_spdif.c
+++ b/sound/soc/rockchip/rockchip_spdif.c
@@ -280,7 +280,7 @@
 	int ret;
 
 	match = of_match_node(rk_spdif_match, np);
-	if ((int) match->data == RK_SPDIF_RK3288) {
+	if (match->data == (void *)RK_SPDIF_RK3288) {
 		struct regmap *grf;
 
 		grf = syscon_regmap_lookup_by_phandle(np, "rockchip,grf");
diff --git a/sound/soc/rockchip/rockchip_spdif.h b/sound/soc/rockchip/rockchip_spdif.h
index 07f86a2..921b409 100644
--- a/sound/soc/rockchip/rockchip_spdif.h
+++ b/sound/soc/rockchip/rockchip_spdif.h
@@ -28,9 +28,9 @@
 #define SPDIF_CFGR_VDW(x)	(x << SPDIF_CFGR_VDW_SHIFT)
 #define SDPIF_CFGR_VDW_MASK	(0xf << SPDIF_CFGR_VDW_SHIFT)
 
-#define SPDIF_CFGR_VDW_16	SPDIF_CFGR_VDW(0x00)
-#define SPDIF_CFGR_VDW_20	SPDIF_CFGR_VDW(0x01)
-#define SPDIF_CFGR_VDW_24	SPDIF_CFGR_VDW(0x10)
+#define SPDIF_CFGR_VDW_16	SPDIF_CFGR_VDW(0x0)
+#define SPDIF_CFGR_VDW_20	SPDIF_CFGR_VDW(0x1)
+#define SPDIF_CFGR_VDW_24	SPDIF_CFGR_VDW(0x2)
 
 /*
  * DMACR
diff --git a/sound/soc/sh/rcar/gen.c b/sound/soc/sh/rcar/gen.c
index 76da762..edcf4cc 100644
--- a/sound/soc/sh/rcar/gen.c
+++ b/sound/soc/sh/rcar/gen.c
@@ -235,7 +235,7 @@
 		RSND_GEN_S_REG(SCU_SYS_STATUS0,	0x1c8),
 		RSND_GEN_S_REG(SCU_SYS_INT_EN0,	0x1cc),
 		RSND_GEN_S_REG(SCU_SYS_STATUS1,	0x1d0),
-		RSND_GEN_S_REG(SCU_SYS_INT_EN1,	0x1c4),
+		RSND_GEN_S_REG(SCU_SYS_INT_EN1,	0x1d4),
 		RSND_GEN_M_REG(SRC_SWRSR,	0x200,	0x40),
 		RSND_GEN_M_REG(SRC_SRCIR,	0x204,	0x40),
 		RSND_GEN_M_REG(SRC_ADINR,	0x214,	0x40),
diff --git a/sound/soc/sh/rcar/src.c b/sound/soc/sh/rcar/src.c
index 261b502..68b439e 100644
--- a/sound/soc/sh/rcar/src.c
+++ b/sound/soc/sh/rcar/src.c
@@ -923,6 +923,7 @@
 			    struct snd_soc_pcm_runtime *rtd)
 {
 	struct rsnd_dai *rdai = rsnd_io_to_rdai(io);
+	struct rsnd_mod *dvc = rsnd_io_to_mod_dvc(io);
 	struct rsnd_src *src = rsnd_mod_to_src(mod);
 	int ret;
 
@@ -937,6 +938,12 @@
 		return 0;
 
 	/*
+	 * SRC In doesn't work if DVC was enabled
+	 */
+	if (dvc && !rsnd_io_is_play(io))
+		return 0;
+
+	/*
 	 * enable sync convert
 	 */
 	ret = rsnd_kctrl_new_s(mod, io, rtd,
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 24b0960..a1305f8 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -795,12 +795,12 @@
 
 	dev_dbg(card->dev, "ASoC: resume work completed\n");
 
-	/* userspace can access us now we are back as we were before */
-	snd_power_change_state(card->snd_card, SNDRV_CTL_POWER_D0);
-
 	/* Recheck all endpoints too, their state is affected by suspend */
 	dapm_mark_endpoints_dirty(card);
 	snd_soc_dapm_sync(&card->dapm);
+
+	/* userspace can access us now we are back as we were before */
+	snd_power_change_state(card->snd_card, SNDRV_CTL_POWER_D0);
 }
 
 /* powers up audio subsystem after a suspend */
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 016eba1..7d00942 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -2293,6 +2293,12 @@
 	kfree(w);
 }
 
+void snd_soc_dapm_reset_cache(struct snd_soc_dapm_context *dapm)
+{
+	dapm->path_sink_cache.widget = NULL;
+	dapm->path_source_cache.widget = NULL;
+}
+
 /* free all dapm widgets and resources */
 static void dapm_free_widgets(struct snd_soc_dapm_context *dapm)
 {
@@ -2303,6 +2309,7 @@
 			continue;
 		snd_soc_dapm_free_widget(w);
 	}
+	snd_soc_dapm_reset_cache(dapm);
 }
 
 static struct snd_soc_dapm_widget *dapm_find_widget(
diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c
index ecd38e5..2f67ba6 100644
--- a/sound/soc/soc-ops.c
+++ b/sound/soc/soc-ops.c
@@ -404,7 +404,7 @@
 /**
  * snd_soc_put_volsw_sx - double mixer set callback
  * @kcontrol: mixer control
- * @uinfo: control element information
+ * @ucontrol: control element information
  *
  * Callback to set the value of a double mixer control that spans 2 registers.
  *
diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c
index 8d7ec80..6963ba2 100644
--- a/sound/soc/soc-topology.c
+++ b/sound/soc/soc-topology.c
@@ -531,7 +531,7 @@
 		/* TLV bytes controls need standard kcontrol info handler,
 		 * TLV callback and extended put/get handlers.
 		 */
-		k->info = snd_soc_bytes_info;
+		k->info = snd_soc_bytes_info_ext;
 		k->tlv.c = snd_soc_bytes_tlv_callback;
 
 		ext_ops = tplg->bytes_ext_ops;
@@ -1805,6 +1805,7 @@
 		snd_soc_tplg_widget_remove(w);
 		snd_soc_dapm_free_widget(w);
 	}
+	snd_soc_dapm_reset_cache(dapm);
 }
 EXPORT_SYMBOL_GPL(snd_soc_tplg_widget_remove_all);
 
diff --git a/sound/soc/sti/uniperif_player.c b/sound/soc/sti/uniperif_player.c
index 843f037..5c2bc53 100644
--- a/sound/soc/sti/uniperif_player.c
+++ b/sound/soc/sti/uniperif_player.c
@@ -669,6 +669,7 @@
 {
 	struct sti_uniperiph_data *priv = snd_soc_dai_get_drvdata(dai);
 	struct uniperif *player = priv->dai_data.uni;
+	player->substream = substream;
 
 	player->clk_adj = 0;
 
@@ -950,6 +951,8 @@
 	if (player->state != UNIPERIF_STATE_STOPPED)
 		/* Stop the player */
 		uni_player_stop(player);
+
+	player->substream = NULL;
 }
 
 static int uni_player_parse_dt_clk_glue(struct platform_device *pdev,
@@ -989,7 +992,7 @@
 	if (!info)
 		return -ENOMEM;
 
-	if (of_property_read_u32(pnode, "version", &player->ver) ||
+	if (of_property_read_u32(pnode, "st,version", &player->ver) ||
 	    player->ver == SND_ST_UNIPERIF_VERSION_UNKNOWN) {
 		dev_err(dev, "Unknown uniperipheral version ");
 		return -EINVAL;
@@ -998,13 +1001,13 @@
 	if (player->ver >= SND_ST_UNIPERIF_VERSION_UNI_PLR_TOP_1_0)
 		info->underflow_enabled = 1;
 
-	if (of_property_read_u32(pnode, "uniperiph-id", &info->id)) {
+	if (of_property_read_u32(pnode, "st,uniperiph-id", &info->id)) {
 		dev_err(dev, "uniperipheral id not defined");
 		return -EINVAL;
 	}
 
 	/* Read the device mode property */
-	if (of_property_read_string(pnode, "mode", &mode)) {
+	if (of_property_read_string(pnode, "st,mode", &mode)) {
 		dev_err(dev, "uniperipheral mode not defined");
 		return -EINVAL;
 	}
diff --git a/sound/soc/sti/uniperif_reader.c b/sound/soc/sti/uniperif_reader.c
index f791239..8a0eb20 100644
--- a/sound/soc/sti/uniperif_reader.c
+++ b/sound/soc/sti/uniperif_reader.c
@@ -316,7 +316,7 @@
 	if (!info)
 		return -ENOMEM;
 
-	if (of_property_read_u32(node, "version", &reader->ver) ||
+	if (of_property_read_u32(node, "st,version", &reader->ver) ||
 	    reader->ver == SND_ST_UNIPERIF_VERSION_UNKNOWN) {
 		dev_err(&pdev->dev, "Unknown uniperipheral version ");
 		return -EINVAL;
@@ -346,7 +346,6 @@
 	reader->hw = &uni_reader_pcm_hw;
 	reader->dai_ops = &uni_reader_dai_ops;
 
-	dev_err(reader->dev, "%s: enter\n", __func__);
 	ret = uni_reader_parse_dt(pdev, reader);
 	if (ret < 0) {
 		dev_err(reader->dev, "Failed to parse DeviceTree");
diff --git a/sound/soc/sunxi/sun4i-codec.c b/sound/soc/sunxi/sun4i-codec.c
index bcbf4da..1bb896d 100644
--- a/sound/soc/sunxi/sun4i-codec.c
+++ b/sound/soc/sunxi/sun4i-codec.c
@@ -2,6 +2,7 @@
  * Copyright 2014 Emilio López <emilio@elopez.com.ar>
  * Copyright 2014 Jon Smirl <jonsmirl@gmail.com>
  * Copyright 2015 Maxime Ripard <maxime.ripard@free-electrons.com>
+ * Copyright 2015 Adam Sampson <ats@offog.org>
  *
  * Based on the Allwinner SDK driver, released under the GPL.
  *
@@ -404,7 +405,7 @@
 static DECLARE_TLV_DB_SCALE(sun4i_codec_pa_volume_scale, -6300, 100, 1);
 
 static const struct snd_kcontrol_new sun4i_codec_widgets[] = {
-	SOC_SINGLE_TLV("PA Volume", SUN4I_CODEC_DAC_ACTL,
+	SOC_SINGLE_TLV("Power Amplifier Volume", SUN4I_CODEC_DAC_ACTL,
 		       SUN4I_CODEC_DAC_ACTL_PA_VOL, 0x3F, 0,
 		       sun4i_codec_pa_volume_scale),
 };
@@ -452,12 +453,12 @@
 	SND_SOC_DAPM_SUPPLY("Mixer Enable", SUN4I_CODEC_DAC_ACTL,
 			    SUN4I_CODEC_DAC_ACTL_MIXEN, 0, NULL, 0),
 
-	/* Pre-Amplifier */
-	SND_SOC_DAPM_MIXER("Pre-Amplifier", SUN4I_CODEC_ADC_ACTL,
+	/* Power Amplifier */
+	SND_SOC_DAPM_MIXER("Power Amplifier", SUN4I_CODEC_ADC_ACTL,
 			   SUN4I_CODEC_ADC_ACTL_PA_EN, 0,
 			   sun4i_codec_pa_mixer_controls,
 			   ARRAY_SIZE(sun4i_codec_pa_mixer_controls)),
-	SND_SOC_DAPM_SWITCH("Pre-Amplifier Mute", SND_SOC_NOPM, 0, 0,
+	SND_SOC_DAPM_SWITCH("Power Amplifier Mute", SND_SOC_NOPM, 0, 0,
 			    &sun4i_codec_pa_mute),
 
 	SND_SOC_DAPM_OUTPUT("HP Right"),
@@ -480,16 +481,16 @@
 	{ "Left Mixer", NULL, "Mixer Enable" },
 	{ "Left Mixer", "Left DAC Playback Switch", "Left DAC" },
 
-	/* Pre-Amplifier Mixer Routes */
-	{ "Pre-Amplifier", "Mixer Playback Switch", "Left Mixer" },
-	{ "Pre-Amplifier", "Mixer Playback Switch", "Right Mixer" },
-	{ "Pre-Amplifier", "DAC Playback Switch", "Left DAC" },
-	{ "Pre-Amplifier", "DAC Playback Switch", "Right DAC" },
+	/* Power Amplifier Routes */
+	{ "Power Amplifier", "Mixer Playback Switch", "Left Mixer" },
+	{ "Power Amplifier", "Mixer Playback Switch", "Right Mixer" },
+	{ "Power Amplifier", "DAC Playback Switch", "Left DAC" },
+	{ "Power Amplifier", "DAC Playback Switch", "Right DAC" },
 
-	/* PA -> HP path */
-	{ "Pre-Amplifier Mute", "Switch", "Pre-Amplifier" },
-	{ "HP Right", NULL, "Pre-Amplifier Mute" },
-	{ "HP Left", NULL, "Pre-Amplifier Mute" },
+	/* Headphone Output Routes */
+	{ "Power Amplifier Mute", "Switch", "Power Amplifier" },
+	{ "HP Right", NULL, "Power Amplifier Mute" },
+	{ "HP Left", NULL, "Power Amplifier Mute" },
 };
 
 static struct snd_soc_codec_driver sun4i_codec_codec = {
diff --git a/sound/usb/midi.c b/sound/usb/midi.c
index 7661616..5b4c58c 100644
--- a/sound/usb/midi.c
+++ b/sound/usb/midi.c
@@ -174,6 +174,8 @@
 		u8 running_status_length;
 	} ports[0x10];
 	u8 seen_f5;
+	bool in_sysex;
+	u8 last_cin;
 	u8 error_resubmit;
 	int current_port;
 };
@@ -468,6 +470,39 @@
 }
 
 /*
+ * QinHeng CH345 is buggy: every second packet inside a SysEx has not CIN 4
+ * but the previously seen CIN, but still with three data bytes.
+ */
+static void ch345_broken_sysex_input(struct snd_usb_midi_in_endpoint *ep,
+				     uint8_t *buffer, int buffer_length)
+{
+	unsigned int i, cin, length;
+
+	for (i = 0; i + 3 < buffer_length; i += 4) {
+		if (buffer[i] == 0 && i > 0)
+			break;
+		cin = buffer[i] & 0x0f;
+		if (ep->in_sysex &&
+		    cin == ep->last_cin &&
+		    (buffer[i + 1 + (cin == 0x6)] & 0x80) == 0)
+			cin = 0x4;
+#if 0
+		if (buffer[i + 1] == 0x90) {
+			/*
+			 * Either a corrupted running status or a real note-on
+			 * message; impossible to detect reliably.
+			 */
+		}
+#endif
+		length = snd_usbmidi_cin_length[cin];
+		snd_usbmidi_input_data(ep, 0, &buffer[i + 1], length);
+		ep->in_sysex = cin == 0x4;
+		if (!ep->in_sysex)
+			ep->last_cin = cin;
+	}
+}
+
+/*
  * CME protocol: like the standard protocol, but SysEx commands are sent as a
  * single USB packet preceded by a 0x0F byte.
  */
@@ -660,6 +695,12 @@
 	.output_packet = snd_usbmidi_output_standard_packet,
 };
 
+static struct usb_protocol_ops snd_usbmidi_ch345_broken_sysex_ops = {
+	.input = ch345_broken_sysex_input,
+	.output = snd_usbmidi_standard_output,
+	.output_packet = snd_usbmidi_output_standard_packet,
+};
+
 /*
  * AKAI MPD16 protocol:
  *
@@ -1341,6 +1382,7 @@
 		 * Various chips declare a packet size larger than 4 bytes, but
 		 * do not actually work with larger packets:
 		 */
+	case USB_ID(0x0a67, 0x5011): /* Medeli DD305 */
 	case USB_ID(0x0a92, 0x1020): /* ESI M4U */
 	case USB_ID(0x1430, 0x474b): /* RedOctane GH MIDI INTERFACE */
 	case USB_ID(0x15ca, 0x0101): /* Textech USB Midi Cable */
@@ -2378,6 +2420,10 @@
 
 		err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints);
 		break;
+	case QUIRK_MIDI_CH345:
+		umidi->usb_protocol_ops = &snd_usbmidi_ch345_broken_sysex_ops;
+		err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints);
+		break;
 	default:
 		dev_err(&umidi->dev->dev, "invalid quirk type %d\n",
 			quirk->type);
diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h
index 1a1e2e4..c60a776 100644
--- a/sound/usb/quirks-table.h
+++ b/sound/usb/quirks-table.h
@@ -2829,6 +2829,17 @@
 	.idProduct = 0x1020,
 },
 
+/* QinHeng devices */
+{
+	USB_DEVICE(0x1a86, 0x752d),
+	.driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) {
+		.vendor_name = "QinHeng",
+		.product_name = "CH345",
+		.ifnum = 1,
+		.type = QUIRK_MIDI_CH345
+	}
+},
+
 /* KeithMcMillen Stringport */
 {
 	USB_DEVICE(0x1f38, 0x0001),
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 5ca80e7..7016ad8 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -538,6 +538,7 @@
 		[QUIRK_MIDI_CME] = create_any_midi_quirk,
 		[QUIRK_MIDI_AKAI] = create_any_midi_quirk,
 		[QUIRK_MIDI_FTDI] = create_any_midi_quirk,
+		[QUIRK_MIDI_CH345] = create_any_midi_quirk,
 		[QUIRK_AUDIO_STANDARD_INTERFACE] = create_standard_audio_quirk,
 		[QUIRK_AUDIO_FIXED_ENDPOINT] = create_fixed_stream_quirk,
 		[QUIRK_AUDIO_EDIROL_UAXX] = create_uaxx_quirk,
diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h
index 15a1271..b665d85 100644
--- a/sound/usb/usbaudio.h
+++ b/sound/usb/usbaudio.h
@@ -95,6 +95,7 @@
 	QUIRK_MIDI_AKAI,
 	QUIRK_MIDI_US122L,
 	QUIRK_MIDI_FTDI,
+	QUIRK_MIDI_CH345,
 	QUIRK_AUDIO_STANDARD_INTERFACE,
 	QUIRK_AUDIO_FIXED_ENDPOINT,
 	QUIRK_AUDIO_EDIROL_UAXX,
diff --git a/tools/Makefile b/tools/Makefile
index d6f307d..7dc820a 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -32,6 +32,10 @@
 	@echo '  from the kernel command line to build and install one of'
 	@echo '  the tools above'
 	@echo ''
+	@echo '  $$ make tools/all'
+	@echo ''
+	@echo '  builds all tools.'
+	@echo ''
 	@echo '  $$ make tools/install'
 	@echo ''
 	@echo '  installs all tools.'
@@ -77,6 +81,11 @@
 freefall: FORCE
 	$(call descend,laptop/$@)
 
+all: acpi cgroup cpupower hv firewire lguest \
+		perf selftests turbostat usb \
+		virtio vm net x86_energy_perf_policy \
+		tmon freefall
+
 acpi_install:
 	$(call descend,power/$(@:_install=),install)
 
@@ -101,7 +110,7 @@
 install: acpi_install cgroup_install cpupower_install hv_install firewire_install lguest_install \
 		perf_install selftests_install turbostat_install usb_install \
 		virtio_install vm_install net_install x86_energy_perf_policy_install \
-		tmon freefall_install
+		tmon_install freefall_install
 
 acpi_clean:
 	$(call descend,power/acpi,clean)
diff --git a/tools/net/Makefile b/tools/net/Makefile
index ee577ea..ddf8880 100644
--- a/tools/net/Makefile
+++ b/tools/net/Makefile
@@ -4,6 +4,9 @@
 LEX = flex
 YACC = bison
 
+CFLAGS += -Wall -O2
+CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include
+
 %.yacc.c: %.y
 	$(YACC) -o $@ -d $<
 
@@ -12,15 +15,13 @@
 
 all : bpf_jit_disasm bpf_dbg bpf_asm
 
-bpf_jit_disasm : CFLAGS = -Wall -O2 -DPACKAGE='bpf_jit_disasm'
+bpf_jit_disasm : CFLAGS += -DPACKAGE='bpf_jit_disasm'
 bpf_jit_disasm : LDLIBS = -lopcodes -lbfd -ldl
 bpf_jit_disasm : bpf_jit_disasm.o
 
-bpf_dbg : CFLAGS = -Wall -O2
 bpf_dbg : LDLIBS = -lreadline
 bpf_dbg : bpf_dbg.o
 
-bpf_asm : CFLAGS = -Wall -O2 -I.
 bpf_asm : LDLIBS =
 bpf_asm : bpf_asm.o bpf_exp.yacc.o bpf_exp.lex.o
 bpf_exp.lex.o : bpf_exp.yacc.c
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 0a945d2..99d127f 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -675,6 +675,7 @@
 			.fork		= perf_event__repipe,
 			.exit		= perf_event__repipe,
 			.lost		= perf_event__repipe,
+			.lost_samples	= perf_event__repipe,
 			.aux		= perf_event__repipe,
 			.itrace_start	= perf_event__repipe,
 			.context_switch	= perf_event__repipe,
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 2853ad2..f256fac 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -44,7 +44,7 @@
 struct report {
 	struct perf_tool	tool;
 	struct perf_session	*session;
-	bool			force, use_tui, use_gtk, use_stdio;
+	bool			use_tui, use_gtk, use_stdio;
 	bool			hide_unresolved;
 	bool			dont_use_callchains;
 	bool			show_full_info;
@@ -678,7 +678,7 @@
 		   "file", "vmlinux pathname"),
 	OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
 		   "file", "kallsyms pathname"),
-	OPT_BOOLEAN('f', "force", &report.force, "don't complain, do it"),
+	OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
 	OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
 		    "load module symbols - WARNING: use only with -k and LIVE kernel"),
 	OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
@@ -832,7 +832,7 @@
 	}
 
 	file.path  = input_name;
-	file.force = report.force;
+	file.force = symbol_conf.force;
 
 repeat:
 	session = perf_session__new(&file, false, &report.tool);
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index e5afb89..fa9eb92 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -1430,7 +1430,6 @@
 
 struct popup_action {
 	struct thread 		*thread;
-	struct dso		*dso;
 	struct map_symbol 	ms;
 	int			socket;
 
@@ -1565,7 +1564,6 @@
 		return 0;
 
 	act->ms.map = map;
-	act->dso = map->dso;
 	act->fn = do_zoom_dso;
 	return 1;
 }
@@ -1827,7 +1825,6 @@
 
 	while (1) {
 		struct thread *thread = NULL;
-		struct dso *dso = NULL;
 		struct map *map = NULL;
 		int choice = 0;
 		int socked_id = -1;
@@ -1839,8 +1836,6 @@
 		if (browser->he_selection != NULL) {
 			thread = hist_browser__selected_thread(browser);
 			map = browser->selection->map;
-			if (map)
-				dso = map->dso;
 			socked_id = browser->he_selection->socket;
 		}
 		switch (key) {
@@ -1874,7 +1869,7 @@
 			hist_browser__dump(browser);
 			continue;
 		case 'd':
-			actions->dso = dso;
+			actions->ms.map = map;
 			do_zoom_dso(browser, actions);
 			continue;
 		case 'V':
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index d909459..217b5a6 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -76,6 +76,7 @@
 	.exit	= perf_event__exit_del_thread,
 	.attr		 = perf_event__process_attr,
 	.build_id	 = perf_event__process_build_id,
+	.ordered_events	 = true,
 };
 
 int build_id__sprintf(const u8 *build_id, int len, char *bf)
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 7c0c083..425df5c 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -933,6 +933,7 @@
 		/* Add new node and rebalance tree */
 		rb_link_node(&dso->rb_node, parent, p);
 		rb_insert_color(&dso->rb_node, root);
+		dso->root = root;
 	}
 	return NULL;
 }
@@ -945,15 +946,30 @@
 
 void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated)
 {
+	struct rb_root *root = dso->root;
+
 	if (name == NULL)
 		return;
 
 	if (dso->long_name_allocated)
 		free((char *)dso->long_name);
 
+	if (root) {
+		rb_erase(&dso->rb_node, root);
+		/*
+		 * __dso__findlink_by_longname() isn't guaranteed to add it
+		 * back, so a clean removal is required here.
+		 */
+		RB_CLEAR_NODE(&dso->rb_node);
+		dso->root = NULL;
+	}
+
 	dso->long_name		 = name;
 	dso->long_name_len	 = strlen(name);
 	dso->long_name_allocated = name_allocated;
+
+	if (root)
+		__dso__findlink_by_longname(root, dso, NULL);
 }
 
 void dso__set_short_name(struct dso *dso, const char *name, bool name_allocated)
@@ -1046,6 +1062,7 @@
 		dso->kernel = DSO_TYPE_USER;
 		dso->needs_swap = DSO_SWAP__UNSET;
 		RB_CLEAR_NODE(&dso->rb_node);
+		dso->root = NULL;
 		INIT_LIST_HEAD(&dso->node);
 		INIT_LIST_HEAD(&dso->data.open_entry);
 		pthread_mutex_init(&dso->lock, NULL);
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index fc8db9c..45ec4d0 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -135,6 +135,7 @@
 	pthread_mutex_t	 lock;
 	struct list_head node;
 	struct rb_node	 rb_node;	/* rbtree node sorted by long name */
+	struct rb_root	 *root;		/* root of rbtree that rb_node is in */
 	struct rb_root	 symbols[MAP__NR_TYPES];
 	struct rb_root	 symbol_names[MAP__NR_TYPES];
 	struct {
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 5ef90be..8b303ff 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -91,6 +91,7 @@
 
 	list_for_each_entry_safe(pos, n, &dsos->head, node) {
 		RB_CLEAR_NODE(&pos->rb_node);
+		pos->root = NULL;
 		list_del_init(&pos->node);
 		dso__put(pos);
 	}
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index bd8f03d..05012bb 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -1183,7 +1183,7 @@
 			container_of(pf, struct trace_event_finder, pf);
 	struct perf_probe_point *pp = &pf->pev->point;
 	struct probe_trace_event *tev;
-	struct perf_probe_arg *args;
+	struct perf_probe_arg *args = NULL;
 	int ret, i;
 
 	/* Check number of tevs */
@@ -1198,19 +1198,23 @@
 	ret = convert_to_trace_point(&pf->sp_die, tf->mod, pf->addr,
 				     pp->retprobe, pp->function, &tev->point);
 	if (ret < 0)
-		return ret;
+		goto end;
 
 	tev->point.realname = strdup(dwarf_diename(sc_die));
-	if (!tev->point.realname)
-		return -ENOMEM;
+	if (!tev->point.realname) {
+		ret = -ENOMEM;
+		goto end;
+	}
 
 	pr_debug("Probe point found: %s+%lu\n", tev->point.symbol,
 		 tev->point.offset);
 
 	/* Expand special probe argument if exist */
 	args = zalloc(sizeof(struct perf_probe_arg) * MAX_PROBE_ARGS);
-	if (args == NULL)
-		return -ENOMEM;
+	if (args == NULL) {
+		ret = -ENOMEM;
+		goto end;
+	}
 
 	ret = expand_probe_args(sc_die, pf, args);
 	if (ret < 0)
@@ -1234,6 +1238,10 @@
 	}
 
 end:
+	if (ret) {
+		clear_probe_trace_event(tev);
+		tf->ntevs--;
+	}
 	free(args);
 	return ret;
 }
@@ -1246,7 +1254,7 @@
 	struct trace_event_finder tf = {
 			.pf = {.pev = pev, .callback = add_probe_trace_event},
 			.max_tevs = probe_conf.max_probes, .mod = dbg->mod};
-	int ret;
+	int ret, i;
 
 	/* Allocate result tevs array */
 	*tevs = zalloc(sizeof(struct probe_trace_event) * tf.max_tevs);
@@ -1258,6 +1266,8 @@
 
 	ret = debuginfo__find_probes(dbg, &tf.pf);
 	if (ret < 0) {
+		for (i = 0; i < tf.ntevs; i++)
+			clear_probe_trace_event(&tf.tevs[i]);
 		zfree(tevs);
 		return ret;
 	}
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index b4cc766..cd08027 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -654,19 +654,24 @@
 	struct map_groups *kmaps = map__kmaps(map);
 	struct map *curr_map;
 	struct symbol *pos;
-	int count = 0, moved = 0;
+	int count = 0;
+	struct rb_root old_root = dso->symbols[map->type];
 	struct rb_root *root = &dso->symbols[map->type];
 	struct rb_node *next = rb_first(root);
 
 	if (!kmaps)
 		return -1;
 
+	*root = RB_ROOT;
+
 	while (next) {
 		char *module;
 
 		pos = rb_entry(next, struct symbol, rb_node);
 		next = rb_next(&pos->rb_node);
 
+		rb_erase_init(&pos->rb_node, &old_root);
+
 		module = strchr(pos->name, '\t');
 		if (module)
 			*module = '\0';
@@ -674,28 +679,21 @@
 		curr_map = map_groups__find(kmaps, map->type, pos->start);
 
 		if (!curr_map || (filter && filter(curr_map, pos))) {
-			rb_erase_init(&pos->rb_node, root);
 			symbol__delete(pos);
-		} else {
-			pos->start -= curr_map->start - curr_map->pgoff;
-			if (pos->end)
-				pos->end -= curr_map->start - curr_map->pgoff;
-			if (curr_map->dso != map->dso) {
-				rb_erase_init(&pos->rb_node, root);
-				symbols__insert(
-					&curr_map->dso->symbols[curr_map->type],
-					pos);
-				++moved;
-			} else {
-				++count;
-			}
+			continue;
 		}
+
+		pos->start -= curr_map->start - curr_map->pgoff;
+		if (pos->end)
+			pos->end -= curr_map->start - curr_map->pgoff;
+		symbols__insert(&curr_map->dso->symbols[curr_map->type], pos);
+		++count;
 	}
 
 	/* Symbols have been adjusted */
 	dso->adjust_symbols = 1;
 
-	return count + moved;
+	return count;
 }
 
 /*
@@ -1438,9 +1436,9 @@
 		if (lstat(dso->name, &st) < 0)
 			goto out;
 
-		if (st.st_uid && (st.st_uid != geteuid())) {
+		if (!symbol_conf.force && st.st_uid && (st.st_uid != geteuid())) {
 			pr_warning("File %s not owned by current user or root, "
-				"ignoring it.\n", dso->name);
+				   "ignoring it (use -f to override).\n", dso->name);
 			goto out;
 		}
 
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 40073c6..dcd786e 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -84,6 +84,7 @@
 	unsigned short	priv_size;
 	unsigned short	nr_events;
 	bool		try_vmlinux_path,
+			force,
 			ignore_vmlinux,
 			ignore_vmlinux_buildid,
 			show_kernel_path,
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index d8e4b20..0dac7e0 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -1173,9 +1173,9 @@
 	unsigned long long msr;
 	unsigned int ratio;
 
-	get_msr(base_cpu, MSR_NHM_PLATFORM_INFO, &msr);
+	get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
 
-	fprintf(stderr, "cpu%d: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
+	fprintf(stderr, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
 
 	ratio = (msr >> 40) & 0xFF;
 	fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency frequency\n",
@@ -1807,7 +1807,7 @@
  *
  * MSR_SMI_COUNT                   0x00000034
  *
- * MSR_NHM_PLATFORM_INFO           0x000000ce
+ * MSR_PLATFORM_INFO               0x000000ce
  * MSR_NHM_SNB_PKG_CST_CFG_CTL     0x000000e2
  *
  * MSR_PKG_C3_RESIDENCY            0x000003f8
@@ -1876,7 +1876,7 @@
 	get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
 	pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
 
-	get_msr(base_cpu, MSR_NHM_PLATFORM_INFO, &msr);
+	get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
 	base_ratio = (msr >> 8) & 0xFF;
 
 	base_hz = base_ratio * bclk * 1000000;
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 40ab447..51cf825 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -420,8 +420,7 @@
 
 static int nfit_test0_alloc(struct nfit_test *t)
 {
-	size_t nfit_size = sizeof(struct acpi_table_nfit)
-			+ sizeof(struct acpi_nfit_system_address) * NUM_SPA
+	size_t nfit_size = sizeof(struct acpi_nfit_system_address) * NUM_SPA
 			+ sizeof(struct acpi_nfit_memory_map) * NUM_MEM
 			+ sizeof(struct acpi_nfit_control_region) * NUM_DCR
 			+ sizeof(struct acpi_nfit_data_region) * NUM_BDW
@@ -471,8 +470,7 @@
 
 static int nfit_test1_alloc(struct nfit_test *t)
 {
-	size_t nfit_size = sizeof(struct acpi_table_nfit)
-		+ sizeof(struct acpi_nfit_system_address)
+	size_t nfit_size = sizeof(struct acpi_nfit_system_address)
 		+ sizeof(struct acpi_nfit_memory_map)
 		+ sizeof(struct acpi_nfit_control_region);
 
@@ -488,39 +486,24 @@
 	return 0;
 }
 
-static void nfit_test_init_header(struct acpi_table_nfit *nfit, size_t size)
-{
-	memcpy(nfit->header.signature, ACPI_SIG_NFIT, 4);
-	nfit->header.length = size;
-	nfit->header.revision = 1;
-	memcpy(nfit->header.oem_id, "LIBND", 6);
-	memcpy(nfit->header.oem_table_id, "TEST", 5);
-	nfit->header.oem_revision = 1;
-	memcpy(nfit->header.asl_compiler_id, "TST", 4);
-	nfit->header.asl_compiler_revision = 1;
-}
-
 static void nfit_test0_setup(struct nfit_test *t)
 {
 	struct nvdimm_bus_descriptor *nd_desc;
 	struct acpi_nfit_desc *acpi_desc;
 	struct acpi_nfit_memory_map *memdev;
 	void *nfit_buf = t->nfit_buf;
-	size_t size = t->nfit_size;
 	struct acpi_nfit_system_address *spa;
 	struct acpi_nfit_control_region *dcr;
 	struct acpi_nfit_data_region *bdw;
 	struct acpi_nfit_flush_address *flush;
 	unsigned int offset;
 
-	nfit_test_init_header(nfit_buf, size);
-
 	/*
 	 * spa0 (interleave first half of dimm0 and dimm1, note storage
 	 * does not actually alias the related block-data-window
 	 * regions)
 	 */
-	spa = nfit_buf + sizeof(struct acpi_table_nfit);
+	spa = nfit_buf;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
@@ -533,7 +516,7 @@
 	 * does not actually alias the related block-data-window
 	 * regions)
 	 */
-	spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa);
+	spa = nfit_buf + sizeof(*spa);
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
@@ -542,7 +525,7 @@
 	spa->length = SPA1_SIZE;
 
 	/* spa2 (dcr0) dimm0 */
-	spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 2;
+	spa = nfit_buf + sizeof(*spa) * 2;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
@@ -551,7 +534,7 @@
 	spa->length = DCR_SIZE;
 
 	/* spa3 (dcr1) dimm1 */
-	spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 3;
+	spa = nfit_buf + sizeof(*spa) * 3;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
@@ -560,7 +543,7 @@
 	spa->length = DCR_SIZE;
 
 	/* spa4 (dcr2) dimm2 */
-	spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 4;
+	spa = nfit_buf + sizeof(*spa) * 4;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
@@ -569,7 +552,7 @@
 	spa->length = DCR_SIZE;
 
 	/* spa5 (dcr3) dimm3 */
-	spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 5;
+	spa = nfit_buf + sizeof(*spa) * 5;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
@@ -578,7 +561,7 @@
 	spa->length = DCR_SIZE;
 
 	/* spa6 (bdw for dcr0) dimm0 */
-	spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 6;
+	spa = nfit_buf + sizeof(*spa) * 6;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
@@ -587,7 +570,7 @@
 	spa->length = DIMM_SIZE;
 
 	/* spa7 (bdw for dcr1) dimm1 */
-	spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 7;
+	spa = nfit_buf + sizeof(*spa) * 7;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
@@ -596,7 +579,7 @@
 	spa->length = DIMM_SIZE;
 
 	/* spa8 (bdw for dcr2) dimm2 */
-	spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 8;
+	spa = nfit_buf + sizeof(*spa) * 8;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
@@ -605,7 +588,7 @@
 	spa->length = DIMM_SIZE;
 
 	/* spa9 (bdw for dcr3) dimm3 */
-	spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 9;
+	spa = nfit_buf + sizeof(*spa) * 9;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
@@ -613,7 +596,7 @@
 	spa->address = t->dimm_dma[3];
 	spa->length = DIMM_SIZE;
 
-	offset = sizeof(struct acpi_table_nfit) + sizeof(*spa) * 10;
+	offset = sizeof(*spa) * 10;
 	/* mem-region0 (spa0, dimm0) */
 	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
@@ -1100,15 +1083,13 @@
 
 static void nfit_test1_setup(struct nfit_test *t)
 {
-	size_t size = t->nfit_size, offset;
+	size_t offset;
 	void *nfit_buf = t->nfit_buf;
 	struct acpi_nfit_memory_map *memdev;
 	struct acpi_nfit_control_region *dcr;
 	struct acpi_nfit_system_address *spa;
 
-	nfit_test_init_header(nfit_buf, size);
-
-	offset = sizeof(struct acpi_table_nfit);
+	offset = 0;
 	/* spa0 (flat range with no bdw aliasing) */
 	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
diff --git a/tools/testing/selftests/futex/README b/tools/testing/selftests/futex/README
index 3224a04..0558bb9 100644
--- a/tools/testing/selftests/futex/README
+++ b/tools/testing/selftests/futex/README
@@ -27,7 +27,7 @@
 o Where possible, any helper functions or other package-wide code shall be
   implemented in header files, avoiding the need to compile intermediate object
   files.
-o External dependendencies shall remain as minimal as possible. Currently gcc
+o External dependencies shall remain as minimal as possible. Currently gcc
   and glibc are the only dependencies.
 o Tests return 0 for success and < 0 for failure.
 
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index e38cc54..882fe83 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -492,6 +492,9 @@
 	pid_t parent = getppid();
 	int fd;
 	void *map1, *map2;
+	int page_size = sysconf(_SC_PAGESIZE);
+
+	ASSERT_LT(0, page_size);
 
 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 	ASSERT_EQ(0, ret);
@@ -504,16 +507,16 @@
 
 	EXPECT_EQ(parent, syscall(__NR_getppid));
 	map1 = (void *)syscall(sysno,
-		NULL, PAGE_SIZE, PROT_READ, MAP_PRIVATE, fd, PAGE_SIZE);
+		NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size);
 	EXPECT_NE(MAP_FAILED, map1);
 	/* mmap2() should never return. */
 	map2 = (void *)syscall(sysno,
-		 NULL, PAGE_SIZE, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE);
+		 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE);
 	EXPECT_EQ(MAP_FAILED, map2);
 
 	/* The test failed, so clean up the resources. */
-	munmap(map1, PAGE_SIZE);
-	munmap(map2, PAGE_SIZE);
+	munmap(map1, page_size);
+	munmap(map2, page_size);
 	close(fd);
 }
 
diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c
index bcf5ec7..5a60162 100644
--- a/tools/vm/page-types.c
+++ b/tools/vm/page-types.c
@@ -128,6 +128,7 @@
 	[KPF_THP]		= "t:thp",
 	[KPF_BALLOON]		= "o:balloon",
 	[KPF_ZERO_PAGE]		= "z:zero_page",
+	[KPF_IDLE]              = "i:idle_page",
 
 	[KPF_RESERVED]		= "r:reserved",
 	[KPF_MLOCKED]		= "m:mlocked",
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 21a0ab2..69bca18 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -221,17 +221,23 @@
 	kvm_timer_update_state(vcpu);
 
 	/*
-	 * If we enter the guest with the virtual input level to the VGIC
-	 * asserted, then we have already told the VGIC what we need to, and
-	 * we don't need to exit from the guest until the guest deactivates
-	 * the already injected interrupt, so therefore we should set the
-	 * hardware active state to prevent unnecessary exits from the guest.
-	 *
-	 * Conversely, if the virtual input level is deasserted, then always
-	 * clear the hardware active state to ensure that hardware interrupts
-	 * from the timer triggers a guest exit.
-	 */
-	if (timer->irq.level)
+	* If we enter the guest with the virtual input level to the VGIC
+	* asserted, then we have already told the VGIC what we need to, and
+	* we don't need to exit from the guest until the guest deactivates
+	* the already injected interrupt, so therefore we should set the
+	* hardware active state to prevent unnecessary exits from the guest.
+	*
+	* Also, if we enter the guest with the virtual timer interrupt active,
+	* then it must be active on the physical distributor, because we set
+	* the HW bit and the guest must be able to deactivate the virtual and
+	* physical interrupt at the same time.
+	*
+	* Conversely, if the virtual input level is deasserted and the virtual
+	* interrupt is not active, then always clear the hardware active state
+	* to ensure that hardware interrupts from the timer triggers a guest
+	* exit.
+	*/
+	if (timer->irq.level || kvm_vgic_map_is_active(vcpu, timer->map))
 		phys_active = true;
 	else
 		phys_active = false;
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 5335383..65461f8 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1096,6 +1096,27 @@
 	vgic_set_lr(vcpu, lr_nr, vlr);
 }
 
+static bool dist_active_irq(struct kvm_vcpu *vcpu)
+{
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+	return test_bit(vcpu->vcpu_id, dist->irq_active_on_cpu);
+}
+
+bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, struct irq_phys_map *map)
+{
+	int i;
+
+	for (i = 0; i < vcpu->arch.vgic_cpu.nr_lr; i++) {
+		struct vgic_lr vlr = vgic_get_lr(vcpu, i);
+
+		if (vlr.irq == map->virt_irq && vlr.state & LR_STATE_ACTIVE)
+			return true;
+	}
+
+	return dist_active_irq(vcpu);
+}
+
 /*
  * An interrupt may have been disabled after being made pending on the
  * CPU interface (the classic case is a timer running while we're
@@ -1248,7 +1269,7 @@
 	 * may have been serviced from another vcpu. In all cases,
 	 * move along.
 	 */
-	if (!kvm_vgic_vcpu_pending_irq(vcpu) && !kvm_vgic_vcpu_active_irq(vcpu))
+	if (!kvm_vgic_vcpu_pending_irq(vcpu) && !dist_active_irq(vcpu))
 		goto epilog;
 
 	/* SGIs */
@@ -1396,25 +1417,13 @@
 static bool vgic_sync_hwirq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr)
 {
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-	struct irq_phys_map *map;
-	bool phys_active;
 	bool level_pending;
-	int ret;
 
 	if (!(vlr.state & LR_HW))
 		return false;
 
-	map = vgic_irq_map_search(vcpu, vlr.irq);
-	BUG_ON(!map);
-
-	ret = irq_get_irqchip_state(map->irq,
-				    IRQCHIP_STATE_ACTIVE,
-				    &phys_active);
-
-	WARN_ON(ret);
-
-	if (phys_active)
-		return 0;
+	if (vlr.state & LR_STATE_ACTIVE)
+		return false;
 
 	spin_lock(&dist->lock);
 	level_pending = process_queued_irq(vcpu, lr, vlr);
@@ -1479,17 +1488,6 @@
 	return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
 }
 
-int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu)
-{
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
-	if (!irqchip_in_kernel(vcpu->kvm))
-		return 0;
-
-	return test_bit(vcpu->vcpu_id, dist->irq_active_on_cpu);
-}
-
-
 void vgic_kick_vcpus(struct kvm *kvm)
 {
 	struct kvm_vcpu *vcpu;