Merge tag 'drm/tegra/for-3.19-rc1' of git://people.freedesktop.org/~tagr/linux into drm-next

drm/tegra: Changes for v3.19-rc1

The highlights in this pull request are:

  * IOMMU support: The Tegra DRM driver can now deal with discontiguous
    buffers if an IOMMU exists in the system. That means it can allocate
    using drm_gem_get_pages() and will map them into IOVA space via the
    IOMMU API. Similarly, non-contiguous PRIME buffers can be imported
    from a different driver, which allows better integration with gk20a
    (nouveau) and less hacks.

  * Universal planes: This is precursory work for atomic modesetting and
    will allow hardware cursor support to be implemented on pre-Tegra114
    where RGB cursors were not supported.

  * DSI ganged-mode support: The DSI controller can now gang up with a
    second DSI controller to drive high resolution DSI panels.

Besides those bigger changes there is a slew of fixes, cleanups, plugged
memory leaks and so on.

* tag 'drm/tegra/for-3.19-rc1' of git://people.freedesktop.org/~tagr/linux: (44 commits)
  drm/tegra: gem: Check before freeing CMA memory
  drm/tegra: fb: Add error codes to error messages
  drm/tegra: fb: Properly release GEM objects on failure
  drm/tegra: Detach panel when a connector is removed
  drm/tegra: Plug memory leak
  drm/tegra: gem: Use more consistent data types
  drm/tegra: fb: Do not destroy framebuffer
  drm/tegra: gem: dumb: pitch and size are outputs
  drm/tegra: Enable the hotplug interrupt only when necessary
  drm/tegra: dc: Universal plane support
  drm/tegra: dc: Registers are 32 bits wide
  drm/tegra: dc: Factor out DC, window and cursor commit
  drm/tegra: Add IOMMU support
  drm/tegra: Fix error handling cleanup
  drm/tegra: gem: Use dma_mmap_writecombine()
  drm/tegra: gem: Remove redundant drm_gem_free_mmap_offset()
  drm/tegra: gem: Cleanup tegra_bo_create_with_handle()
  drm/tegra: gem: Extract tegra_bo_alloc_object()
  drm/tegra: dsi: Set up PHY_TIMING & BTA_TIMING registers earlier
  drm/tegra: dsi: Replace 1000000 by USEC_PER_SEC
  ...
diff --git a/Documentation/devicetree/bindings/gpu/nvidia,tegra20-host1x.txt b/Documentation/devicetree/bindings/gpu/nvidia,tegra20-host1x.txt
index b48f4ef..4c32ef0 100644
--- a/Documentation/devicetree/bindings/gpu/nvidia,tegra20-host1x.txt
+++ b/Documentation/devicetree/bindings/gpu/nvidia,tegra20-host1x.txt
@@ -191,6 +191,8 @@
   - nvidia,hpd-gpio: specifies a GPIO used for hotplug detection
   - nvidia,edid: supplies a binary EDID blob
   - nvidia,panel: phandle of a display panel
+  - nvidia,ganged-mode: contains a phandle to a second DSI controller to gang
+    up with in order to support up to 8 data lanes
 
 - sor: serial output resource
 
diff --git a/drivers/gpu/drm/tegra/Kconfig b/drivers/gpu/drm/tegra/Kconfig
index 354ddb2..74d9d62 100644
--- a/drivers/gpu/drm/tegra/Kconfig
+++ b/drivers/gpu/drm/tegra/Kconfig
@@ -1,6 +1,7 @@
 config DRM_TEGRA
 	tristate "NVIDIA Tegra DRM"
 	depends on ARCH_TEGRA || (ARM && COMPILE_TEST)
+	depends on COMMON_CLK
 	depends on DRM
 	depends on RESET_CONTROLLER
 	select DRM_KMS_HELPER
diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index cdfa126..b957908 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -9,8 +9,11 @@
 
 #include <linux/clk.h>
 #include <linux/debugfs.h>
+#include <linux/iommu.h>
 #include <linux/reset.h>
 
+#include <soc/tegra/pmc.h>
+
 #include "dc.h"
 #include "drm.h"
 #include "gem.h"
@@ -22,6 +25,7 @@
 	bool supports_cursor;
 	bool supports_block_linear;
 	unsigned int pitch_align;
+	bool has_powergate;
 };
 
 struct tegra_plane {
@@ -34,6 +38,26 @@
 	return container_of(plane, struct tegra_plane, base);
 }
 
+static void tegra_dc_window_commit(struct tegra_dc *dc, unsigned int index)
+{
+	u32 value = WIN_A_ACT_REQ << index;
+
+	tegra_dc_writel(dc, value << 8, DC_CMD_STATE_CONTROL);
+	tegra_dc_writel(dc, value, DC_CMD_STATE_CONTROL);
+}
+
+static void tegra_dc_cursor_commit(struct tegra_dc *dc)
+{
+	tegra_dc_writel(dc, CURSOR_ACT_REQ << 8, DC_CMD_STATE_CONTROL);
+	tegra_dc_writel(dc, CURSOR_ACT_REQ, DC_CMD_STATE_CONTROL);
+}
+
+static void tegra_dc_commit(struct tegra_dc *dc)
+{
+	tegra_dc_writel(dc, GENERAL_ACT_REQ << 8, DC_CMD_STATE_CONTROL);
+	tegra_dc_writel(dc, GENERAL_ACT_REQ, DC_CMD_STATE_CONTROL);
+}
+
 static unsigned int tegra_dc_format(uint32_t format, uint32_t *swap)
 {
 	/* assume no swapping of fetched data */
@@ -305,17 +329,260 @@
 		break;
 	}
 
-	tegra_dc_writel(dc, WIN_A_UPDATE << index, DC_CMD_STATE_CONTROL);
-	tegra_dc_writel(dc, WIN_A_ACT_REQ << index, DC_CMD_STATE_CONTROL);
+	tegra_dc_window_commit(dc, index);
 
 	return 0;
 }
 
-static int tegra_plane_update(struct drm_plane *plane, struct drm_crtc *crtc,
-			      struct drm_framebuffer *fb, int crtc_x,
-			      int crtc_y, unsigned int crtc_w,
-			      unsigned int crtc_h, uint32_t src_x,
-			      uint32_t src_y, uint32_t src_w, uint32_t src_h)
+static int tegra_window_plane_disable(struct drm_plane *plane)
+{
+	struct tegra_dc *dc = to_tegra_dc(plane->crtc);
+	struct tegra_plane *p = to_tegra_plane(plane);
+	u32 value;
+
+	if (!plane->crtc)
+		return 0;
+
+	value = WINDOW_A_SELECT << p->index;
+	tegra_dc_writel(dc, value, DC_CMD_DISPLAY_WINDOW_HEADER);
+
+	value = tegra_dc_readl(dc, DC_WIN_WIN_OPTIONS);
+	value &= ~WIN_ENABLE;
+	tegra_dc_writel(dc, value, DC_WIN_WIN_OPTIONS);
+
+	tegra_dc_window_commit(dc, p->index);
+
+	return 0;
+}
+
+static void tegra_plane_destroy(struct drm_plane *plane)
+{
+	struct tegra_plane *p = to_tegra_plane(plane);
+
+	drm_plane_cleanup(plane);
+	kfree(p);
+}
+
+static const u32 tegra_primary_plane_formats[] = {
+	DRM_FORMAT_XBGR8888,
+	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_RGB565,
+};
+
+static int tegra_primary_plane_update(struct drm_plane *plane,
+				      struct drm_crtc *crtc,
+				      struct drm_framebuffer *fb, int crtc_x,
+				      int crtc_y, unsigned int crtc_w,
+				      unsigned int crtc_h, uint32_t src_x,
+				      uint32_t src_y, uint32_t src_w,
+				      uint32_t src_h)
+{
+	struct tegra_bo *bo = tegra_fb_get_plane(fb, 0);
+	struct tegra_plane *p = to_tegra_plane(plane);
+	struct tegra_dc *dc = to_tegra_dc(crtc);
+	struct tegra_dc_window window;
+	int err;
+
+	memset(&window, 0, sizeof(window));
+	window.src.x = src_x >> 16;
+	window.src.y = src_y >> 16;
+	window.src.w = src_w >> 16;
+	window.src.h = src_h >> 16;
+	window.dst.x = crtc_x;
+	window.dst.y = crtc_y;
+	window.dst.w = crtc_w;
+	window.dst.h = crtc_h;
+	window.format = tegra_dc_format(fb->pixel_format, &window.swap);
+	window.bits_per_pixel = fb->bits_per_pixel;
+	window.bottom_up = tegra_fb_is_bottom_up(fb);
+
+	err = tegra_fb_get_tiling(fb, &window.tiling);
+	if (err < 0)
+		return err;
+
+	window.base[0] = bo->paddr + fb->offsets[0];
+	window.stride[0] = fb->pitches[0];
+
+	err = tegra_dc_setup_window(dc, p->index, &window);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+static void tegra_primary_plane_destroy(struct drm_plane *plane)
+{
+	tegra_window_plane_disable(plane);
+	tegra_plane_destroy(plane);
+}
+
+static const struct drm_plane_funcs tegra_primary_plane_funcs = {
+	.update_plane = tegra_primary_plane_update,
+	.disable_plane = tegra_window_plane_disable,
+	.destroy = tegra_primary_plane_destroy,
+};
+
+static struct drm_plane *tegra_dc_primary_plane_create(struct drm_device *drm,
+						       struct tegra_dc *dc)
+{
+	struct tegra_plane *plane;
+	unsigned int num_formats;
+	const u32 *formats;
+	int err;
+
+	plane = kzalloc(sizeof(*plane), GFP_KERNEL);
+	if (!plane)
+		return ERR_PTR(-ENOMEM);
+
+	num_formats = ARRAY_SIZE(tegra_primary_plane_formats);
+	formats = tegra_primary_plane_formats;
+
+	err = drm_universal_plane_init(drm, &plane->base, 1 << dc->pipe,
+				       &tegra_primary_plane_funcs, formats,
+				       num_formats, DRM_PLANE_TYPE_PRIMARY);
+	if (err < 0) {
+		kfree(plane);
+		return ERR_PTR(err);
+	}
+
+	return &plane->base;
+}
+
+static const u32 tegra_cursor_plane_formats[] = {
+	DRM_FORMAT_RGBA8888,
+};
+
+static int tegra_cursor_plane_update(struct drm_plane *plane,
+				     struct drm_crtc *crtc,
+				     struct drm_framebuffer *fb, int crtc_x,
+				     int crtc_y, unsigned int crtc_w,
+				     unsigned int crtc_h, uint32_t src_x,
+				     uint32_t src_y, uint32_t src_w,
+				     uint32_t src_h)
+{
+	struct tegra_bo *bo = tegra_fb_get_plane(fb, 0);
+	struct tegra_dc *dc = to_tegra_dc(crtc);
+	u32 value = CURSOR_CLIP_DISPLAY;
+
+	/* scaling not supported for cursor */
+	if ((src_w >> 16 != crtc_w) || (src_h >> 16 != crtc_h))
+		return -EINVAL;
+
+	/* only square cursors supported */
+	if (src_w != src_h)
+		return -EINVAL;
+
+	switch (crtc_w) {
+	case 32:
+		value |= CURSOR_SIZE_32x32;
+		break;
+
+	case 64:
+		value |= CURSOR_SIZE_64x64;
+		break;
+
+	case 128:
+		value |= CURSOR_SIZE_128x128;
+		break;
+
+	case 256:
+		value |= CURSOR_SIZE_256x256;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	value |= (bo->paddr >> 10) & 0x3fffff;
+	tegra_dc_writel(dc, value, DC_DISP_CURSOR_START_ADDR);
+
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+	value = (bo->paddr >> 32) & 0x3;
+	tegra_dc_writel(dc, value, DC_DISP_CURSOR_START_ADDR_HI);
+#endif
+
+	/* enable cursor and set blend mode */
+	value = tegra_dc_readl(dc, DC_DISP_DISP_WIN_OPTIONS);
+	value |= CURSOR_ENABLE;
+	tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS);
+
+	value = tegra_dc_readl(dc, DC_DISP_BLEND_CURSOR_CONTROL);
+	value &= ~CURSOR_DST_BLEND_MASK;
+	value &= ~CURSOR_SRC_BLEND_MASK;
+	value |= CURSOR_MODE_NORMAL;
+	value |= CURSOR_DST_BLEND_NEG_K1_TIMES_SRC;
+	value |= CURSOR_SRC_BLEND_K1_TIMES_SRC;
+	value |= CURSOR_ALPHA;
+	tegra_dc_writel(dc, value, DC_DISP_BLEND_CURSOR_CONTROL);
+
+	/* position the cursor */
+	value = (crtc_y & 0x3fff) << 16 | (crtc_x & 0x3fff);
+	tegra_dc_writel(dc, value, DC_DISP_CURSOR_POSITION);
+
+	/* apply changes */
+	tegra_dc_cursor_commit(dc);
+	tegra_dc_commit(dc);
+
+	return 0;
+}
+
+static int tegra_cursor_plane_disable(struct drm_plane *plane)
+{
+	struct tegra_dc *dc = to_tegra_dc(plane->crtc);
+	u32 value;
+
+	if (!plane->crtc)
+		return 0;
+
+	value = tegra_dc_readl(dc, DC_DISP_DISP_WIN_OPTIONS);
+	value &= ~CURSOR_ENABLE;
+	tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS);
+
+	tegra_dc_cursor_commit(dc);
+	tegra_dc_commit(dc);
+
+	return 0;
+}
+
+static const struct drm_plane_funcs tegra_cursor_plane_funcs = {
+	.update_plane = tegra_cursor_plane_update,
+	.disable_plane = tegra_cursor_plane_disable,
+	.destroy = tegra_plane_destroy,
+};
+
+static struct drm_plane *tegra_dc_cursor_plane_create(struct drm_device *drm,
+						      struct tegra_dc *dc)
+{
+	struct tegra_plane *plane;
+	unsigned int num_formats;
+	const u32 *formats;
+	int err;
+
+	plane = kzalloc(sizeof(*plane), GFP_KERNEL);
+	if (!plane)
+		return ERR_PTR(-ENOMEM);
+
+	num_formats = ARRAY_SIZE(tegra_cursor_plane_formats);
+	formats = tegra_cursor_plane_formats;
+
+	err = drm_universal_plane_init(drm, &plane->base, 1 << dc->pipe,
+				       &tegra_cursor_plane_funcs, formats,
+				       num_formats, DRM_PLANE_TYPE_CURSOR);
+	if (err < 0) {
+		kfree(plane);
+		return ERR_PTR(err);
+	}
+
+	return &plane->base;
+}
+
+static int tegra_overlay_plane_update(struct drm_plane *plane,
+				      struct drm_crtc *crtc,
+				      struct drm_framebuffer *fb, int crtc_x,
+				      int crtc_y, unsigned int crtc_w,
+				      unsigned int crtc_h, uint32_t src_x,
+				      uint32_t src_y, uint32_t src_w,
+				      uint32_t src_h)
 {
 	struct tegra_plane *p = to_tegra_plane(plane);
 	struct tegra_dc *dc = to_tegra_dc(crtc);
@@ -361,44 +628,19 @@
 	return tegra_dc_setup_window(dc, p->index, &window);
 }
 
-static int tegra_plane_disable(struct drm_plane *plane)
+static void tegra_overlay_plane_destroy(struct drm_plane *plane)
 {
-	struct tegra_dc *dc = to_tegra_dc(plane->crtc);
-	struct tegra_plane *p = to_tegra_plane(plane);
-	unsigned long value;
-
-	if (!plane->crtc)
-		return 0;
-
-	value = WINDOW_A_SELECT << p->index;
-	tegra_dc_writel(dc, value, DC_CMD_DISPLAY_WINDOW_HEADER);
-
-	value = tegra_dc_readl(dc, DC_WIN_WIN_OPTIONS);
-	value &= ~WIN_ENABLE;
-	tegra_dc_writel(dc, value, DC_WIN_WIN_OPTIONS);
-
-	tegra_dc_writel(dc, WIN_A_UPDATE << p->index, DC_CMD_STATE_CONTROL);
-	tegra_dc_writel(dc, WIN_A_ACT_REQ << p->index, DC_CMD_STATE_CONTROL);
-
-	return 0;
+	tegra_window_plane_disable(plane);
+	tegra_plane_destroy(plane);
 }
 
-static void tegra_plane_destroy(struct drm_plane *plane)
-{
-	struct tegra_plane *p = to_tegra_plane(plane);
-
-	tegra_plane_disable(plane);
-	drm_plane_cleanup(plane);
-	kfree(p);
-}
-
-static const struct drm_plane_funcs tegra_plane_funcs = {
-	.update_plane = tegra_plane_update,
-	.disable_plane = tegra_plane_disable,
-	.destroy = tegra_plane_destroy,
+static const struct drm_plane_funcs tegra_overlay_plane_funcs = {
+	.update_plane = tegra_overlay_plane_update,
+	.disable_plane = tegra_window_plane_disable,
+	.destroy = tegra_overlay_plane_destroy,
 };
 
-static const uint32_t plane_formats[] = {
+static const uint32_t tegra_overlay_plane_formats[] = {
 	DRM_FORMAT_XBGR8888,
 	DRM_FORMAT_XRGB8888,
 	DRM_FORMAT_RGB565,
@@ -408,27 +650,44 @@
 	DRM_FORMAT_YUV422,
 };
 
+static struct drm_plane *tegra_dc_overlay_plane_create(struct drm_device *drm,
+						       struct tegra_dc *dc,
+						       unsigned int index)
+{
+	struct tegra_plane *plane;
+	unsigned int num_formats;
+	const u32 *formats;
+	int err;
+
+	plane = kzalloc(sizeof(*plane), GFP_KERNEL);
+	if (!plane)
+		return ERR_PTR(-ENOMEM);
+
+	plane->index = index;
+
+	num_formats = ARRAY_SIZE(tegra_overlay_plane_formats);
+	formats = tegra_overlay_plane_formats;
+
+	err = drm_universal_plane_init(drm, &plane->base, 1 << dc->pipe,
+				       &tegra_overlay_plane_funcs, formats,
+				       num_formats, DRM_PLANE_TYPE_OVERLAY);
+	if (err < 0) {
+		kfree(plane);
+		return ERR_PTR(err);
+	}
+
+	return &plane->base;
+}
+
 static int tegra_dc_add_planes(struct drm_device *drm, struct tegra_dc *dc)
 {
+	struct drm_plane *plane;
 	unsigned int i;
-	int err = 0;
 
 	for (i = 0; i < 2; i++) {
-		struct tegra_plane *plane;
-
-		plane = kzalloc(sizeof(*plane), GFP_KERNEL);
-		if (!plane)
-			return -ENOMEM;
-
-		plane->index = 1 + i;
-
-		err = drm_plane_init(drm, &plane->base, 1 << dc->pipe,
-				     &tegra_plane_funcs, plane_formats,
-				     ARRAY_SIZE(plane_formats), false);
-		if (err < 0) {
-			kfree(plane);
-			return err;
-		}
+		plane = tegra_dc_overlay_plane_create(drm, dc, 1 + i);
+		if (IS_ERR(plane))
+			return PTR_ERR(plane);
 	}
 
 	return 0;
@@ -515,10 +774,8 @@
 	tegra_dc_writel(dc, h_offset, DC_WINBUF_ADDR_H_OFFSET);
 	tegra_dc_writel(dc, v_offset, DC_WINBUF_ADDR_V_OFFSET);
 
-	value = GENERAL_UPDATE | WIN_A_UPDATE;
-	tegra_dc_writel(dc, value, DC_CMD_STATE_CONTROL);
-
 	value = GENERAL_ACT_REQ | WIN_A_ACT_REQ;
+	tegra_dc_writel(dc, value << 8, DC_CMD_STATE_CONTROL);
 	tegra_dc_writel(dc, value, DC_CMD_STATE_CONTROL);
 
 	return 0;
@@ -550,109 +807,6 @@
 	spin_unlock_irqrestore(&dc->lock, flags);
 }
 
-static int tegra_dc_cursor_set2(struct drm_crtc *crtc, struct drm_file *file,
-				uint32_t handle, uint32_t width,
-				uint32_t height, int32_t hot_x, int32_t hot_y)
-{
-	unsigned long value = CURSOR_CLIP_DISPLAY;
-	struct tegra_dc *dc = to_tegra_dc(crtc);
-	struct drm_gem_object *gem;
-	struct tegra_bo *bo = NULL;
-
-	if (!dc->soc->supports_cursor)
-		return -ENXIO;
-
-	if (width != height)
-		return -EINVAL;
-
-	switch (width) {
-	case 32:
-		value |= CURSOR_SIZE_32x32;
-		break;
-
-	case 64:
-		value |= CURSOR_SIZE_64x64;
-		break;
-
-	case 128:
-		value |= CURSOR_SIZE_128x128;
-
-	case 256:
-		value |= CURSOR_SIZE_256x256;
-		break;
-
-	default:
-		return -EINVAL;
-	}
-
-	if (handle) {
-		gem = drm_gem_object_lookup(crtc->dev, file, handle);
-		if (!gem)
-			return -ENOENT;
-
-		bo = to_tegra_bo(gem);
-	}
-
-	if (bo) {
-		unsigned long addr = (bo->paddr & 0xfffffc00) >> 10;
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-		unsigned long high = (bo->paddr & 0xfffffffc) >> 32;
-#endif
-
-		tegra_dc_writel(dc, value | addr, DC_DISP_CURSOR_START_ADDR);
-
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-		tegra_dc_writel(dc, high, DC_DISP_CURSOR_START_ADDR_HI);
-#endif
-
-		value = tegra_dc_readl(dc, DC_DISP_DISP_WIN_OPTIONS);
-		value |= CURSOR_ENABLE;
-		tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS);
-
-		value = tegra_dc_readl(dc, DC_DISP_BLEND_CURSOR_CONTROL);
-		value &= ~CURSOR_DST_BLEND_MASK;
-		value &= ~CURSOR_SRC_BLEND_MASK;
-		value |= CURSOR_MODE_NORMAL;
-		value |= CURSOR_DST_BLEND_NEG_K1_TIMES_SRC;
-		value |= CURSOR_SRC_BLEND_K1_TIMES_SRC;
-		value |= CURSOR_ALPHA;
-		tegra_dc_writel(dc, value, DC_DISP_BLEND_CURSOR_CONTROL);
-	} else {
-		value = tegra_dc_readl(dc, DC_DISP_DISP_WIN_OPTIONS);
-		value &= ~CURSOR_ENABLE;
-		tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS);
-	}
-
-	tegra_dc_writel(dc, CURSOR_ACT_REQ << 8, DC_CMD_STATE_CONTROL);
-	tegra_dc_writel(dc, CURSOR_ACT_REQ, DC_CMD_STATE_CONTROL);
-
-	tegra_dc_writel(dc, GENERAL_ACT_REQ << 8, DC_CMD_STATE_CONTROL);
-	tegra_dc_writel(dc, GENERAL_ACT_REQ, DC_CMD_STATE_CONTROL);
-
-	return 0;
-}
-
-static int tegra_dc_cursor_move(struct drm_crtc *crtc, int x, int y)
-{
-	struct tegra_dc *dc = to_tegra_dc(crtc);
-	unsigned long value;
-
-	if (!dc->soc->supports_cursor)
-		return -ENXIO;
-
-	value = ((y & 0x3fff) << 16) | (x & 0x3fff);
-	tegra_dc_writel(dc, value, DC_DISP_CURSOR_POSITION);
-
-	tegra_dc_writel(dc, CURSOR_ACT_REQ << 8, DC_CMD_STATE_CONTROL);
-	tegra_dc_writel(dc, CURSOR_ACT_REQ, DC_CMD_STATE_CONTROL);
-
-	/* XXX: only required on generations earlier than Tegra124? */
-	tegra_dc_writel(dc, GENERAL_ACT_REQ << 8, DC_CMD_STATE_CONTROL);
-	tegra_dc_writel(dc, GENERAL_ACT_REQ, DC_CMD_STATE_CONTROL);
-
-	return 0;
-}
-
 static void tegra_dc_finish_page_flip(struct tegra_dc *dc)
 {
 	struct drm_device *drm = dc->base.dev;
@@ -729,8 +883,6 @@
 }
 
 static const struct drm_crtc_funcs tegra_crtc_funcs = {
-	.cursor_set2 = tegra_dc_cursor_set2,
-	.cursor_move = tegra_dc_cursor_move,
 	.page_flip = tegra_dc_page_flip,
 	.set_config = drm_crtc_helper_set_config,
 	.destroy = tegra_dc_destroy,
@@ -744,7 +896,7 @@
 
 	drm_for_each_legacy_plane(plane, &drm->mode_config.plane_list) {
 		if (plane->crtc == crtc) {
-			tegra_plane_disable(plane);
+			tegra_window_plane_disable(plane);
 			plane->crtc = NULL;
 
 			if (plane->fb) {
@@ -755,6 +907,7 @@
 	}
 
 	drm_vblank_off(drm, dc->pipe);
+	tegra_dc_commit(dc);
 }
 
 static bool tegra_crtc_mode_fixup(struct drm_crtc *crtc,
@@ -937,15 +1090,9 @@
 static void tegra_crtc_commit(struct drm_crtc *crtc)
 {
 	struct tegra_dc *dc = to_tegra_dc(crtc);
-	unsigned long value;
-
-	value = GENERAL_UPDATE | WIN_A_UPDATE;
-	tegra_dc_writel(dc, value, DC_CMD_STATE_CONTROL);
-
-	value = GENERAL_ACT_REQ | WIN_A_ACT_REQ;
-	tegra_dc_writel(dc, value, DC_CMD_STATE_CONTROL);
 
 	drm_vblank_post_modeset(crtc->dev, dc->pipe);
+	tegra_dc_commit(dc);
 }
 
 static void tegra_crtc_load_lut(struct drm_crtc *crtc)
@@ -999,7 +1146,7 @@
 	struct tegra_dc *dc = node->info_ent->data;
 
 #define DUMP_REG(name)						\
-	seq_printf(s, "%-40s %#05x %08lx\n", #name, name,	\
+	seq_printf(s, "%-40s %#05x %08x\n", #name, name,	\
 		   tegra_dc_readl(dc, name))
 
 	DUMP_REG(DC_CMD_GENERAL_INCR_SYNCPT);
@@ -1287,9 +1434,40 @@
 	struct drm_device *drm = dev_get_drvdata(client->parent);
 	struct tegra_dc *dc = host1x_client_to_dc(client);
 	struct tegra_drm *tegra = drm->dev_private;
+	struct drm_plane *primary = NULL;
+	struct drm_plane *cursor = NULL;
 	int err;
 
-	drm_crtc_init(drm, &dc->base, &tegra_crtc_funcs);
+	if (tegra->domain) {
+		err = iommu_attach_device(tegra->domain, dc->dev);
+		if (err < 0) {
+			dev_err(dc->dev, "failed to attach to domain: %d\n",
+				err);
+			return err;
+		}
+
+		dc->domain = tegra->domain;
+	}
+
+	primary = tegra_dc_primary_plane_create(drm, dc);
+	if (IS_ERR(primary)) {
+		err = PTR_ERR(primary);
+		goto cleanup;
+	}
+
+	if (dc->soc->supports_cursor) {
+		cursor = tegra_dc_cursor_plane_create(drm, dc);
+		if (IS_ERR(cursor)) {
+			err = PTR_ERR(cursor);
+			goto cleanup;
+		}
+	}
+
+	err = drm_crtc_init_with_planes(drm, &dc->base, primary, cursor,
+					&tegra_crtc_funcs);
+	if (err < 0)
+		goto cleanup;
+
 	drm_mode_crtc_set_gamma_size(&dc->base, 256);
 	drm_crtc_helper_add(&dc->base, &tegra_crtc_helper_funcs);
 
@@ -1303,12 +1481,12 @@
 	err = tegra_dc_rgb_init(drm, dc);
 	if (err < 0 && err != -ENODEV) {
 		dev_err(dc->dev, "failed to initialize RGB output: %d\n", err);
-		return err;
+		goto cleanup;
 	}
 
 	err = tegra_dc_add_planes(drm, dc);
 	if (err < 0)
-		return err;
+		goto cleanup;
 
 	if (IS_ENABLED(CONFIG_DEBUG_FS)) {
 		err = tegra_dc_debugfs_init(dc, drm->primary);
@@ -1321,10 +1499,24 @@
 	if (err < 0) {
 		dev_err(dc->dev, "failed to request IRQ#%u: %d\n", dc->irq,
 			err);
-		return err;
+		goto cleanup;
 	}
 
 	return 0;
+
+cleanup:
+	if (cursor)
+		drm_plane_cleanup(cursor);
+
+	if (primary)
+		drm_plane_cleanup(primary);
+
+	if (tegra->domain) {
+		iommu_detach_device(tegra->domain, dc->dev);
+		dc->domain = NULL;
+	}
+
+	return err;
 }
 
 static int tegra_dc_exit(struct host1x_client *client)
@@ -1346,6 +1538,11 @@
 		return err;
 	}
 
+	if (dc->domain) {
+		iommu_detach_device(dc->domain, dc->dev);
+		dc->domain = NULL;
+	}
+
 	return 0;
 }
 
@@ -1359,6 +1556,7 @@
 	.supports_cursor = false,
 	.supports_block_linear = false,
 	.pitch_align = 8,
+	.has_powergate = false,
 };
 
 static const struct tegra_dc_soc_info tegra30_dc_soc_info = {
@@ -1366,6 +1564,7 @@
 	.supports_cursor = false,
 	.supports_block_linear = false,
 	.pitch_align = 8,
+	.has_powergate = false,
 };
 
 static const struct tegra_dc_soc_info tegra114_dc_soc_info = {
@@ -1373,6 +1572,7 @@
 	.supports_cursor = false,
 	.supports_block_linear = false,
 	.pitch_align = 64,
+	.has_powergate = true,
 };
 
 static const struct tegra_dc_soc_info tegra124_dc_soc_info = {
@@ -1380,6 +1580,7 @@
 	.supports_cursor = true,
 	.supports_block_linear = true,
 	.pitch_align = 64,
+	.has_powergate = true,
 };
 
 static const struct of_device_id tegra_dc_of_match[] = {
@@ -1387,6 +1588,9 @@
 		.compatible = "nvidia,tegra124-dc",
 		.data = &tegra124_dc_soc_info,
 	}, {
+		.compatible = "nvidia,tegra114-dc",
+		.data = &tegra114_dc_soc_info,
+	}, {
 		.compatible = "nvidia,tegra30-dc",
 		.data = &tegra30_dc_soc_info,
 	}, {
@@ -1469,9 +1673,34 @@
 		return PTR_ERR(dc->rst);
 	}
 
-	err = clk_prepare_enable(dc->clk);
-	if (err < 0)
-		return err;
+	if (dc->soc->has_powergate) {
+		if (dc->pipe == 0)
+			dc->powergate = TEGRA_POWERGATE_DIS;
+		else
+			dc->powergate = TEGRA_POWERGATE_DISB;
+
+		err = tegra_powergate_sequence_power_up(dc->powergate, dc->clk,
+							dc->rst);
+		if (err < 0) {
+			dev_err(&pdev->dev, "failed to power partition: %d\n",
+				err);
+			return err;
+		}
+	} else {
+		err = clk_prepare_enable(dc->clk);
+		if (err < 0) {
+			dev_err(&pdev->dev, "failed to enable clock: %d\n",
+				err);
+			return err;
+		}
+
+		err = reset_control_deassert(dc->rst);
+		if (err < 0) {
+			dev_err(&pdev->dev, "failed to deassert reset: %d\n",
+				err);
+			return err;
+		}
+	}
 
 	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	dc->regs = devm_ioremap_resource(&pdev->dev, regs);
@@ -1525,6 +1754,10 @@
 	}
 
 	reset_control_assert(dc->rst);
+
+	if (dc->soc->has_powergate)
+		tegra_powergate_power_off(dc->powergate);
+
 	clk_disable_unprepare(dc->clk);
 
 	return 0;
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 59736bb..e549afe 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -8,6 +8,7 @@
  */
 
 #include <linux/host1x.h>
+#include <linux/iommu.h>
 
 #include "drm.h"
 #include "gem.h"
@@ -33,6 +34,17 @@
 	if (!tegra)
 		return -ENOMEM;
 
+	if (iommu_present(&platform_bus_type)) {
+		tegra->domain = iommu_domain_alloc(&platform_bus_type);
+		if (IS_ERR(tegra->domain)) {
+			err = PTR_ERR(tegra->domain);
+			goto free;
+		}
+
+		DRM_DEBUG("IOMMU context initialized\n");
+		drm_mm_init(&tegra->mm, 0, SZ_2G);
+	}
+
 	mutex_init(&tegra->clients_lock);
 	INIT_LIST_HEAD(&tegra->clients);
 	drm->dev_private = tegra;
@@ -42,13 +54,13 @@
 
 	err = tegra_drm_fb_prepare(drm);
 	if (err < 0)
-		return err;
+		goto config;
 
 	drm_kms_helper_poll_init(drm);
 
 	err = host1x_device_init(device);
 	if (err < 0)
-		return err;
+		goto fbdev;
 
 	/*
 	 * We don't use the drm_irq_install() helpers provided by the DRM
@@ -59,18 +71,37 @@
 
 	err = drm_vblank_init(drm, drm->mode_config.num_crtc);
 	if (err < 0)
-		return err;
+		goto device;
 
 	err = tegra_drm_fb_init(drm);
 	if (err < 0)
-		return err;
+		goto vblank;
 
 	return 0;
+
+vblank:
+	drm_vblank_cleanup(drm);
+device:
+	host1x_device_exit(device);
+fbdev:
+	drm_kms_helper_poll_fini(drm);
+	tegra_drm_fb_free(drm);
+config:
+	drm_mode_config_cleanup(drm);
+
+	if (tegra->domain) {
+		iommu_domain_free(tegra->domain);
+		drm_mm_takedown(&tegra->mm);
+	}
+free:
+	kfree(tegra);
+	return err;
 }
 
 static int tegra_drm_unload(struct drm_device *drm)
 {
 	struct host1x_device *device = to_host1x_device(drm->dev);
+	struct tegra_drm *tegra = drm->dev_private;
 	int err;
 
 	drm_kms_helper_poll_fini(drm);
@@ -82,6 +113,13 @@
 	if (err < 0)
 		return err;
 
+	if (tegra->domain) {
+		iommu_domain_free(tegra->domain);
+		drm_mm_takedown(&tegra->mm);
+	}
+
+	kfree(tegra);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
index e89c70f..3a3b2e7 100644
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -39,6 +39,9 @@
 struct tegra_drm {
 	struct drm_device *drm;
 
+	struct iommu_domain *domain;
+	struct drm_mm mm;
+
 	struct mutex clients_lock;
 	struct list_head clients;
 
@@ -101,6 +104,7 @@
 	spinlock_t lock;
 
 	struct drm_crtc base;
+	int powergate;
 	int pipe;
 
 	struct clk *clk;
@@ -120,6 +124,8 @@
 	struct drm_pending_vblank_event *event;
 
 	const struct tegra_dc_soc_info *soc;
+
+	struct iommu_domain *domain;
 };
 
 static inline struct tegra_dc *
@@ -133,16 +139,15 @@
 	return crtc ? container_of(crtc, struct tegra_dc, base) : NULL;
 }
 
-static inline void tegra_dc_writel(struct tegra_dc *dc, unsigned long value,
-				   unsigned long reg)
+static inline void tegra_dc_writel(struct tegra_dc *dc, u32 value,
+				   unsigned long offset)
 {
-	writel(value, dc->regs + (reg << 2));
+	writel(value, dc->regs + (offset << 2));
 }
 
-static inline unsigned long tegra_dc_readl(struct tegra_dc *dc,
-					   unsigned long reg)
+static inline u32 tegra_dc_readl(struct tegra_dc *dc, unsigned long offset)
 {
-	return readl(dc->regs + (reg << 2));
+	return readl(dc->regs + (offset << 2));
 }
 
 struct tegra_dc_window {
@@ -287,6 +292,7 @@
 int tegra_fb_get_tiling(struct drm_framebuffer *framebuffer,
 			struct tegra_bo_tiling *tiling);
 int tegra_drm_fb_prepare(struct drm_device *drm);
+void tegra_drm_fb_free(struct drm_device *drm);
 int tegra_drm_fb_init(struct drm_device *drm);
 void tegra_drm_fb_exit(struct drm_device *drm);
 #ifdef CONFIG_DRM_TEGRA_FBDEV
diff --git a/drivers/gpu/drm/tegra/dsi.c b/drivers/gpu/drm/tegra/dsi.c
index f787445..33f67fd 100644
--- a/drivers/gpu/drm/tegra/dsi.c
+++ b/drivers/gpu/drm/tegra/dsi.c
@@ -11,6 +11,7 @@
 #include <linux/host1x.h>
 #include <linux/module.h>
 #include <linux/of.h>
+#include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/reset.h>
 
@@ -26,9 +27,6 @@
 #include "dsi.h"
 #include "mipi-phy.h"
 
-#define DSI_VIDEO_FIFO_DEPTH (1920 / 4)
-#define DSI_HOST_FIFO_DEPTH 64
-
 struct tegra_dsi {
 	struct host1x_client client;
 	struct tegra_output output;
@@ -54,6 +52,13 @@
 
 	struct regulator *vdd;
 	bool enabled;
+
+	unsigned int video_fifo_depth;
+	unsigned int host_fifo_depth;
+
+	/* for ganged-mode support */
+	struct tegra_dsi *master;
+	struct tegra_dsi *slave;
 };
 
 static inline struct tegra_dsi *
@@ -318,6 +323,21 @@
 	[11] = PKT_ID0(MIPI_DSI_BLANKING_PACKET) | PKT_LEN0(4),
 };
 
+static const u32 pkt_seq_command_mode[NUM_PKT_SEQ] = {
+	[ 0] = 0,
+	[ 1] = 0,
+	[ 2] = 0,
+	[ 3] = 0,
+	[ 4] = 0,
+	[ 5] = 0,
+	[ 6] = PKT_ID0(MIPI_DSI_DCS_LONG_WRITE) | PKT_LEN0(3) | PKT_LP,
+	[ 7] = 0,
+	[ 8] = 0,
+	[ 9] = 0,
+	[10] = PKT_ID0(MIPI_DSI_DCS_LONG_WRITE) | PKT_LEN0(5) | PKT_LP,
+	[11] = 0,
+};
+
 static int tegra_dsi_set_phy_timing(struct tegra_dsi *dsi)
 {
 	struct mipi_dphy_timing timing;
@@ -329,7 +349,7 @@
 	if (rate < 0)
 		return rate;
 
-	period = DIV_ROUND_CLOSEST(1000000000UL, rate * 2);
+	period = DIV_ROUND_CLOSEST(NSEC_PER_SEC, rate * 2);
 
 	err = mipi_dphy_timing_get_default(&timing, period);
 	if (err < 0)
@@ -369,6 +389,9 @@
 		DSI_TIMING_FIELD(timing.tago, period, 1);
 	tegra_dsi_writel(dsi, value, DSI_BTA_TIMING);
 
+	if (dsi->slave)
+		return tegra_dsi_set_phy_timing(dsi->slave);
+
 	return 0;
 }
 
@@ -426,26 +449,59 @@
 	return 0;
 }
 
-static int tegra_output_dsi_enable(struct tegra_output *output)
+static void tegra_dsi_ganged_enable(struct tegra_dsi *dsi, unsigned int start,
+				    unsigned int size)
 {
-	struct tegra_dc *dc = to_tegra_dc(output->encoder.crtc);
-	struct drm_display_mode *mode = &dc->base.mode;
-	unsigned int hact, hsw, hbp, hfp, i, mul, div;
-	struct tegra_dsi *dsi = to_dsi(output);
-	enum tegra_dsi_format format;
-	unsigned long value;
-	const u32 *pkt_seq;
-	int err;
+	u32 value;
 
-	if (dsi->enabled)
-		return 0;
+	tegra_dsi_writel(dsi, start, DSI_GANGED_MODE_START);
+	tegra_dsi_writel(dsi, size << 16 | size, DSI_GANGED_MODE_SIZE);
+
+	value = DSI_GANGED_MODE_CONTROL_ENABLE;
+	tegra_dsi_writel(dsi, value, DSI_GANGED_MODE_CONTROL);
+}
+
+static void tegra_dsi_enable(struct tegra_dsi *dsi)
+{
+	u32 value;
+
+	value = tegra_dsi_readl(dsi, DSI_POWER_CONTROL);
+	value |= DSI_POWER_CONTROL_ENABLE;
+	tegra_dsi_writel(dsi, value, DSI_POWER_CONTROL);
+
+	if (dsi->slave)
+		tegra_dsi_enable(dsi->slave);
+}
+
+static unsigned int tegra_dsi_get_lanes(struct tegra_dsi *dsi)
+{
+	if (dsi->master)
+		return dsi->master->lanes + dsi->lanes;
+
+	if (dsi->slave)
+		return dsi->lanes + dsi->slave->lanes;
+
+	return dsi->lanes;
+}
+
+static int tegra_dsi_configure(struct tegra_dsi *dsi, unsigned int pipe,
+			       const struct drm_display_mode *mode)
+{
+	unsigned int hact, hsw, hbp, hfp, i, mul, div;
+	enum tegra_dsi_format format;
+	const u32 *pkt_seq;
+	u32 value;
+	int err;
 
 	if (dsi->flags & MIPI_DSI_MODE_VIDEO_SYNC_PULSE) {
 		DRM_DEBUG_KMS("Non-burst video mode with sync pulses\n");
 		pkt_seq = pkt_seq_video_non_burst_sync_pulses;
-	} else {
+	} else if (dsi->flags & MIPI_DSI_MODE_VIDEO) {
 		DRM_DEBUG_KMS("Non-burst video mode with sync events\n");
 		pkt_seq = pkt_seq_video_non_burst_sync_events;
+	} else {
+		DRM_DEBUG_KMS("Command mode\n");
+		pkt_seq = pkt_seq_command_mode;
 	}
 
 	err = tegra_dsi_get_muldiv(dsi->format, &mul, &div);
@@ -456,61 +512,136 @@
 	if (err < 0)
 		return err;
 
-	err = clk_enable(dsi->clk);
-	if (err < 0)
-		return err;
-
-	reset_control_deassert(dsi->rst);
-
 	value = DSI_CONTROL_CHANNEL(0) | DSI_CONTROL_FORMAT(format) |
 		DSI_CONTROL_LANES(dsi->lanes - 1) |
-		DSI_CONTROL_SOURCE(dc->pipe);
+		DSI_CONTROL_SOURCE(pipe);
 	tegra_dsi_writel(dsi, value, DSI_CONTROL);
 
-	tegra_dsi_writel(dsi, DSI_VIDEO_FIFO_DEPTH, DSI_MAX_THRESHOLD);
+	tegra_dsi_writel(dsi, dsi->video_fifo_depth, DSI_MAX_THRESHOLD);
 
-	value = DSI_HOST_CONTROL_HS | DSI_HOST_CONTROL_CS |
-		DSI_HOST_CONTROL_ECC;
+	value = DSI_HOST_CONTROL_HS;
 	tegra_dsi_writel(dsi, value, DSI_HOST_CONTROL);
 
 	value = tegra_dsi_readl(dsi, DSI_CONTROL);
+
 	if (dsi->flags & MIPI_DSI_CLOCK_NON_CONTINUOUS)
 		value |= DSI_CONTROL_HS_CLK_CTRL;
+
 	value &= ~DSI_CONTROL_TX_TRIG(3);
-	value &= ~DSI_CONTROL_DCS_ENABLE;
+
+	/* enable DCS commands for command mode */
+	if (dsi->flags & MIPI_DSI_MODE_VIDEO)
+		value &= ~DSI_CONTROL_DCS_ENABLE;
+	else
+		value |= DSI_CONTROL_DCS_ENABLE;
+
 	value |= DSI_CONTROL_VIDEO_ENABLE;
 	value &= ~DSI_CONTROL_HOST_ENABLE;
 	tegra_dsi_writel(dsi, value, DSI_CONTROL);
 
-	err = tegra_dsi_set_phy_timing(dsi);
-	if (err < 0)
-		return err;
-
 	for (i = 0; i < NUM_PKT_SEQ; i++)
 		tegra_dsi_writel(dsi, pkt_seq[i], DSI_PKT_SEQ_0_LO + i);
 
-	/* horizontal active pixels */
-	hact = mode->hdisplay * mul / div;
+	if (dsi->flags & MIPI_DSI_MODE_VIDEO) {
+		/* horizontal active pixels */
+		hact = mode->hdisplay * mul / div;
 
-	/* horizontal sync width */
-	hsw = (mode->hsync_end - mode->hsync_start) * mul / div;
-	hsw -= 10;
+		/* horizontal sync width */
+		hsw = (mode->hsync_end - mode->hsync_start) * mul / div;
+		hsw -= 10;
 
-	/* horizontal back porch */
-	hbp = (mode->htotal - mode->hsync_end) * mul / div;
-	hbp -= 14;
+		/* horizontal back porch */
+		hbp = (mode->htotal - mode->hsync_end) * mul / div;
+		hbp -= 14;
 
-	/* horizontal front porch */
-	hfp = (mode->hsync_start  - mode->hdisplay) * mul / div;
-	hfp -= 8;
+		/* horizontal front porch */
+		hfp = (mode->hsync_start - mode->hdisplay) * mul / div;
+		hfp -= 8;
 
-	tegra_dsi_writel(dsi, hsw << 16 | 0, DSI_PKT_LEN_0_1);
-	tegra_dsi_writel(dsi, hact << 16 | hbp, DSI_PKT_LEN_2_3);
-	tegra_dsi_writel(dsi, hfp, DSI_PKT_LEN_4_5);
-	tegra_dsi_writel(dsi, 0x0f0f << 16, DSI_PKT_LEN_6_7);
+		tegra_dsi_writel(dsi, hsw << 16 | 0, DSI_PKT_LEN_0_1);
+		tegra_dsi_writel(dsi, hact << 16 | hbp, DSI_PKT_LEN_2_3);
+		tegra_dsi_writel(dsi, hfp, DSI_PKT_LEN_4_5);
+		tegra_dsi_writel(dsi, 0x0f0f << 16, DSI_PKT_LEN_6_7);
 
-	/* set SOL delay */
-	tegra_dsi_writel(dsi, 8 * mul / div, DSI_SOL_DELAY);
+		/* set SOL delay (for non-burst mode only) */
+		tegra_dsi_writel(dsi, 8 * mul / div, DSI_SOL_DELAY);
+
+		/* TODO: implement ganged mode */
+	} else {
+		u16 bytes;
+
+		if (dsi->master || dsi->slave) {
+			/*
+			 * For ganged mode, assume symmetric left-right mode.
+			 */
+			bytes = 1 + (mode->hdisplay / 2) * mul / div;
+		} else {
+			/* 1 byte (DCS command) + pixel data */
+			bytes = 1 + mode->hdisplay * mul / div;
+		}
+
+		tegra_dsi_writel(dsi, 0, DSI_PKT_LEN_0_1);
+		tegra_dsi_writel(dsi, bytes << 16, DSI_PKT_LEN_2_3);
+		tegra_dsi_writel(dsi, bytes << 16, DSI_PKT_LEN_4_5);
+		tegra_dsi_writel(dsi, 0, DSI_PKT_LEN_6_7);
+
+		value = MIPI_DCS_WRITE_MEMORY_START << 8 |
+			MIPI_DCS_WRITE_MEMORY_CONTINUE;
+		tegra_dsi_writel(dsi, value, DSI_DCS_CMDS);
+
+		/* set SOL delay */
+		if (dsi->master || dsi->slave) {
+			unsigned int lanes = tegra_dsi_get_lanes(dsi);
+			unsigned long delay, bclk, bclk_ganged;
+
+			/* SOL to valid, valid to FIFO and FIFO write delay */
+			delay = 4 + 4 + 2;
+			delay = DIV_ROUND_UP(delay * mul, div * lanes);
+			/* FIFO read delay */
+			delay = delay + 6;
+
+			bclk = DIV_ROUND_UP(mode->htotal * mul, div * lanes);
+			bclk_ganged = DIV_ROUND_UP(bclk * lanes / 2, lanes);
+			value = bclk - bclk_ganged + delay + 20;
+		} else {
+			/* TODO: revisit for non-ganged mode */
+			value = 8 * mul / div;
+		}
+
+		tegra_dsi_writel(dsi, value, DSI_SOL_DELAY);
+	}
+
+	if (dsi->slave) {
+		err = tegra_dsi_configure(dsi->slave, pipe, mode);
+		if (err < 0)
+			return err;
+
+		/*
+		 * TODO: Support modes other than symmetrical left-right
+		 * split.
+		 */
+		tegra_dsi_ganged_enable(dsi, 0, mode->hdisplay / 2);
+		tegra_dsi_ganged_enable(dsi->slave, mode->hdisplay / 2,
+					mode->hdisplay / 2);
+	}
+
+	return 0;
+}
+
+static int tegra_output_dsi_enable(struct tegra_output *output)
+{
+	struct tegra_dc *dc = to_tegra_dc(output->encoder.crtc);
+	const struct drm_display_mode *mode = &dc->base.mode;
+	struct tegra_dsi *dsi = to_dsi(output);
+	u32 value;
+	int err;
+
+	if (dsi->enabled)
+		return 0;
+
+	err = tegra_dsi_configure(dsi, dc->pipe, mode);
+	if (err < 0)
+		return err;
 
 	/* enable display controller */
 	value = tegra_dc_readl(dc, DC_DISP_DISP_WIN_OPTIONS);
@@ -531,28 +662,79 @@
 	tegra_dc_writel(dc, GENERAL_ACT_REQ, DC_CMD_STATE_CONTROL);
 
 	/* enable DSI controller */
-	value = tegra_dsi_readl(dsi, DSI_POWER_CONTROL);
-	value |= DSI_POWER_CONTROL_ENABLE;
-	tegra_dsi_writel(dsi, value, DSI_POWER_CONTROL);
+	tegra_dsi_enable(dsi);
 
 	dsi->enabled = true;
 
 	return 0;
 }
 
+static int tegra_dsi_wait_idle(struct tegra_dsi *dsi, unsigned long timeout)
+{
+	u32 value;
+
+	timeout = jiffies + msecs_to_jiffies(timeout);
+
+	while (time_before(jiffies, timeout)) {
+		value = tegra_dsi_readl(dsi, DSI_STATUS);
+		if (value & DSI_STATUS_IDLE)
+			return 0;
+
+		usleep_range(1000, 2000);
+	}
+
+	return -ETIMEDOUT;
+}
+
+static void tegra_dsi_video_disable(struct tegra_dsi *dsi)
+{
+	u32 value;
+
+	value = tegra_dsi_readl(dsi, DSI_CONTROL);
+	value &= ~DSI_CONTROL_VIDEO_ENABLE;
+	tegra_dsi_writel(dsi, value, DSI_CONTROL);
+
+	if (dsi->slave)
+		tegra_dsi_video_disable(dsi->slave);
+}
+
+static void tegra_dsi_ganged_disable(struct tegra_dsi *dsi)
+{
+	tegra_dsi_writel(dsi, 0, DSI_GANGED_MODE_START);
+	tegra_dsi_writel(dsi, 0, DSI_GANGED_MODE_SIZE);
+	tegra_dsi_writel(dsi, 0, DSI_GANGED_MODE_CONTROL);
+}
+
+static void tegra_dsi_disable(struct tegra_dsi *dsi)
+{
+	u32 value;
+
+	if (dsi->slave) {
+		tegra_dsi_ganged_disable(dsi->slave);
+		tegra_dsi_ganged_disable(dsi);
+	}
+
+	value = tegra_dsi_readl(dsi, DSI_POWER_CONTROL);
+	value &= ~DSI_POWER_CONTROL_ENABLE;
+	tegra_dsi_writel(dsi, value, DSI_POWER_CONTROL);
+
+	if (dsi->slave)
+		tegra_dsi_disable(dsi->slave);
+
+	usleep_range(5000, 10000);
+}
+
 static int tegra_output_dsi_disable(struct tegra_output *output)
 {
 	struct tegra_dc *dc = to_tegra_dc(output->encoder.crtc);
 	struct tegra_dsi *dsi = to_dsi(output);
 	unsigned long value;
+	int err;
 
 	if (!dsi->enabled)
 		return 0;
 
-	/* disable DSI controller */
-	value = tegra_dsi_readl(dsi, DSI_POWER_CONTROL);
-	value &= ~DSI_POWER_CONTROL_ENABLE;
-	tegra_dsi_writel(dsi, value, DSI_POWER_CONTROL);
+	tegra_dsi_video_disable(dsi);
 
 	/*
 	 * The following accesses registers of the display controller, so make
@@ -576,39 +758,68 @@
 		tegra_dc_writel(dc, GENERAL_ACT_REQ, DC_CMD_STATE_CONTROL);
 	}
 
-	clk_disable(dsi->clk);
+	err = tegra_dsi_wait_idle(dsi, 100);
+	if (err < 0)
+		dev_dbg(dsi->dev, "failed to idle DSI: %d\n", err);
+
+	tegra_dsi_disable(dsi);
 
 	dsi->enabled = false;
 
 	return 0;
 }
 
+static void tegra_dsi_set_timeout(struct tegra_dsi *dsi, unsigned long bclk,
+				  unsigned int vrefresh)
+{
+	unsigned int timeout;
+	u32 value;
+
+	/* one frame high-speed transmission timeout */
+	timeout = (bclk / vrefresh) / 512;
+	value = DSI_TIMEOUT_LRX(0x2000) | DSI_TIMEOUT_HTX(timeout);
+	tegra_dsi_writel(dsi, value, DSI_TIMEOUT_0);
+
+	/* 2 ms peripheral timeout for panel */
+	timeout = 2 * bclk / 512 * 1000;
+	value = DSI_TIMEOUT_PR(timeout) | DSI_TIMEOUT_TA(0x2000);
+	tegra_dsi_writel(dsi, value, DSI_TIMEOUT_1);
+
+	value = DSI_TALLY_TA(0) | DSI_TALLY_LRX(0) | DSI_TALLY_HTX(0);
+	tegra_dsi_writel(dsi, value, DSI_TO_TALLY);
+
+	if (dsi->slave)
+		tegra_dsi_set_timeout(dsi->slave, bclk, vrefresh);
+}
+
 static int tegra_output_dsi_setup_clock(struct tegra_output *output,
 					struct clk *clk, unsigned long pclk,
 					unsigned int *divp)
 {
 	struct tegra_dc *dc = to_tegra_dc(output->encoder.crtc);
 	struct drm_display_mode *mode = &dc->base.mode;
-	unsigned int timeout, mul, div, vrefresh;
 	struct tegra_dsi *dsi = to_dsi(output);
-	unsigned long bclk, plld, value;
+	unsigned int mul, div, vrefresh, lanes;
+	unsigned long bclk, plld;
 	int err;
 
+	lanes = tegra_dsi_get_lanes(dsi);
+
 	err = tegra_dsi_get_muldiv(dsi->format, &mul, &div);
 	if (err < 0)
 		return err;
 
-	DRM_DEBUG_KMS("mul: %u, div: %u, lanes: %u\n", mul, div, dsi->lanes);
+	DRM_DEBUG_KMS("mul: %u, div: %u, lanes: %u\n", mul, div, lanes);
 	vrefresh = drm_mode_vrefresh(mode);
 	DRM_DEBUG_KMS("vrefresh: %u\n", vrefresh);
 
 	/* compute byte clock */
-	bclk = (pclk * mul) / (div * dsi->lanes);
+	bclk = (pclk * mul) / (div * lanes);
 
 	/*
 	 * Compute bit clock and round up to the next MHz.
 	 */
-	plld = DIV_ROUND_UP(bclk * 8, 1000000) * 1000000;
+	plld = DIV_ROUND_UP(bclk * 8, USEC_PER_SEC) * USEC_PER_SEC;
 
 	/*
 	 * We divide the frequency by two here, but we make up for that by
@@ -640,25 +851,17 @@
 	 * not working properly otherwise. Perhaps the PLLs cannot generate
 	 * frequencies sufficiently high.
 	 */
-	*divp = ((8 * mul) / (div * dsi->lanes)) - 2;
+	*divp = ((8 * mul) / (div * lanes)) - 2;
 
 	/*
 	 * XXX: Move the below somewhere else so that we don't need to have
 	 * access to the vrefresh in this function?
 	 */
+	tegra_dsi_set_timeout(dsi, bclk, vrefresh);
 
-	/* one frame high-speed transmission timeout */
-	timeout = (bclk / vrefresh) / 512;
-	value = DSI_TIMEOUT_LRX(0x2000) | DSI_TIMEOUT_HTX(timeout);
-	tegra_dsi_writel(dsi, value, DSI_TIMEOUT_0);
-
-	/* 2 ms peripheral timeout for panel */
-	timeout = 2 * bclk / 512 * 1000;
-	value = DSI_TIMEOUT_PR(timeout) | DSI_TIMEOUT_TA(0x2000);
-	tegra_dsi_writel(dsi, value, DSI_TIMEOUT_1);
-
-	value = DSI_TALLY_TA(0) | DSI_TALLY_LRX(0) | DSI_TALLY_HTX(0);
-	tegra_dsi_writel(dsi, value, DSI_TO_TALLY);
+	err = tegra_dsi_set_phy_timing(dsi);
+	if (err < 0)
+		return err;
 
 	return 0;
 }
@@ -695,7 +898,7 @@
 
 static int tegra_dsi_pad_calibrate(struct tegra_dsi *dsi)
 {
-	unsigned long value;
+	u32 value;
 
 	tegra_dsi_writel(dsi, 0, DSI_PAD_CONTROL_0);
 	tegra_dsi_writel(dsi, 0, DSI_PAD_CONTROL_1);
@@ -720,14 +923,17 @@
 	struct tegra_dsi *dsi = host1x_client_to_dsi(client);
 	int err;
 
-	dsi->output.type = TEGRA_OUTPUT_DSI;
-	dsi->output.dev = client->dev;
-	dsi->output.ops = &dsi_ops;
+	/* Gangsters must not register their own outputs. */
+	if (!dsi->master) {
+		dsi->output.type = TEGRA_OUTPUT_DSI;
+		dsi->output.dev = client->dev;
+		dsi->output.ops = &dsi_ops;
 
-	err = tegra_output_init(drm, &dsi->output);
-	if (err < 0) {
-		dev_err(client->dev, "output setup failed: %d\n", err);
-		return err;
+		err = tegra_output_init(drm, &dsi->output);
+		if (err < 0) {
+			dev_err(client->dev, "output setup failed: %d\n", err);
+			return err;
+		}
 	}
 
 	if (IS_ENABLED(CONFIG_DEBUG_FS)) {
@@ -736,12 +942,6 @@
 			dev_err(dsi->dev, "debugfs setup failed: %d\n", err);
 	}
 
-	err = tegra_dsi_pad_calibrate(dsi);
-	if (err < 0) {
-		dev_err(dsi->dev, "MIPI calibration failed: %d\n", err);
-		return err;
-	}
-
 	return 0;
 }
 
@@ -756,16 +956,20 @@
 			dev_err(dsi->dev, "debugfs cleanup failed: %d\n", err);
 	}
 
-	err = tegra_output_disable(&dsi->output);
-	if (err < 0) {
-		dev_err(client->dev, "output failed to disable: %d\n", err);
-		return err;
-	}
+	if (!dsi->master) {
+		err = tegra_output_disable(&dsi->output);
+		if (err < 0) {
+			dev_err(client->dev, "output failed to disable: %d\n",
+				err);
+			return err;
+		}
 
-	err = tegra_output_exit(&dsi->output);
-	if (err < 0) {
-		dev_err(client->dev, "output cleanup failed: %d\n", err);
-		return err;
+		err = tegra_output_exit(&dsi->output);
+		if (err < 0) {
+			dev_err(client->dev, "output cleanup failed: %d\n",
+				err);
+			return err;
+		}
 	}
 
 	return 0;
@@ -792,20 +996,324 @@
 	return 0;
 }
 
+static const char * const error_report[16] = {
+	"SoT Error",
+	"SoT Sync Error",
+	"EoT Sync Error",
+	"Escape Mode Entry Command Error",
+	"Low-Power Transmit Sync Error",
+	"Peripheral Timeout Error",
+	"False Control Error",
+	"Contention Detected",
+	"ECC Error, single-bit",
+	"ECC Error, multi-bit",
+	"Checksum Error",
+	"DSI Data Type Not Recognized",
+	"DSI VC ID Invalid",
+	"Invalid Transmission Length",
+	"Reserved",
+	"DSI Protocol Violation",
+};
+
+static ssize_t tegra_dsi_read_response(struct tegra_dsi *dsi,
+				       const struct mipi_dsi_msg *msg,
+				       size_t count)
+{
+	u8 *rx = msg->rx_buf;
+	unsigned int i, j, k;
+	size_t size = 0;
+	u16 errors;
+	u32 value;
+
+	/* read and parse packet header */
+	value = tegra_dsi_readl(dsi, DSI_RD_DATA);
+
+	switch (value & 0x3f) {
+	case MIPI_DSI_RX_ACKNOWLEDGE_AND_ERROR_REPORT:
+		errors = (value >> 8) & 0xffff;
+		dev_dbg(dsi->dev, "Acknowledge and error report: %04x\n",
+			errors);
+		for (i = 0; i < ARRAY_SIZE(error_report); i++)
+			if (errors & BIT(i))
+				dev_dbg(dsi->dev, "  %2u: %s\n", i,
+					error_report[i]);
+		break;
+
+	case MIPI_DSI_RX_DCS_SHORT_READ_RESPONSE_1BYTE:
+		rx[0] = (value >> 8) & 0xff;
+		size = 1;
+		break;
+
+	case MIPI_DSI_RX_DCS_SHORT_READ_RESPONSE_2BYTE:
+		rx[0] = (value >>  8) & 0xff;
+		rx[1] = (value >> 16) & 0xff;
+		size = 2;
+		break;
+
+	case MIPI_DSI_RX_DCS_LONG_READ_RESPONSE:
+		size = ((value >> 8) & 0xff00) | ((value >> 8) & 0xff);
+		break;
+
+	case MIPI_DSI_RX_GENERIC_LONG_READ_RESPONSE:
+		size = ((value >> 8) & 0xff00) | ((value >> 8) & 0xff);
+		break;
+
+	default:
+		dev_err(dsi->dev, "unhandled response type: %02x\n",
+			value & 0x3f);
+		return -EPROTO;
+	}
+
+	size = min(size, msg->rx_len);
+
+	if (msg->rx_buf && size > 0) {
+		for (i = 0, j = 0; i < count - 1; i++, j += 4) {
+			u8 *rx = msg->rx_buf + j;
+
+			value = tegra_dsi_readl(dsi, DSI_RD_DATA);
+
+			for (k = 0; k < 4 && (j + k) < msg->rx_len; k++)
+				rx[j + k] = (value >> (k << 3)) & 0xff;
+		}
+	}
+
+	return size;
+}
+
+static int tegra_dsi_transmit(struct tegra_dsi *dsi, unsigned long timeout)
+{
+	tegra_dsi_writel(dsi, DSI_TRIGGER_HOST, DSI_TRIGGER);
+
+	timeout = jiffies + msecs_to_jiffies(timeout);
+
+	while (time_before(jiffies, timeout)) {
+		u32 value = tegra_dsi_readl(dsi, DSI_TRIGGER);
+		if ((value & DSI_TRIGGER_HOST) == 0)
+			return 0;
+
+		usleep_range(1000, 2000);
+	}
+
+	DRM_DEBUG_KMS("timeout waiting for transmission to complete\n");
+	return -ETIMEDOUT;
+}
+
+static int tegra_dsi_wait_for_response(struct tegra_dsi *dsi,
+				       unsigned long timeout)
+{
+	timeout = jiffies + msecs_to_jiffies(250);
+
+	while (time_before(jiffies, timeout)) {
+		u32 value = tegra_dsi_readl(dsi, DSI_STATUS);
+		u8 count = value & 0x1f;
+
+		if (count > 0)
+			return count;
+
+		usleep_range(1000, 2000);
+	}
+
+	DRM_DEBUG_KMS("peripheral returned no data\n");
+	return -ETIMEDOUT;
+}
+
+static void tegra_dsi_writesl(struct tegra_dsi *dsi, unsigned long offset,
+			      const void *buffer, size_t size)
+{
+	const u8 *buf = buffer;
+	size_t i, j;
+	u32 value;
+
+	for (j = 0; j < size; j += 4) {
+		value = 0;
+
+		for (i = 0; i < 4 && j + i < size; i++)
+			value |= buf[j + i] << (i << 3);
+
+		tegra_dsi_writel(dsi, value, DSI_WR_DATA);
+	}
+}
+
+static ssize_t tegra_dsi_host_transfer(struct mipi_dsi_host *host,
+				       const struct mipi_dsi_msg *msg)
+{
+	struct tegra_dsi *dsi = host_to_tegra(host);
+	struct mipi_dsi_packet packet;
+	const u8 *header;
+	size_t count;
+	ssize_t err;
+	u32 value;
+
+	err = mipi_dsi_create_packet(&packet, msg);
+	if (err < 0)
+		return err;
+
+	header = packet.header;
+
+	/* maximum FIFO depth is 1920 words */
+	if (packet.size > dsi->video_fifo_depth * 4)
+		return -ENOSPC;
+
+	/* reset underflow/overflow flags */
+	value = tegra_dsi_readl(dsi, DSI_STATUS);
+	if (value & (DSI_STATUS_UNDERFLOW | DSI_STATUS_OVERFLOW)) {
+		value = DSI_HOST_CONTROL_FIFO_RESET;
+		tegra_dsi_writel(dsi, value, DSI_HOST_CONTROL);
+		usleep_range(10, 20);
+	}
+
+	value = tegra_dsi_readl(dsi, DSI_POWER_CONTROL);
+	value |= DSI_POWER_CONTROL_ENABLE;
+	tegra_dsi_writel(dsi, value, DSI_POWER_CONTROL);
+
+	usleep_range(5000, 10000);
+
+	value = DSI_HOST_CONTROL_CRC_RESET | DSI_HOST_CONTROL_TX_TRIG_HOST |
+		DSI_HOST_CONTROL_CS | DSI_HOST_CONTROL_ECC;
+
+	if ((msg->flags & MIPI_DSI_MSG_USE_LPM) == 0)
+		value |= DSI_HOST_CONTROL_HS;
+
+	/*
+	 * The host FIFO has a maximum of 64 words, so larger transmissions
+	 * need to use the video FIFO.
+	 */
+	if (packet.size > dsi->host_fifo_depth * 4)
+		value |= DSI_HOST_CONTROL_FIFO_SEL;
+
+	tegra_dsi_writel(dsi, value, DSI_HOST_CONTROL);
+
+	/*
+	 * For reads and messages with explicitly requested ACK, generate a
+	 * BTA sequence after the transmission of the packet.
+	 */
+	if ((msg->flags & MIPI_DSI_MSG_REQ_ACK) ||
+	    (msg->rx_buf && msg->rx_len > 0)) {
+		value = tegra_dsi_readl(dsi, DSI_HOST_CONTROL);
+		value |= DSI_HOST_CONTROL_PKT_BTA;
+		tegra_dsi_writel(dsi, value, DSI_HOST_CONTROL);
+	}
+
+	value = DSI_CONTROL_LANES(0) | DSI_CONTROL_HOST_ENABLE;
+	tegra_dsi_writel(dsi, value, DSI_CONTROL);
+
+	/* write packet header, ECC is generated by hardware */
+	value = header[2] << 16 | header[1] << 8 | header[0];
+	tegra_dsi_writel(dsi, value, DSI_WR_DATA);
+
+	/* write payload (if any) */
+	if (packet.payload_length > 0)
+		tegra_dsi_writesl(dsi, DSI_WR_DATA, packet.payload,
+				  packet.payload_length);
+
+	err = tegra_dsi_transmit(dsi, 250);
+	if (err < 0)
+		return err;
+
+	if ((msg->flags & MIPI_DSI_MSG_REQ_ACK) ||
+	    (msg->rx_buf && msg->rx_len > 0)) {
+		err = tegra_dsi_wait_for_response(dsi, 250);
+		if (err < 0)
+			return err;
+
+		count = err;
+
+		value = tegra_dsi_readl(dsi, DSI_RD_DATA);
+		switch (value) {
+		case 0x84:
+			/*
+			dev_dbg(dsi->dev, "ACK\n");
+			*/
+			break;
+
+		case 0x87:
+			/*
+			dev_dbg(dsi->dev, "ESCAPE\n");
+			*/
+			break;
+
+		default:
+			dev_err(dsi->dev, "unknown status: %08x\n", value);
+			break;
+		}
+
+		if (count > 1) {
+			err = tegra_dsi_read_response(dsi, msg, count);
+			if (err < 0)
+				dev_err(dsi->dev,
+					"failed to parse response: %zd\n",
+					err);
+			else {
+				/*
+				 * For read commands, return the number of
+				 * bytes returned by the peripheral.
+				 */
+				count = err;
+			}
+		}
+	} else {
+		/*
+		 * For write commands, we have transmitted the 4-byte header
+		 * plus the variable-length payload.
+		 */
+		count = 4 + packet.payload_length;
+	}
+
+	return count;
+}
+
+static int tegra_dsi_ganged_setup(struct tegra_dsi *dsi)
+{
+	struct clk *parent;
+	int err;
+
+	/* make sure both DSI controllers share the same PLL */
+	parent = clk_get_parent(dsi->slave->clk);
+	if (!parent)
+		return -EINVAL;
+
+	err = clk_set_parent(parent, dsi->clk_parent);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
 static int tegra_dsi_host_attach(struct mipi_dsi_host *host,
 				 struct mipi_dsi_device *device)
 {
 	struct tegra_dsi *dsi = host_to_tegra(host);
-	struct tegra_output *output = &dsi->output;
 
 	dsi->flags = device->mode_flags;
 	dsi->format = device->format;
 	dsi->lanes = device->lanes;
 
-	output->panel = of_drm_find_panel(device->dev.of_node);
-	if (output->panel) {
-		if (output->connector.dev)
+	if (dsi->slave) {
+		int err;
+
+		dev_dbg(dsi->dev, "attaching dual-channel device %s\n",
+			dev_name(&device->dev));
+
+		err = tegra_dsi_ganged_setup(dsi);
+		if (err < 0) {
+			dev_err(dsi->dev, "failed to set up ganged mode: %d\n",
+				err);
+			return err;
+		}
+	}
+
+	/*
+	 * Slaves don't have a panel associated with them, so they provide
+	 * merely the second channel.
+	 */
+	if (!dsi->master) {
+		struct tegra_output *output = &dsi->output;
+
+		output->panel = of_drm_find_panel(device->dev.of_node);
+		if (output->panel && output->connector.dev) {
+			drm_panel_attach(output->panel, &output->connector);
 			drm_helper_hpd_irq_event(output->connector.dev);
+		}
 	}
 
 	return 0;
@@ -818,10 +1326,10 @@
 	struct tegra_output *output = &dsi->output;
 
 	if (output->panel && &device->dev == output->panel->dev) {
+		output->panel = NULL;
+
 		if (output->connector.dev)
 			drm_helper_hpd_irq_event(output->connector.dev);
-
-		output->panel = NULL;
 	}
 
 	return 0;
@@ -830,8 +1338,29 @@
 static const struct mipi_dsi_host_ops tegra_dsi_host_ops = {
 	.attach = tegra_dsi_host_attach,
 	.detach = tegra_dsi_host_detach,
+	.transfer = tegra_dsi_host_transfer,
 };
 
+static int tegra_dsi_ganged_probe(struct tegra_dsi *dsi)
+{
+	struct device_node *np;
+
+	np = of_parse_phandle(dsi->dev->of_node, "nvidia,ganged-mode", 0);
+	if (np) {
+		struct platform_device *gangster = of_find_device_by_node(np);
+
+		dsi->slave = platform_get_drvdata(gangster);
+		of_node_put(np);
+
+		if (!dsi->slave)
+			return -EPROBE_DEFER;
+
+		dsi->slave->master = dsi;
+	}
+
+	return 0;
+}
+
 static int tegra_dsi_probe(struct platform_device *pdev)
 {
 	struct tegra_dsi *dsi;
@@ -843,11 +1372,19 @@
 		return -ENOMEM;
 
 	dsi->output.dev = dsi->dev = &pdev->dev;
+	dsi->video_fifo_depth = 1920;
+	dsi->host_fifo_depth = 64;
+
+	err = tegra_dsi_ganged_probe(dsi);
+	if (err < 0)
+		return err;
 
 	err = tegra_output_probe(&dsi->output);
 	if (err < 0)
 		return err;
 
+	dsi->output.connector.polled = DRM_CONNECTOR_POLL_HPD;
+
 	/*
 	 * Assume these values by default. When a DSI peripheral driver
 	 * attaches to the DSI host, the parameters will be taken from
@@ -861,68 +1398,83 @@
 	if (IS_ERR(dsi->rst))
 		return PTR_ERR(dsi->rst);
 
+	err = reset_control_deassert(dsi->rst);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to bring DSI out of reset: %d\n",
+			err);
+		return err;
+	}
+
 	dsi->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(dsi->clk)) {
 		dev_err(&pdev->dev, "cannot get DSI clock\n");
-		return PTR_ERR(dsi->clk);
+		err = PTR_ERR(dsi->clk);
+		goto reset;
 	}
 
 	err = clk_prepare_enable(dsi->clk);
 	if (err < 0) {
 		dev_err(&pdev->dev, "cannot enable DSI clock\n");
-		return err;
+		goto reset;
 	}
 
 	dsi->clk_lp = devm_clk_get(&pdev->dev, "lp");
 	if (IS_ERR(dsi->clk_lp)) {
 		dev_err(&pdev->dev, "cannot get low-power clock\n");
-		return PTR_ERR(dsi->clk_lp);
+		err = PTR_ERR(dsi->clk_lp);
+		goto disable_clk;
 	}
 
 	err = clk_prepare_enable(dsi->clk_lp);
 	if (err < 0) {
 		dev_err(&pdev->dev, "cannot enable low-power clock\n");
-		return err;
+		goto disable_clk;
 	}
 
 	dsi->clk_parent = devm_clk_get(&pdev->dev, "parent");
 	if (IS_ERR(dsi->clk_parent)) {
 		dev_err(&pdev->dev, "cannot get parent clock\n");
-		return PTR_ERR(dsi->clk_parent);
-	}
-
-	err = clk_prepare_enable(dsi->clk_parent);
-	if (err < 0) {
-		dev_err(&pdev->dev, "cannot enable parent clock\n");
-		return err;
+		err = PTR_ERR(dsi->clk_parent);
+		goto disable_clk_lp;
 	}
 
 	dsi->vdd = devm_regulator_get(&pdev->dev, "avdd-dsi-csi");
 	if (IS_ERR(dsi->vdd)) {
 		dev_err(&pdev->dev, "cannot get VDD supply\n");
-		return PTR_ERR(dsi->vdd);
+		err = PTR_ERR(dsi->vdd);
+		goto disable_clk_lp;
 	}
 
 	err = regulator_enable(dsi->vdd);
 	if (err < 0) {
 		dev_err(&pdev->dev, "cannot enable VDD supply\n");
-		return err;
+		goto disable_clk_lp;
 	}
 
 	err = tegra_dsi_setup_clocks(dsi);
 	if (err < 0) {
 		dev_err(&pdev->dev, "cannot setup clocks\n");
-		return err;
+		goto disable_vdd;
 	}
 
 	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	dsi->regs = devm_ioremap_resource(&pdev->dev, regs);
-	if (IS_ERR(dsi->regs))
-		return PTR_ERR(dsi->regs);
+	if (IS_ERR(dsi->regs)) {
+		err = PTR_ERR(dsi->regs);
+		goto disable_vdd;
+	}
 
 	dsi->mipi = tegra_mipi_request(&pdev->dev);
-	if (IS_ERR(dsi->mipi))
-		return PTR_ERR(dsi->mipi);
+	if (IS_ERR(dsi->mipi)) {
+		err = PTR_ERR(dsi->mipi);
+		goto disable_vdd;
+	}
+
+	err = tegra_dsi_pad_calibrate(dsi);
+	if (err < 0) {
+		dev_err(dsi->dev, "MIPI calibration failed: %d\n", err);
+		goto mipi_free;
+	}
 
 	dsi->host.ops = &tegra_dsi_host_ops;
 	dsi->host.dev = &pdev->dev;
@@ -930,7 +1482,7 @@
 	err = mipi_dsi_host_register(&dsi->host);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to register DSI host: %d\n", err);
-		return err;
+		goto mipi_free;
 	}
 
 	INIT_LIST_HEAD(&dsi->client.list);
@@ -941,12 +1493,26 @@
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to register host1x client: %d\n",
 			err);
-		return err;
+		goto unregister;
 	}
 
 	platform_set_drvdata(pdev, dsi);
 
 	return 0;
+
+unregister:
+	mipi_dsi_host_unregister(&dsi->host);
+mipi_free:
+	tegra_mipi_free(dsi->mipi);
+disable_vdd:
+	regulator_disable(dsi->vdd);
+disable_clk_lp:
+	clk_disable_unprepare(dsi->clk_lp);
+disable_clk:
+	clk_disable_unprepare(dsi->clk);
+reset:
+	reset_control_assert(dsi->rst);
+	return err;
 }
 
 static int tegra_dsi_remove(struct platform_device *pdev)
@@ -965,7 +1531,6 @@
 	tegra_mipi_free(dsi->mipi);
 
 	regulator_disable(dsi->vdd);
-	clk_disable_unprepare(dsi->clk_parent);
 	clk_disable_unprepare(dsi->clk_lp);
 	clk_disable_unprepare(dsi->clk);
 	reset_control_assert(dsi->rst);
diff --git a/drivers/gpu/drm/tegra/dsi.h b/drivers/gpu/drm/tegra/dsi.h
index 5ce610d..bad1006 100644
--- a/drivers/gpu/drm/tegra/dsi.h
+++ b/drivers/gpu/drm/tegra/dsi.h
@@ -21,9 +21,16 @@
 #define DSI_INT_STATUS			0x0d
 #define DSI_INT_MASK			0x0e
 #define DSI_HOST_CONTROL		0x0f
+#define DSI_HOST_CONTROL_FIFO_RESET	(1 << 21)
+#define DSI_HOST_CONTROL_CRC_RESET	(1 << 20)
+#define DSI_HOST_CONTROL_TX_TRIG_SOL	(0 << 12)
+#define DSI_HOST_CONTROL_TX_TRIG_FIFO	(1 << 12)
+#define DSI_HOST_CONTROL_TX_TRIG_HOST	(2 << 12)
 #define DSI_HOST_CONTROL_RAW		(1 << 6)
 #define DSI_HOST_CONTROL_HS		(1 << 5)
-#define DSI_HOST_CONTROL_BTA		(1 << 2)
+#define DSI_HOST_CONTROL_FIFO_SEL	(1 << 4)
+#define DSI_HOST_CONTROL_IMM_BTA	(1 << 3)
+#define DSI_HOST_CONTROL_PKT_BTA	(1 << 2)
 #define DSI_HOST_CONTROL_CS		(1 << 1)
 #define DSI_HOST_CONTROL_ECC		(1 << 0)
 #define DSI_CONTROL			0x10
@@ -39,9 +46,13 @@
 #define DSI_SOL_DELAY			0x11
 #define DSI_MAX_THRESHOLD		0x12
 #define DSI_TRIGGER			0x13
+#define DSI_TRIGGER_HOST		(1 << 1)
+#define DSI_TRIGGER_VIDEO		(1 << 0)
 #define DSI_TX_CRC			0x14
 #define DSI_STATUS			0x15
 #define DSI_STATUS_IDLE			(1 << 10)
+#define DSI_STATUS_UNDERFLOW		(1 <<  9)
+#define DSI_STATUS_OVERFLOW		(1 <<  8)
 #define DSI_INIT_SEQ_CONTROL		0x1a
 #define DSI_INIT_SEQ_DATA_0		0x1b
 #define DSI_INIT_SEQ_DATA_1		0x1c
@@ -104,6 +115,7 @@
 #define DSI_PAD_CONTROL_3		0x51
 #define DSI_PAD_CONTROL_4		0x52
 #define DSI_GANGED_MODE_CONTROL		0x53
+#define DSI_GANGED_MODE_CONTROL_ENABLE	(1 << 0)
 #define DSI_GANGED_MODE_START		0x54
 #define DSI_GANGED_MODE_SIZE		0x55
 #define DSI_RAW_DATA_BYTE_COUNT		0x56
diff --git a/drivers/gpu/drm/tegra/fb.c b/drivers/gpu/drm/tegra/fb.c
index 3513d12..e9c715d 100644
--- a/drivers/gpu/drm/tegra/fb.c
+++ b/drivers/gpu/drm/tegra/fb.c
@@ -65,8 +65,12 @@
 	for (i = 0; i < fb->num_planes; i++) {
 		struct tegra_bo *bo = fb->planes[i];
 
-		if (bo)
+		if (bo) {
+			if (bo->pages && bo->vaddr)
+				vunmap(bo->vaddr);
+
 			drm_gem_object_unreference_unlocked(&bo->gem);
+		}
 	}
 
 	drm_framebuffer_cleanup(framebuffer);
@@ -223,14 +227,16 @@
 	info = framebuffer_alloc(0, drm->dev);
 	if (!info) {
 		dev_err(drm->dev, "failed to allocate framebuffer info\n");
-		tegra_bo_free_object(&bo->gem);
+		drm_gem_object_unreference_unlocked(&bo->gem);
 		return -ENOMEM;
 	}
 
 	fbdev->fb = tegra_fb_alloc(drm, &cmd, &bo, 1);
 	if (IS_ERR(fbdev->fb)) {
-		dev_err(drm->dev, "failed to allocate DRM framebuffer\n");
 		err = PTR_ERR(fbdev->fb);
+		dev_err(drm->dev, "failed to allocate DRM framebuffer: %d\n",
+			err);
+		drm_gem_object_unreference_unlocked(&bo->gem);
 		goto release;
 	}
 
@@ -254,6 +260,16 @@
 	offset = info->var.xoffset * bytes_per_pixel +
 		 info->var.yoffset * fb->pitches[0];
 
+	if (bo->pages) {
+		bo->vaddr = vmap(bo->pages, bo->num_pages, VM_MAP,
+				 pgprot_writecombine(PAGE_KERNEL));
+		if (!bo->vaddr) {
+			dev_err(drm->dev, "failed to vmap() framebuffer\n");
+			err = -ENOMEM;
+			goto destroy;
+		}
+	}
+
 	drm->mode_config.fb_base = (resource_size_t)bo->paddr;
 	info->screen_base = (void __iomem *)bo->vaddr + offset;
 	info->screen_size = size;
@@ -289,6 +305,11 @@
 	return fbdev;
 }
 
+static void tegra_fbdev_free(struct tegra_fbdev *fbdev)
+{
+	kfree(fbdev);
+}
+
 static int tegra_fbdev_init(struct tegra_fbdev *fbdev,
 			    unsigned int preferred_bpp,
 			    unsigned int num_crtc,
@@ -299,19 +320,21 @@
 
 	err = drm_fb_helper_init(drm, &fbdev->base, num_crtc, max_connectors);
 	if (err < 0) {
-		dev_err(drm->dev, "failed to initialize DRM FB helper\n");
+		dev_err(drm->dev, "failed to initialize DRM FB helper: %d\n",
+			err);
 		return err;
 	}
 
 	err = drm_fb_helper_single_add_all_connectors(&fbdev->base);
 	if (err < 0) {
-		dev_err(drm->dev, "failed to add connectors\n");
+		dev_err(drm->dev, "failed to add connectors: %d\n", err);
 		goto fini;
 	}
 
 	err = drm_fb_helper_initial_config(&fbdev->base, preferred_bpp);
 	if (err < 0) {
-		dev_err(drm->dev, "failed to set initial configuration\n");
+		dev_err(drm->dev, "failed to set initial configuration: %d\n",
+			err);
 		goto fini;
 	}
 
@@ -322,7 +345,7 @@
 	return err;
 }
 
-static void tegra_fbdev_free(struct tegra_fbdev *fbdev)
+static void tegra_fbdev_exit(struct tegra_fbdev *fbdev)
 {
 	struct fb_info *info = fbdev->base.fbdev;
 
@@ -341,11 +364,11 @@
 
 	if (fbdev->fb) {
 		drm_framebuffer_unregister_private(&fbdev->fb->base);
-		tegra_fb_destroy(&fbdev->fb->base);
+		drm_framebuffer_remove(&fbdev->fb->base);
 	}
 
 	drm_fb_helper_fini(&fbdev->base);
-	kfree(fbdev);
+	tegra_fbdev_free(fbdev);
 }
 
 void tegra_fbdev_restore_mode(struct tegra_fbdev *fbdev)
@@ -393,6 +416,15 @@
 	return 0;
 }
 
+void tegra_drm_fb_free(struct drm_device *drm)
+{
+#ifdef CONFIG_DRM_TEGRA_FBDEV
+	struct tegra_drm *tegra = drm->dev_private;
+
+	tegra_fbdev_free(tegra->fbdev);
+#endif
+}
+
 int tegra_drm_fb_init(struct drm_device *drm)
 {
 #ifdef CONFIG_DRM_TEGRA_FBDEV
@@ -413,6 +445,6 @@
 #ifdef CONFIG_DRM_TEGRA_FBDEV
 	struct tegra_drm *tegra = drm->dev_private;
 
-	tegra_fbdev_free(tegra->fbdev);
+	tegra_fbdev_exit(tegra->fbdev);
 #endif
 }
diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
index ce023fa..da32086 100644
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c
@@ -14,6 +14,7 @@
  */
 
 #include <linux/dma-buf.h>
+#include <linux/iommu.h>
 #include <drm/tegra_drm.h>
 
 #include "drm.h"
@@ -91,13 +92,90 @@
 	.kunmap = tegra_bo_kunmap,
 };
 
-static void tegra_bo_destroy(struct drm_device *drm, struct tegra_bo *bo)
+/*
+ * A generic iommu_map_sg() function is being reviewed and will hopefully be
+ * merged soon. At that point this function can be dropped in favour of the
+ * one provided by the IOMMU API.
+ */
+static ssize_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
+			      struct scatterlist *sg, unsigned int nents,
+			      int prot)
 {
-	dma_free_writecombine(drm->dev, bo->gem.size, bo->vaddr, bo->paddr);
+	struct scatterlist *s;
+	size_t offset = 0;
+	unsigned int i;
+	int err;
+
+	for_each_sg(sg, s, nents, i) {
+		phys_addr_t phys = page_to_phys(sg_page(s));
+		size_t length = s->offset + s->length;
+
+		err = iommu_map(domain, iova + offset, phys, length, prot);
+		if (err < 0) {
+			iommu_unmap(domain, iova, offset);
+			return err;
+		}
+
+		offset += length;
+	}
+
+	return offset;
 }
 
-struct tegra_bo *tegra_bo_create(struct drm_device *drm, unsigned int size,
-				 unsigned long flags)
+static int tegra_bo_iommu_map(struct tegra_drm *tegra, struct tegra_bo *bo)
+{
+	int prot = IOMMU_READ | IOMMU_WRITE;
+	ssize_t err;
+
+	if (bo->mm)
+		return -EBUSY;
+
+	bo->mm = kzalloc(sizeof(*bo->mm), GFP_KERNEL);
+	if (!bo->mm)
+		return -ENOMEM;
+
+	err = drm_mm_insert_node_generic(&tegra->mm, bo->mm, bo->gem.size,
+					 PAGE_SIZE, 0, 0, 0);
+	if (err < 0) {
+		dev_err(tegra->drm->dev, "out of I/O virtual memory: %zd\n",
+			err);
+		goto free;
+	}
+
+	bo->paddr = bo->mm->start;
+
+	err = __iommu_map_sg(tegra->domain, bo->paddr, bo->sgt->sgl,
+			     bo->sgt->nents, prot);
+	if (err < 0) {
+		dev_err(tegra->drm->dev, "failed to map buffer: %zd\n", err);
+		goto remove;
+	}
+
+	bo->size = err;
+
+	return 0;
+
+remove:
+	drm_mm_remove_node(bo->mm);
+free:
+	kfree(bo->mm);
+	return err;
+}
+
+static int tegra_bo_iommu_unmap(struct tegra_drm *tegra, struct tegra_bo *bo)
+{
+	if (!bo->mm)
+		return 0;
+
+	iommu_unmap(tegra->domain, bo->paddr, bo->size);
+	drm_mm_remove_node(bo->mm);
+	kfree(bo->mm);
+
+	return 0;
+}
+
+static struct tegra_bo *tegra_bo_alloc_object(struct drm_device *drm,
+					      size_t size)
 {
 	struct tegra_bo *bo;
 	int err;
@@ -109,22 +187,96 @@
 	host1x_bo_init(&bo->base, &tegra_bo_ops);
 	size = round_up(size, PAGE_SIZE);
 
-	bo->vaddr = dma_alloc_writecombine(drm->dev, size, &bo->paddr,
-					   GFP_KERNEL | __GFP_NOWARN);
-	if (!bo->vaddr) {
-		dev_err(drm->dev, "failed to allocate buffer with size %u\n",
-			size);
-		err = -ENOMEM;
-		goto err_dma;
-	}
-
 	err = drm_gem_object_init(drm, &bo->gem, size);
-	if (err)
-		goto err_init;
+	if (err < 0)
+		goto free;
 
 	err = drm_gem_create_mmap_offset(&bo->gem);
-	if (err)
-		goto err_mmap;
+	if (err < 0)
+		goto release;
+
+	return bo;
+
+release:
+	drm_gem_object_release(&bo->gem);
+free:
+	kfree(bo);
+	return ERR_PTR(err);
+}
+
+static void tegra_bo_free(struct drm_device *drm, struct tegra_bo *bo)
+{
+	if (bo->pages) {
+		drm_gem_put_pages(&bo->gem, bo->pages, true, true);
+		sg_free_table(bo->sgt);
+		kfree(bo->sgt);
+	} else if (bo->vaddr) {
+		dma_free_writecombine(drm->dev, bo->gem.size, bo->vaddr,
+				      bo->paddr);
+	}
+}
+
+static int tegra_bo_get_pages(struct drm_device *drm, struct tegra_bo *bo,
+			      size_t size)
+{
+	bo->pages = drm_gem_get_pages(&bo->gem);
+	if (IS_ERR(bo->pages))
+		return PTR_ERR(bo->pages);
+
+	bo->num_pages = size >> PAGE_SHIFT;
+
+	bo->sgt = drm_prime_pages_to_sg(bo->pages, bo->num_pages);
+	if (IS_ERR(bo->sgt)) {
+		drm_gem_put_pages(&bo->gem, bo->pages, false, false);
+		return PTR_ERR(bo->sgt);
+	}
+
+	return 0;
+}
+
+static int tegra_bo_alloc(struct drm_device *drm, struct tegra_bo *bo,
+			  size_t size)
+{
+	struct tegra_drm *tegra = drm->dev_private;
+	int err;
+
+	if (tegra->domain) {
+		err = tegra_bo_get_pages(drm, bo, size);
+		if (err < 0)
+			return err;
+
+		err = tegra_bo_iommu_map(tegra, bo);
+		if (err < 0) {
+			tegra_bo_free(drm, bo);
+			return err;
+		}
+	} else {
+		bo->vaddr = dma_alloc_writecombine(drm->dev, size, &bo->paddr,
+						   GFP_KERNEL | __GFP_NOWARN);
+		if (!bo->vaddr) {
+			dev_err(drm->dev,
+				"failed to allocate buffer of size %zu\n",
+				size);
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
+struct tegra_bo *tegra_bo_create(struct drm_device *drm, size_t size,
+				 unsigned long flags)
+{
+	struct tegra_bo *bo;
+	int err;
+
+	bo = tegra_bo_alloc_object(drm, size);
+	if (IS_ERR(bo))
+		return bo;
+
+	err = tegra_bo_alloc(drm, bo, size);
+	if (err < 0)
+		goto release;
 
 	if (flags & DRM_TEGRA_GEM_CREATE_TILED)
 		bo->tiling.mode = TEGRA_BO_TILING_MODE_TILED;
@@ -134,69 +286,52 @@
 
 	return bo;
 
-err_mmap:
+release:
 	drm_gem_object_release(&bo->gem);
-err_init:
-	tegra_bo_destroy(drm, bo);
-err_dma:
 	kfree(bo);
-
 	return ERR_PTR(err);
 }
 
 struct tegra_bo *tegra_bo_create_with_handle(struct drm_file *file,
 					     struct drm_device *drm,
-					     unsigned int size,
+					     size_t size,
 					     unsigned long flags,
-					     unsigned int *handle)
+					     u32 *handle)
 {
 	struct tegra_bo *bo;
-	int ret;
+	int err;
 
 	bo = tegra_bo_create(drm, size, flags);
 	if (IS_ERR(bo))
 		return bo;
 
-	ret = drm_gem_handle_create(file, &bo->gem, handle);
-	if (ret)
-		goto err;
+	err = drm_gem_handle_create(file, &bo->gem, handle);
+	if (err) {
+		tegra_bo_free_object(&bo->gem);
+		return ERR_PTR(err);
+	}
 
 	drm_gem_object_unreference_unlocked(&bo->gem);
 
 	return bo;
-
-err:
-	tegra_bo_free_object(&bo->gem);
-	return ERR_PTR(ret);
 }
 
 static struct tegra_bo *tegra_bo_import(struct drm_device *drm,
 					struct dma_buf *buf)
 {
+	struct tegra_drm *tegra = drm->dev_private;
 	struct dma_buf_attachment *attach;
 	struct tegra_bo *bo;
-	ssize_t size;
 	int err;
 
-	bo = kzalloc(sizeof(*bo), GFP_KERNEL);
-	if (!bo)
-		return ERR_PTR(-ENOMEM);
-
-	host1x_bo_init(&bo->base, &tegra_bo_ops);
-	size = round_up(buf->size, PAGE_SIZE);
-
-	err = drm_gem_object_init(drm, &bo->gem, size);
-	if (err < 0)
-		goto free;
-
-	err = drm_gem_create_mmap_offset(&bo->gem);
-	if (err < 0)
-		goto release;
+	bo = tegra_bo_alloc_object(drm, buf->size);
+	if (IS_ERR(bo))
+		return bo;
 
 	attach = dma_buf_attach(buf, drm->dev);
 	if (IS_ERR(attach)) {
 		err = PTR_ERR(attach);
-		goto free_mmap;
+		goto free;
 	}
 
 	get_dma_buf(buf);
@@ -212,12 +347,19 @@
 		goto detach;
 	}
 
-	if (bo->sgt->nents > 1) {
-		err = -EINVAL;
-		goto detach;
+	if (tegra->domain) {
+		err = tegra_bo_iommu_map(tegra, bo);
+		if (err < 0)
+			goto detach;
+	} else {
+		if (bo->sgt->nents > 1) {
+			err = -EINVAL;
+			goto detach;
+		}
+
+		bo->paddr = sg_dma_address(bo->sgt->sgl);
 	}
 
-	bo->paddr = sg_dma_address(bo->sgt->sgl);
 	bo->gem.import_attach = attach;
 
 	return bo;
@@ -228,47 +370,41 @@
 
 	dma_buf_detach(buf, attach);
 	dma_buf_put(buf);
-free_mmap:
-	drm_gem_free_mmap_offset(&bo->gem);
-release:
-	drm_gem_object_release(&bo->gem);
 free:
+	drm_gem_object_release(&bo->gem);
 	kfree(bo);
-
 	return ERR_PTR(err);
 }
 
 void tegra_bo_free_object(struct drm_gem_object *gem)
 {
+	struct tegra_drm *tegra = gem->dev->dev_private;
 	struct tegra_bo *bo = to_tegra_bo(gem);
 
+	if (tegra->domain)
+		tegra_bo_iommu_unmap(tegra, bo);
+
 	if (gem->import_attach) {
 		dma_buf_unmap_attachment(gem->import_attach, bo->sgt,
 					 DMA_TO_DEVICE);
 		drm_prime_gem_destroy(gem, NULL);
 	} else {
-		tegra_bo_destroy(gem->dev, bo);
+		tegra_bo_free(gem->dev, bo);
 	}
 
-	drm_gem_free_mmap_offset(gem);
 	drm_gem_object_release(gem);
-
 	kfree(bo);
 }
 
 int tegra_bo_dumb_create(struct drm_file *file, struct drm_device *drm,
 			 struct drm_mode_create_dumb *args)
 {
-	int min_pitch = DIV_ROUND_UP(args->width * args->bpp, 8);
+	unsigned int min_pitch = DIV_ROUND_UP(args->width * args->bpp, 8);
 	struct tegra_drm *tegra = drm->dev_private;
 	struct tegra_bo *bo;
 
-	min_pitch = round_up(min_pitch, tegra->pitch_align);
-	if (args->pitch < min_pitch)
-		args->pitch = min_pitch;
-
-	if (args->size < args->pitch * args->height)
-		args->size = args->pitch * args->height;
+	args->pitch = round_up(min_pitch, tegra->pitch_align);
+	args->size = args->pitch * args->height;
 
 	bo = tegra_bo_create_with_handle(file, drm, args->size, 0,
 					 &args->handle);
@@ -279,7 +415,7 @@
 }
 
 int tegra_bo_dumb_map_offset(struct drm_file *file, struct drm_device *drm,
-			     uint32_t handle, uint64_t *offset)
+			     u32 handle, u64 *offset)
 {
 	struct drm_gem_object *gem;
 	struct tegra_bo *bo;
@@ -304,7 +440,38 @@
 	return 0;
 }
 
+static int tegra_bo_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct drm_gem_object *gem = vma->vm_private_data;
+	struct tegra_bo *bo = to_tegra_bo(gem);
+	struct page *page;
+	pgoff_t offset;
+	int err;
+
+	if (!bo->pages)
+		return VM_FAULT_SIGBUS;
+
+	offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >> PAGE_SHIFT;
+	page = bo->pages[offset];
+
+	err = vm_insert_page(vma, (unsigned long)vmf->virtual_address, page);
+	switch (err) {
+	case -EAGAIN:
+	case 0:
+	case -ERESTARTSYS:
+	case -EINTR:
+	case -EBUSY:
+		return VM_FAULT_NOPAGE;
+
+	case -ENOMEM:
+		return VM_FAULT_OOM;
+	}
+
+	return VM_FAULT_SIGBUS;
+}
+
 const struct vm_operations_struct tegra_bo_vm_ops = {
+	.fault = tegra_bo_fault,
 	.open = drm_gem_vm_open,
 	.close = drm_gem_vm_close,
 };
@@ -322,12 +489,30 @@
 	gem = vma->vm_private_data;
 	bo = to_tegra_bo(gem);
 
-	ret = remap_pfn_range(vma, vma->vm_start, bo->paddr >> PAGE_SHIFT,
-			      vma->vm_end - vma->vm_start, vma->vm_page_prot);
-	if (ret)
-		drm_gem_vm_close(vma);
+	if (!bo->pages) {
+		unsigned long vm_pgoff = vma->vm_pgoff;
 
-	return ret;
+		vma->vm_flags &= ~VM_PFNMAP;
+		vma->vm_pgoff = 0;
+
+		ret = dma_mmap_writecombine(gem->dev->dev, vma, bo->vaddr,
+					    bo->paddr, gem->size);
+		if (ret) {
+			drm_gem_vm_close(vma);
+			return ret;
+		}
+
+		vma->vm_pgoff = vm_pgoff;
+	} else {
+		pgprot_t prot = vm_get_page_prot(vma->vm_flags);
+
+		vma->vm_flags |= VM_MIXEDMAP;
+		vma->vm_flags &= ~VM_PFNMAP;
+
+		vma->vm_page_prot = pgprot_writecombine(prot);
+	}
+
+	return 0;
 }
 
 static struct sg_table *
@@ -342,21 +527,44 @@
 	if (!sgt)
 		return NULL;
 
-	if (sg_alloc_table(sgt, 1, GFP_KERNEL)) {
-		kfree(sgt);
-		return NULL;
+	if (bo->pages) {
+		struct scatterlist *sg;
+		unsigned int i;
+
+		if (sg_alloc_table(sgt, bo->num_pages, GFP_KERNEL))
+			goto free;
+
+		for_each_sg(sgt->sgl, sg, bo->num_pages, i)
+			sg_set_page(sg, bo->pages[i], PAGE_SIZE, 0);
+
+		if (dma_map_sg(attach->dev, sgt->sgl, sgt->nents, dir) == 0)
+			goto free;
+	} else {
+		if (sg_alloc_table(sgt, 1, GFP_KERNEL))
+			goto free;
+
+		sg_dma_address(sgt->sgl) = bo->paddr;
+		sg_dma_len(sgt->sgl) = gem->size;
 	}
 
-	sg_dma_address(sgt->sgl) = bo->paddr;
-	sg_dma_len(sgt->sgl) = gem->size;
-
 	return sgt;
+
+free:
+	sg_free_table(sgt);
+	kfree(sgt);
+	return NULL;
 }
 
 static void tegra_gem_prime_unmap_dma_buf(struct dma_buf_attachment *attach,
 					  struct sg_table *sgt,
 					  enum dma_data_direction dir)
 {
+	struct drm_gem_object *gem = attach->dmabuf->priv;
+	struct tegra_bo *bo = to_tegra_bo(gem);
+
+	if (bo->pages)
+		dma_unmap_sg(attach->dev, sgt->sgl, sgt->nents, dir);
+
 	sg_free_table(sgt);
 	kfree(sgt);
 }
diff --git a/drivers/gpu/drm/tegra/gem.h b/drivers/gpu/drm/tegra/gem.h
index 6538b56..6c5f12a 100644
--- a/drivers/gpu/drm/tegra/gem.h
+++ b/drivers/gpu/drm/tegra/gem.h
@@ -38,6 +38,12 @@
 	dma_addr_t paddr;
 	void *vaddr;
 
+	struct drm_mm_node *mm;
+	unsigned long num_pages;
+	struct page **pages;
+	/* size of IOMMU mapping */
+	size_t size;
+
 	struct tegra_bo_tiling tiling;
 };
 
@@ -46,18 +52,18 @@
 	return container_of(gem, struct tegra_bo, gem);
 }
 
-struct tegra_bo *tegra_bo_create(struct drm_device *drm, unsigned int size,
+struct tegra_bo *tegra_bo_create(struct drm_device *drm, size_t size,
 				 unsigned long flags);
 struct tegra_bo *tegra_bo_create_with_handle(struct drm_file *file,
 					     struct drm_device *drm,
-					     unsigned int size,
+					     size_t size,
 					     unsigned long flags,
-					     unsigned int *handle);
+					     u32 *handle);
 void tegra_bo_free_object(struct drm_gem_object *gem);
 int tegra_bo_dumb_create(struct drm_file *file, struct drm_device *drm,
 			 struct drm_mode_create_dumb *args);
 int tegra_bo_dumb_map_offset(struct drm_file *file, struct drm_device *drm,
-			     uint32_t handle, uint64_t *offset);
+			     u32 handle, u64 *offset);
 
 int tegra_drm_mmap(struct file *file, struct vm_area_struct *vma);
 
diff --git a/drivers/gpu/drm/tegra/output.c b/drivers/gpu/drm/tegra/output.c
index 0c67d7e..6a5c7b8 100644
--- a/drivers/gpu/drm/tegra/output.c
+++ b/drivers/gpu/drm/tegra/output.c
@@ -157,22 +157,18 @@
 
 static void tegra_encoder_prepare(struct drm_encoder *encoder)
 {
+	tegra_encoder_dpms(encoder, DRM_MODE_DPMS_OFF);
 }
 
 static void tegra_encoder_commit(struct drm_encoder *encoder)
 {
+	tegra_encoder_dpms(encoder, DRM_MODE_DPMS_ON);
 }
 
 static void tegra_encoder_mode_set(struct drm_encoder *encoder,
 				   struct drm_display_mode *mode,
 				   struct drm_display_mode *adjusted)
 {
-	struct tegra_output *output = encoder_to_output(encoder);
-	int err;
-
-	err = tegra_output_enable(output);
-	if (err < 0)
-		dev_err(encoder->dev->dev, "tegra_output_enable(): %d\n", err);
 }
 
 static const struct drm_encoder_helper_funcs encoder_helper_funcs = {
@@ -187,7 +183,8 @@
 {
 	struct tegra_output *output = data;
 
-	drm_helper_hpd_irq_event(output->connector.dev);
+	if (output->connector.dev)
+		drm_helper_hpd_irq_event(output->connector.dev);
 
 	return IRQ_HANDLED;
 }
@@ -259,6 +256,13 @@
 		}
 
 		output->connector.polled = DRM_CONNECTOR_POLL_HPD;
+
+		/*
+		 * Disable the interrupt until the connector has been
+		 * initialized to avoid a race in the hotplug interrupt
+		 * handler.
+		 */
+		disable_irq(output->hpd_irq);
 	}
 
 	return 0;
@@ -324,10 +328,27 @@
 
 	output->encoder.possible_crtcs = 0x3;
 
+	/*
+	 * The connector is now registered and ready to receive hotplug events
+	 * so the hotplug interrupt can be enabled.
+	 */
+	if (gpio_is_valid(output->hpd_gpio))
+		enable_irq(output->hpd_irq);
+
 	return 0;
 }
 
 int tegra_output_exit(struct tegra_output *output)
 {
+	/*
+	 * The connector is going away, so the interrupt must be disabled to
+	 * prevent the hotplug interrupt handler from potentially crashing.
+	 */
+	if (gpio_is_valid(output->hpd_gpio))
+		disable_irq(output->hpd_irq);
+
+	if (output->panel)
+		drm_panel_detach(output->panel);
+
 	return 0;
 }
diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index 3995255..5a8c8d5 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -97,7 +97,7 @@
 static void host1x_pushbuffer_push(struct push_buffer *pb, u32 op1, u32 op2)
 {
 	u32 pos = pb->pos;
-	u32 *p = (u32 *)((u32)pb->mapped + pos);
+	u32 *p = (u32 *)((void *)pb->mapped + pos);
 	WARN_ON(pos == pb->fence);
 	*(p++) = op1;
 	*(p++) = op2;
diff --git a/drivers/gpu/host1x/cdma.h b/drivers/gpu/host1x/cdma.h
index 313c4b7..470087a 100644
--- a/drivers/gpu/host1x/cdma.h
+++ b/drivers/gpu/host1x/cdma.h
@@ -42,7 +42,7 @@
  */
 
 struct push_buffer {
-	u32 *mapped;			/* mapped pushbuffer memory */
+	void *mapped;			/* mapped pushbuffer memory */
 	dma_addr_t phys;		/* physical address of pushbuffer */
 	u32 fence;			/* index we've written */
 	u32 pos;			/* index to write to */
diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c
index 6b09b71..305ea8f 100644
--- a/drivers/gpu/host1x/hw/cdma_hw.c
+++ b/drivers/gpu/host1x/hw/cdma_hw.c
@@ -26,11 +26,11 @@
 #include "../debug.h"
 
 /*
- * Put the restart at the end of pushbuffer memor
+ * Put the restart at the end of pushbuffer memory
  */
 static void push_buffer_init(struct push_buffer *pb)
 {
-	*(pb->mapped + (pb->size_bytes >> 2)) = host1x_opcode_restart(0);
+	*(u32 *)(pb->mapped + pb->size_bytes) = host1x_opcode_restart(0);
 }
 
 /*
@@ -51,11 +51,11 @@
 
 	/* NOP all the PB slots */
 	while (nr_slots--) {
-		u32 *p = (u32 *)((u32)pb->mapped + getptr);
+		u32 *p = (u32 *)(pb->mapped + getptr);
 		*(p++) = HOST1X_OPCODE_NOP;
 		*(p++) = HOST1X_OPCODE_NOP;
-		dev_dbg(host1x->dev, "%s: NOP at %#llx\n", __func__,
-			(u64)pb->phys + getptr);
+		dev_dbg(host1x->dev, "%s: NOP at %pad+%#x\n", __func__,
+			&pb->phys, getptr);
 		getptr = (getptr + 8) & (pb->size_bytes - 1);
 	}
 	wmb();
diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
index 4608257..946c332 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -32,6 +32,7 @@
 static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo,
 			       u32 offset, u32 words)
 {
+	struct device *dev = cdma_to_channel(cdma)->dev;
 	void *mem = NULL;
 
 	if (host1x_debug_trace_cmdbuf)
@@ -44,11 +45,14 @@
 		 * of how much you can output to ftrace at once.
 		 */
 		for (i = 0; i < words; i += TRACE_MAX_LENGTH) {
-			trace_host1x_cdma_push_gather(
-				dev_name(cdma_to_channel(cdma)->dev),
-				(u32)bo, min(words - i, TRACE_MAX_LENGTH),
-				offset + i * sizeof(u32), mem);
+			u32 num_words = min(words - i, TRACE_MAX_LENGTH);
+			offset += i * sizeof(u32);
+
+			trace_host1x_cdma_push_gather(dev_name(dev), bo,
+						      num_words, offset,
+						      mem);
 		}
+
 		host1x_bo_munmap(bo, mem);
 	}
 }
diff --git a/drivers/gpu/host1x/hw/debug_hw.c b/drivers/gpu/host1x/hw/debug_hw.c
index f72c873..791de93 100644
--- a/drivers/gpu/host1x/hw/debug_hw.c
+++ b/drivers/gpu/host1x/hw/debug_hw.c
@@ -163,8 +163,8 @@
 				continue;
 			}
 
-			host1x_debug_output(o, "    GATHER at %#llx+%04x, %d words\n",
-					    (u64)g->base, g->offset, g->words);
+			host1x_debug_output(o, "    GATHER at %pad+%#x, %d words\n",
+					    &g->base, g->offset, g->words);
 
 			show_gather(o, g->base + g->offset, g->words, cdma,
 				    g->base, mapped);
diff --git a/drivers/gpu/host1x/job.h b/drivers/gpu/host1x/job.h
index 33a697d..8b3c15d 100644
--- a/drivers/gpu/host1x/job.h
+++ b/drivers/gpu/host1x/job.h
@@ -23,7 +23,7 @@
 	u32 words;
 	dma_addr_t base;
 	struct host1x_bo *bo;
-	int offset;
+	u32 offset;
 	bool handled;
 };
 
diff --git a/drivers/gpu/host1x/mipi.c b/drivers/gpu/host1x/mipi.c
index 9882ea1..fbc6ee6 100644
--- a/drivers/gpu/host1x/mipi.c
+++ b/drivers/gpu/host1x/mipi.c
@@ -49,35 +49,47 @@
 #define MIPI_CAL_CONFIG_DSIC		0x10
 #define MIPI_CAL_CONFIG_DSID		0x11
 
+#define MIPI_CAL_CONFIG_DSIAB_CLK	0x19
+#define MIPI_CAL_CONFIG_DSICD_CLK	0x1a
+#define MIPI_CAL_CONFIG_CSIAB_CLK	0x1b
+#define MIPI_CAL_CONFIG_CSICD_CLK	0x1c
+#define MIPI_CAL_CONFIG_CSIE_CLK	0x1d
+
+/* for data and clock lanes */
 #define MIPI_CAL_CONFIG_SELECT		(1 << 21)
+
+/* for data lanes */
 #define MIPI_CAL_CONFIG_HSPDOS(x)	(((x) & 0x1f) << 16)
 #define MIPI_CAL_CONFIG_HSPUOS(x)	(((x) & 0x1f) <<  8)
 #define MIPI_CAL_CONFIG_TERMOS(x)	(((x) & 0x1f) <<  0)
 
+/* for clock lanes */
+#define MIPI_CAL_CONFIG_HSCLKPDOSD(x)	(((x) & 0x1f) <<  8)
+#define MIPI_CAL_CONFIG_HSCLKPUOSD(x)	(((x) & 0x1f) <<  0)
+
 #define MIPI_CAL_BIAS_PAD_CFG0		0x16
 #define MIPI_CAL_BIAS_PAD_PDVCLAMP	(1 << 1)
 #define MIPI_CAL_BIAS_PAD_E_VCLAMP_REF	(1 << 0)
 
 #define MIPI_CAL_BIAS_PAD_CFG1		0x17
+#define MIPI_CAL_BIAS_PAD_DRV_DN_REF(x) (((x) & 0x7) << 16)
 
 #define MIPI_CAL_BIAS_PAD_CFG2		0x18
 #define MIPI_CAL_BIAS_PAD_PDVREG	(1 << 1)
 
-static const struct module {
-	unsigned long reg;
-} modules[] = {
-	{ .reg = MIPI_CAL_CONFIG_CSIA },
-	{ .reg = MIPI_CAL_CONFIG_CSIB },
-	{ .reg = MIPI_CAL_CONFIG_CSIC },
-	{ .reg = MIPI_CAL_CONFIG_CSID },
-	{ .reg = MIPI_CAL_CONFIG_CSIE },
-	{ .reg = MIPI_CAL_CONFIG_DSIA },
-	{ .reg = MIPI_CAL_CONFIG_DSIB },
-	{ .reg = MIPI_CAL_CONFIG_DSIC },
-	{ .reg = MIPI_CAL_CONFIG_DSID },
+struct tegra_mipi_pad {
+	unsigned long data;
+	unsigned long clk;
+};
+
+struct tegra_mipi_soc {
+	bool has_clk_lane;
+	const struct tegra_mipi_pad *pads;
+	unsigned int num_pads;
 };
 
 struct tegra_mipi {
+	const struct tegra_mipi_soc *soc;
 	void __iomem *regs;
 	struct mutex lock;
 	struct clk *clk;
@@ -90,16 +102,16 @@
 	unsigned long pads;
 };
 
-static inline unsigned long tegra_mipi_readl(struct tegra_mipi *mipi,
-					     unsigned long reg)
+static inline u32 tegra_mipi_readl(struct tegra_mipi *mipi,
+				   unsigned long offset)
 {
-	return readl(mipi->regs + (reg << 2));
+	return readl(mipi->regs + (offset << 2));
 }
 
-static inline void tegra_mipi_writel(struct tegra_mipi *mipi,
-				     unsigned long value, unsigned long reg)
+static inline void tegra_mipi_writel(struct tegra_mipi *mipi, u32 value,
+				     unsigned long offset)
 {
-	writel(value, mipi->regs + (reg << 2));
+	writel(value, mipi->regs + (offset << 2));
 }
 
 struct tegra_mipi_device *tegra_mipi_request(struct device *device)
@@ -117,36 +129,35 @@
 
 	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
 	if (!dev) {
-		of_node_put(args.np);
 		err = -ENOMEM;
 		goto out;
 	}
 
 	dev->pdev = of_find_device_by_node(args.np);
 	if (!dev->pdev) {
-		of_node_put(args.np);
 		err = -ENODEV;
 		goto free;
 	}
 
-	of_node_put(args.np);
-
 	dev->mipi = platform_get_drvdata(dev->pdev);
 	if (!dev->mipi) {
 		err = -EPROBE_DEFER;
-		goto pdev_put;
+		goto put;
 	}
 
+	of_node_put(args.np);
+
 	dev->pads = args.args[0];
 	dev->device = device;
 
 	return dev;
 
-pdev_put:
+put:
 	platform_device_put(dev->pdev);
 free:
 	kfree(dev);
 out:
+	of_node_put(args.np);
 	return ERR_PTR(err);
 }
 EXPORT_SYMBOL(tegra_mipi_request);
@@ -161,7 +172,7 @@
 static int tegra_mipi_wait(struct tegra_mipi *mipi)
 {
 	unsigned long timeout = jiffies + msecs_to_jiffies(250);
-	unsigned long value;
+	u32 value;
 
 	while (time_before(jiffies, timeout)) {
 		value = tegra_mipi_readl(mipi, MIPI_CAL_STATUS);
@@ -177,8 +188,9 @@
 
 int tegra_mipi_calibrate(struct tegra_mipi_device *device)
 {
-	unsigned long value;
+	const struct tegra_mipi_soc *soc = device->mipi->soc;
 	unsigned int i;
+	u32 value;
 	int err;
 
 	err = clk_enable(device->mipi->clk);
@@ -192,23 +204,35 @@
 	value |= MIPI_CAL_BIAS_PAD_E_VCLAMP_REF;
 	tegra_mipi_writel(device->mipi, value, MIPI_CAL_BIAS_PAD_CFG0);
 
+	tegra_mipi_writel(device->mipi, MIPI_CAL_BIAS_PAD_DRV_DN_REF(2),
+			  MIPI_CAL_BIAS_PAD_CFG1);
+
 	value = tegra_mipi_readl(device->mipi, MIPI_CAL_BIAS_PAD_CFG2);
 	value &= ~MIPI_CAL_BIAS_PAD_PDVREG;
 	tegra_mipi_writel(device->mipi, value, MIPI_CAL_BIAS_PAD_CFG2);
 
-	for (i = 0; i < ARRAY_SIZE(modules); i++) {
-		if (device->pads & BIT(i))
-			value = MIPI_CAL_CONFIG_SELECT |
-				MIPI_CAL_CONFIG_HSPDOS(0) |
-				MIPI_CAL_CONFIG_HSPUOS(4) |
-				MIPI_CAL_CONFIG_TERMOS(5);
-		else
-			value = 0;
+	for (i = 0; i < soc->num_pads; i++) {
+		u32 clk = 0, data = 0;
 
-		tegra_mipi_writel(device->mipi, value, modules[i].reg);
+		if (device->pads & BIT(i)) {
+			data = MIPI_CAL_CONFIG_SELECT |
+			       MIPI_CAL_CONFIG_HSPDOS(0) |
+			       MIPI_CAL_CONFIG_HSPUOS(4) |
+			       MIPI_CAL_CONFIG_TERMOS(5);
+			clk = MIPI_CAL_CONFIG_SELECT |
+			      MIPI_CAL_CONFIG_HSCLKPDOSD(0) |
+			      MIPI_CAL_CONFIG_HSCLKPUOSD(4);
+		}
+
+		tegra_mipi_writel(device->mipi, data, soc->pads[i].data);
+
+		if (soc->has_clk_lane)
+			tegra_mipi_writel(device->mipi, clk, soc->pads[i].clk);
 	}
 
-	tegra_mipi_writel(device->mipi, MIPI_CAL_CTRL_START, MIPI_CAL_CTRL);
+	value = tegra_mipi_readl(device->mipi, MIPI_CAL_CTRL);
+	value |= MIPI_CAL_CTRL_START;
+	tegra_mipi_writel(device->mipi, value, MIPI_CAL_CTRL);
 
 	err = tegra_mipi_wait(device->mipi);
 
@@ -219,16 +243,63 @@
 }
 EXPORT_SYMBOL(tegra_mipi_calibrate);
 
+static const struct tegra_mipi_pad tegra114_mipi_pads[] = {
+	{ .data = MIPI_CAL_CONFIG_CSIA },
+	{ .data = MIPI_CAL_CONFIG_CSIB },
+	{ .data = MIPI_CAL_CONFIG_CSIC },
+	{ .data = MIPI_CAL_CONFIG_CSID },
+	{ .data = MIPI_CAL_CONFIG_CSIE },
+	{ .data = MIPI_CAL_CONFIG_DSIA },
+	{ .data = MIPI_CAL_CONFIG_DSIB },
+	{ .data = MIPI_CAL_CONFIG_DSIC },
+	{ .data = MIPI_CAL_CONFIG_DSID },
+};
+
+static const struct tegra_mipi_soc tegra114_mipi_soc = {
+	.has_clk_lane = false,
+	.pads = tegra114_mipi_pads,
+	.num_pads = ARRAY_SIZE(tegra114_mipi_pads),
+};
+
+static const struct tegra_mipi_pad tegra124_mipi_pads[] = {
+	{ .data = MIPI_CAL_CONFIG_CSIA, .clk = MIPI_CAL_CONFIG_CSIAB_CLK },
+	{ .data = MIPI_CAL_CONFIG_CSIB, .clk = MIPI_CAL_CONFIG_CSIAB_CLK },
+	{ .data = MIPI_CAL_CONFIG_CSIC, .clk = MIPI_CAL_CONFIG_CSICD_CLK },
+	{ .data = MIPI_CAL_CONFIG_CSID, .clk = MIPI_CAL_CONFIG_CSICD_CLK },
+	{ .data = MIPI_CAL_CONFIG_CSIE, .clk = MIPI_CAL_CONFIG_CSIE_CLK },
+	{ .data = MIPI_CAL_CONFIG_DSIA, .clk = MIPI_CAL_CONFIG_DSIAB_CLK },
+	{ .data = MIPI_CAL_CONFIG_DSIB, .clk = MIPI_CAL_CONFIG_DSIAB_CLK },
+};
+
+static const struct tegra_mipi_soc tegra124_mipi_soc = {
+	.has_clk_lane = true,
+	.pads = tegra124_mipi_pads,
+	.num_pads = ARRAY_SIZE(tegra124_mipi_pads),
+};
+
+static struct of_device_id tegra_mipi_of_match[] = {
+	{ .compatible = "nvidia,tegra114-mipi", .data = &tegra114_mipi_soc },
+	{ .compatible = "nvidia,tegra124-mipi", .data = &tegra124_mipi_soc },
+	{ },
+};
+
 static int tegra_mipi_probe(struct platform_device *pdev)
 {
+	const struct of_device_id *match;
 	struct tegra_mipi *mipi;
 	struct resource *res;
 	int err;
 
+	match = of_match_node(tegra_mipi_of_match, pdev->dev.of_node);
+	if (!match)
+		return -ENODEV;
+
 	mipi = devm_kzalloc(&pdev->dev, sizeof(*mipi), GFP_KERNEL);
 	if (!mipi)
 		return -ENOMEM;
 
+	mipi->soc = match->data;
+
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	mipi->regs = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(mipi->regs))
@@ -260,11 +331,6 @@
 	return 0;
 }
 
-static struct of_device_id tegra_mipi_of_match[] = {
-	{ .compatible = "nvidia,tegra114-mipi", },
-	{ },
-};
-
 struct platform_driver tegra_mipi_driver = {
 	.driver = {
 		.name = "tegra-mipi",
diff --git a/include/trace/events/host1x.h b/include/trace/events/host1x.h
index 94db6a2..6311636 100644
--- a/include/trace/events/host1x.h
+++ b/include/trace/events/host1x.h
@@ -29,6 +29,8 @@
 #include <linux/ktime.h>
 #include <linux/tracepoint.h>
 
+struct host1x_bo;
+
 DECLARE_EVENT_CLASS(host1x,
 	TP_PROTO(const char *name),
 	TP_ARGS(name),
@@ -79,14 +81,14 @@
 );
 
 TRACE_EVENT(host1x_cdma_push_gather,
-	TP_PROTO(const char *name, u32 mem_id,
+	TP_PROTO(const char *name, struct host1x_bo *bo,
 			u32 words, u32 offset, void *cmdbuf),
 
-	TP_ARGS(name, mem_id, words, offset, cmdbuf),
+	TP_ARGS(name, bo, words, offset, cmdbuf),
 
 	TP_STRUCT__entry(
 		__field(const char *, name)
-		__field(u32, mem_id)
+		__field(struct host1x_bo *, bo)
 		__field(u32, words)
 		__field(u32, offset)
 		__field(bool, cmdbuf)
@@ -100,13 +102,13 @@
 		}
 		__entry->cmdbuf = cmdbuf;
 		__entry->name = name;
-		__entry->mem_id = mem_id;
+		__entry->bo = bo;
 		__entry->words = words;
 		__entry->offset = offset;
 	),
 
-	TP_printk("name=%s, mem_id=%08x, words=%u, offset=%d, contents=[%s]",
-	  __entry->name, __entry->mem_id,
+	TP_printk("name=%s, bo=%p, words=%u, offset=%d, contents=[%s]",
+	  __entry->name, __entry->bo,
 	  __entry->words, __entry->offset,
 	  __print_hex(__get_dynamic_array(cmdbuf),
 		  __entry->cmdbuf ? __entry->words * 4 : 0))
@@ -221,12 +223,13 @@
 );
 
 TRACE_EVENT(host1x_syncpt_wait_check,
-	TP_PROTO(void *mem_id, u32 offset, u32 syncpt_id, u32 thresh, u32 min),
+	TP_PROTO(struct host1x_bo *bo, u32 offset, u32 syncpt_id, u32 thresh,
+		 u32 min),
 
-	TP_ARGS(mem_id, offset, syncpt_id, thresh, min),
+	TP_ARGS(bo, offset, syncpt_id, thresh, min),
 
 	TP_STRUCT__entry(
-		__field(void *, mem_id)
+		__field(struct host1x_bo *, bo)
 		__field(u32, offset)
 		__field(u32, syncpt_id)
 		__field(u32, thresh)
@@ -234,15 +237,15 @@
 	),
 
 	TP_fast_assign(
-		__entry->mem_id = mem_id;
+		__entry->bo = bo;
 		__entry->offset = offset;
 		__entry->syncpt_id = syncpt_id;
 		__entry->thresh = thresh;
 		__entry->min = min;
 	),
 
-	TP_printk("mem_id=%p, offset=%05x, id=%d, thresh=%d, current=%d",
-		__entry->mem_id, __entry->offset,
+	TP_printk("bo=%p, offset=%05x, id=%d, thresh=%d, current=%d",
+		__entry->bo, __entry->offset,
 		__entry->syncpt_id, __entry->thresh,
 		__entry->min)
 );