drm/i915: Perform object clflushing asynchronously

Flushing the cachelines for an object is slow, can be as much as 100ms
for a large framebuffer. We currently do this under the struct_mutex BKL
on execution or on pageflip. But now with the ability to add fences to
obj->resv for both flips and execbuf (and we naturally wait on the fence
before CPU access), we can move the clflush operation to a workqueue and
signal a fence for completion, thereby doing the work asynchronously and
not blocking the driver or its clients.

v2: Introduce i915_gem_clflush.h and use a new name, split out some
extras into separate patches.

Suggested-by: Akash Goel <akash.goel@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170222114049.28456-5-chris@chris-wilson.co.uk
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 4d67cbb..2806d6b 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -37,6 +37,7 @@
 #include "intel_frontbuffer.h"
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
+#include "i915_gem_clflush.h"
 #include "intel_dsi.h"
 #include "i915_trace.h"
 #include <drm/drm_atomic.h>
@@ -13188,6 +13189,29 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 	struct drm_i915_gem_object *old_obj = intel_fb_obj(plane->state->fb);
 	int ret;
 
+	if (obj) {
+		if (plane->type == DRM_PLANE_TYPE_CURSOR &&
+		    INTEL_INFO(dev_priv)->cursor_needs_physical) {
+			const int align = IS_I830(dev_priv) ? 16 * 1024 : 256;
+
+			ret = i915_gem_object_attach_phys(obj, align);
+			if (ret) {
+				DRM_DEBUG_KMS("failed to attach phys object\n");
+				return ret;
+			}
+		} else {
+			struct i915_vma *vma;
+
+			vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation);
+			if (IS_ERR(vma)) {
+				DRM_DEBUG_KMS("failed to pin object\n");
+				return PTR_ERR(vma);
+			}
+
+			to_intel_plane_state(new_state)->vma = vma;
+		}
+	}
+
 	if (!obj && !old_obj)
 		return 0;
 
@@ -13240,26 +13264,6 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 		i915_gem_object_wait_priority(obj, 0, I915_PRIORITY_DISPLAY);
 	}
 
-	if (plane->type == DRM_PLANE_TYPE_CURSOR &&
-	    INTEL_INFO(dev_priv)->cursor_needs_physical) {
-		int align = IS_I830(dev_priv) ? 16 * 1024 : 256;
-		ret = i915_gem_object_attach_phys(obj, align);
-		if (ret) {
-			DRM_DEBUG_KMS("failed to attach phys object\n");
-			return ret;
-		}
-	} else {
-		struct i915_vma *vma;
-
-		vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation);
-		if (IS_ERR(vma)) {
-			DRM_DEBUG_KMS("failed to pin object\n");
-			return PTR_ERR(vma);
-		}
-
-		to_intel_plane_state(new_state)->vma = vma;
-	}
-
 	return 0;
 }