drm/i915: Wait for completion of pending flips when starved of fences

On older generations (gen2, gen3) the GPU requires fences for many
operations, such as blits. The display hardware also requires fences for
scanouts and this leads to a situation where an arbitrary number of
fences may be pinned by old scanouts following a pageflip but before we
have executed the unpin workqueue. This is unpredictable by userspace
and leads to random EDEADLK when submitting an otherwise benign
execbuffer. However, we can detect when we have an outstanding flip and
so cause userspace to wait upon their completion before finally
declaring that the system is starved of fences. This is really no worse
than forcing the GPU to stall waiting for older execbuffer to retire and
release their fences before we can reallocate them for the next
execbuffer.

v2: move the test for a pending fb unpin to a common routine for
later reuse during eviction

Reported-and-tested-by: dimon@gmx.net
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=73696
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Jon Bloomfield <jon.bloomfield@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4461336..00c8361 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3106,7 +3106,7 @@
 	}
 
 	if (avail == NULL)
-		return NULL;
+		goto deadlock;
 
 	/* None available, try to steal one or wait for a user to finish */
 	list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
@@ -3116,7 +3116,12 @@
 		return reg;
 	}
 
-	return NULL;
+deadlock:
+	/* Wait for completion of pending flips which consume fences */
+	if (intel_has_pending_fb_unpin(dev))
+		return ERR_PTR(-EAGAIN);
+
+	return ERR_PTR(-EDEADLK);
 }
 
 /**
@@ -3161,8 +3166,8 @@
 		}
 	} else if (enable) {
 		reg = i915_find_fence_reg(dev);
-		if (reg == NULL)
-			return -EDEADLK;
+		if (IS_ERR(reg))
+			return PTR_ERR(reg);
 
 		if (reg->obj) {
 			struct drm_i915_gem_object *old = reg->obj;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 14b024b..98371ee 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2982,6 +2982,30 @@
 	return pending;
 }
 
+bool intel_has_pending_fb_unpin(struct drm_device *dev)
+{
+	struct intel_crtc *crtc;
+
+	/* Note that we don't need to be called with mode_config.lock here
+	 * as our list of CRTC objects is static for the lifetime of the
+	 * device and so cannot disappear as we iterate. Similarly, we can
+	 * happily treat the predicates as racy, atomic checks as userspace
+	 * cannot claim and pin a new fb without at least acquring the
+	 * struct_mutex and so serialising with us.
+	 */
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) {
+		if (atomic_read(&crtc->unpin_work_count) == 0)
+			continue;
+
+		if (crtc->unpin_work)
+			intel_wait_for_vblank(dev, crtc->pipe);
+
+		return true;
+	}
+
+	return false;
+}
+
 static void intel_crtc_wait_for_pending_flips(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 8754db9..fbfaaba 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -626,6 +626,7 @@
 
 /* intel_display.c */
 const char *intel_output_name(int output);
+bool intel_has_pending_fb_unpin(struct drm_device *dev);
 int intel_pch_rawclk(struct drm_device *dev);
 void intel_mark_busy(struct drm_device *dev);
 void intel_mark_fb_busy(struct drm_i915_gem_object *obj,