drm/i915: Convert engine->write_tail to operate on a request
If we rewrite the I915_WRITE_TAIL specialisation for the legacy
ringbuffer as submitting the request onto the ringbuffer, we can unify
the vfunc with both execlists and GuC in the next patch.
v2: Drop the modulus from the I915_WRITE_TAIL as it is currently being
applied in intel_ring_advance() after every command packet, and add a
comment explaining why we need the modulus at all.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1469432687-22756-22-git-send-email-chris@chris-wilson.co.uk
Link: http://patchwork.freedesktop.org/patch/msgid/1470174640-18242-12-git-send-email-chris@chris-wilson.co.uk
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 606b0b8..a885905 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -466,15 +466,13 @@
*/
request->postfix = ring->tail;
- if (i915.enable_execlists) {
+ if (i915.enable_execlists)
ret = engine->emit_request(request);
- } else {
+ else
ret = engine->add_request(request);
-
- request->tail = ring->tail;
- }
/* Not allowed to fail! */
WARN(ret, "emit|add_request failed: %d!\n", ret);
+
/* Sanity check that the reserved size was large enough. */
ret = ring->tail - request_start;
if (ret < 0)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 799a7dc..3142085 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -58,14 +58,6 @@
ring->tail, ring->size);
}
-static void __intel_engine_submit(struct intel_engine_cs *engine)
-{
- struct intel_ring *ring = engine->buffer;
-
- ring->tail &= ring->size - 1;
- engine->write_tail(engine, ring->tail);
-}
-
static int
gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
{
@@ -412,13 +404,6 @@
return gen8_emit_pipe_control(req, flags, scratch_addr);
}
-static void ring_write_tail(struct intel_engine_cs *engine,
- u32 value)
-{
- struct drm_i915_private *dev_priv = engine->i915;
- I915_WRITE_TAIL(engine, value);
-}
-
u64 intel_engine_get_active_head(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
@@ -532,7 +517,7 @@
I915_WRITE_CTL(engine, 0);
I915_WRITE_HEAD(engine, 0);
- engine->write_tail(engine, 0);
+ I915_WRITE_TAIL(engine, 0);
if (!IS_GEN2(dev_priv)) {
(void)I915_READ_CTL(engine);
@@ -1469,7 +1454,10 @@
intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
intel_ring_emit(ring, req->fence.seqno);
intel_ring_emit(ring, MI_USER_INTERRUPT);
- __intel_engine_submit(engine);
+ intel_ring_advance(ring);
+
+ req->tail = ring->tail;
+ engine->submit_request(req);
return 0;
}
@@ -1499,7 +1487,9 @@
intel_ring_emit(ring, 0);
intel_ring_emit(ring, MI_USER_INTERRUPT);
intel_ring_emit(ring, MI_NOOP);
- __intel_engine_submit(engine);
+
+ req->tail = ring->tail;
+ engine->submit_request(req);
return 0;
}
@@ -1716,11 +1706,21 @@
intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
intel_ring_emit(ring, req->fence.seqno);
intel_ring_emit(ring, MI_USER_INTERRUPT);
- __intel_engine_submit(req->engine);
+ intel_ring_advance(ring);
+
+ req->tail = ring->tail;
+ req->engine->submit_request(req);
return 0;
}
+static void i9xx_submit_request(struct drm_i915_gem_request *request)
+{
+ struct drm_i915_private *dev_priv = request->i915;
+
+ I915_WRITE_TAIL(request->engine, request->tail);
+}
+
static void
gen6_irq_enable(struct intel_engine_cs *engine)
{
@@ -2479,10 +2479,9 @@
rcu_read_unlock();
}
-static void gen6_bsd_ring_write_tail(struct intel_engine_cs *engine,
- u32 value)
+static void gen6_bsd_submit_request(struct drm_i915_gem_request *request)
{
- struct drm_i915_private *dev_priv = engine->i915;
+ struct drm_i915_private *dev_priv = request->i915;
intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
@@ -2506,8 +2505,8 @@
DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
/* Now that the ring is fully powered up, update the tail */
- I915_WRITE_FW(RING_TAIL(engine->mmio_base), value);
- POSTING_READ_FW(RING_TAIL(engine->mmio_base));
+ I915_WRITE_FW(RING_TAIL(request->engine->mmio_base), request->tail);
+ POSTING_READ_FW(RING_TAIL(request->engine->mmio_base));
/* Let the ring send IDLE messages to the GT again,
* and so let it sleep to conserve power when idle.
@@ -2811,7 +2810,7 @@
struct intel_engine_cs *engine)
{
engine->init_hw = init_ring_common;
- engine->write_tail = ring_write_tail;
+ engine->submit_request = i9xx_submit_request;
engine->add_request = i9xx_add_request;
if (INTEL_GEN(dev_priv) >= 6)
@@ -2895,7 +2894,7 @@
if (INTEL_GEN(dev_priv) >= 6) {
/* gen6 bsd needs a special wa for tail updates */
if (IS_GEN6(dev_priv))
- engine->write_tail = gen6_bsd_ring_write_tail;
+ engine->submit_request = gen6_bsd_submit_request;
engine->emit_flush = gen6_bsd_ring_flush;
if (INTEL_GEN(dev_priv) < 8)
engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 0c3c718..14d2ea3 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -204,8 +204,6 @@
int (*init_context)(struct drm_i915_gem_request *req);
- void (*write_tail)(struct intel_engine_cs *engine,
- u32 value);
int (*add_request)(struct drm_i915_gem_request *req);
/* Some chipsets are not quite as coherent as advertised and need
* an expensive kick to force a true read of the up-to-date seqno.
@@ -296,6 +294,7 @@
#define I915_DISPATCH_SECURE 0x1
#define I915_DISPATCH_PINNED 0x2
#define I915_DISPATCH_RS 0x4
+ void (*submit_request)(struct drm_i915_gem_request *req);
/**
* List of objects currently involved in rendering from the
@@ -461,6 +460,13 @@
static inline void intel_ring_advance(struct intel_ring *ring)
{
+ /* The modulus is required so that we avoid writing
+ * request->tail == ring->size, rather than the expected 0,
+ * into the RING_TAIL register as that can cause a GPU hang.
+ * As this is only strictly required for the request->tail,
+ * and only then as we write the value into hardware, we can
+ * one day remove the modulus after every command packet.
+ */
ring->tail &= ring->size - 1;
}