drm/i915: Unify active context tracking between legacy/execlists/guc The requests conversion introduced a nasty bug where we could generate a new request in the middle of constructing a request if we needed to idle the system in order to evict space for a context. The request to idle would be executed (and waited upon) before the current one, creating minor havoc in the seqno accounting, as we will consider the current request to already be completed (prior to deferred seqno assignment) but ring->last_retired_head would have been updated and still could allow us to overwrite the current request before execution. We also employed two different mechanisms to track the active context until it was switched out. The legacy method allowed for waiting upon an active context (it could forcibly evict any vma, including a context's), but the execlists method took a step backwards by pinning the vma for the entire active lifespan of the context (the only way to evict was to idle the entire GPU, not individual contexts). However, to circumvent the tricky issue of locking (i.e. we cannot take struct_mutex at the time of i915_gem_request_submit(), where we would want to move the previous context onto the active tracker and unpin it), we take the execlists approach and keep the contexts pinned until retirement. The benefit of the execlists approach, more important for execlists than legacy, was the reduction in work in pinning the context for each request - as the context was kept pinned until idle, it could short-circuit the pinning for all active contexts. We introduce new engine vfuncs to pin and unpin the context respectively. The context is pinned at the start of the request, and only unpinned when the following request is retired (this ensures that the context is idle and coherent in main memory before we unpin it). 
We move the engine->last_context tracking into the retirement itself (rather than during request submission) in order to allow the submission to be reordered or unwound without undue difficulty. And finally, an ulterior motive for unifying context handling was to prepare for mock requests. v2: Rename to last_retired_context, split out legacy_context tracking for MI_SET_CONTEXT. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/20161218153724.8439-3-chris@chris-wilson.co.uk

commit: e8a9c58fcd9a5081f71f57f370af1347ed6a310b [log] [tgz]
author: Chris Wilson <chris@chris-wilson.co.uk> Sun Dec 18 15:37:20 2016 +0000
committer: Chris Wilson <chris@chris-wilson.co.uk> Sun Dec 18 16:18:50 2016 +0000
tree: 61b8fe4eb3219a120f8e941ac4b0aa4a8f10fa69
parent: ef11c01db405b49d96a0ab2689807ea33ae85dd6 [diff] [blame]
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index b848b5f..599afed 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c

@@ -512,15 +512,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		RB_CLEAR_NODE(&cursor->priotree.node);
 		cursor->priotree.priority = INT_MAX;
 
-		/* We keep the previous context alive until we retire the
-		 * following request. This ensures that any the context object
-		 * is still pinned for any residual writes the HW makes into it
-		 * on the context switch into the next object following the
-		 * breadcrumb. Otherwise, we may retire the context too early.
-		 */
-		cursor->previous_context = engine->last_context;
-		engine->last_context = cursor->ctx;
-
 		__i915_gem_request_submit(cursor);
 		last = cursor;
 		submit = true;
@@ -772,8 +763,8 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 	/* XXX Do we need to preempt to make room for us and our deps? */
 }
 
-static int intel_lr_context_pin(struct i915_gem_context *ctx,
-				struct intel_engine_cs *engine)
+static int execlists_context_pin(struct intel_engine_cs *engine,
+				 struct i915_gem_context *ctx)
 {
 	struct intel_context *ce = &ctx->engine[engine->id];
 	void *vaddr;
@@ -784,6 +775,12 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
 	if (ce->pin_count++)
 		return 0;
 
+	if (!ce->state) {
+		ret = execlists_context_deferred_alloc(ctx, engine);
+		if (ret)
+			goto err;
+	}
+
 	ret = i915_vma_pin(ce->state, 0, GEN8_LR_CONTEXT_ALIGN,
 			   PIN_OFFSET_BIAS | GUC_WOPCM_TOP | PIN_GLOBAL);
 	if (ret)
@@ -825,8 +822,8 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
 	return ret;
 }
 
-void intel_lr_context_unpin(struct i915_gem_context *ctx,
-			    struct intel_engine_cs *engine)
+static void execlists_context_unpin(struct intel_engine_cs *engine,
+				    struct i915_gem_context *ctx)
 {
 	struct intel_context *ce = &ctx->engine[engine->id];
 
@@ -850,24 +847,17 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
 	struct intel_context *ce = &request->ctx->engine[engine->id];
 	int ret;
 
+	GEM_BUG_ON(!ce->pin_count);
+
 	/* Flush enough space to reduce the likelihood of waiting after
 	 * we start building the request - in which case we will just
 	 * have to repeat work.
 	 */
 	request->reserved_space += EXECLISTS_REQUEST_SIZE;
 
-	if (!ce->state) {
-		ret = execlists_context_deferred_alloc(request->ctx, engine);
-		if (ret)
-			return ret;
-	}
-
+	GEM_BUG_ON(!ce->ring);
 	request->ring = ce->ring;
 
-	ret = intel_lr_context_pin(request->ctx, engine);
-	if (ret)
-		return ret;
-
 	if (i915.enable_guc_submission) {
 		/*
 		 * Check that the GuC has space for the request before
@@ -876,7 +866,7 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
 		 */
 		ret = i915_guc_wq_reserve(request);
 		if (ret)
-			goto err_unpin;
+			goto err;
 	}
 
 	ret = intel_ring_begin(request, 0);
@@ -904,8 +894,7 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request
 err_unreserve:
 	if (i915.enable_guc_submission)
 		i915_guc_wq_unreserve(request);
-err_unpin:
-	intel_lr_context_unpin(request->ctx, engine);
+err:
 	return ret;
 }
 
@@ -1789,13 +1778,12 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
 	if (engine->cleanup)
 		engine->cleanup(engine);
 
-	intel_engine_cleanup_common(engine);
-
 	if (engine->status_page.vma) {
 		i915_gem_object_unpin_map(engine->status_page.vma->obj);
 		engine->status_page.vma = NULL;
 	}
-	intel_lr_context_unpin(dev_priv->kernel_context, engine);
+
+	intel_engine_cleanup_common(engine);
 
 	lrc_destroy_wa_ctx_obj(engine);
 	engine->i915 = NULL;
@@ -1820,6 +1808,10 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 	/* Default vfuncs which can be overriden by each engine. */
 	engine->init_hw = gen8_init_common_ring;
 	engine->reset_hw = reset_common_ring;
+
+	engine->context_pin = execlists_context_pin;
+	engine->context_unpin = execlists_context_unpin;
+
 	engine->emit_flush = gen8_emit_flush;
 	engine->emit_breadcrumb = gen8_emit_breadcrumb;
 	engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_sz;
@@ -1902,18 +1894,6 @@ logical_ring_init(struct intel_engine_cs *engine)
 	if (ret)
 		goto error;
 
-	ret = execlists_context_deferred_alloc(dctx, engine);
-	if (ret)
-		goto error;
-
-	/* As this is the default context, always pin it */
-	ret = intel_lr_context_pin(dctx, engine);
-	if (ret) {
-		DRM_ERROR("Failed to pin context for %s: %d\n",
-			  engine->name, ret);
-		goto error;
-	}
-
 	/* And setup the hardware status page. */
 	ret = lrc_setup_hws(engine, dctx->engine[engine->id].state);
 	if (ret) {
commit	e8a9c58fcd9a5081f71f57f370af1347ed6a310b	[log] [tgz]
author	Chris Wilson <chris@chris-wilson.co.uk>	Sun Dec 18 15:37:20 2016 +0000
committer	Chris Wilson <chris@chris-wilson.co.uk>	Sun Dec 18 16:18:50 2016 +0000
tree	61b8fe4eb3219a120f8e941ac4b0aa4a8f10fa69
parent	ef11c01db405b49d96a0ab2689807ea33ae85dd6 [diff] [blame]