drm/amdgpu: add ctx_id to the WAIT_CS IOCTL (v4)

It is required to support fence per context.

v2: add amdgpu_ctx_get/put
v3: improve get/put
v4: squash hlock fix

Signed-off-by: Jammy Zhou <Jammy.Zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index cef3a43..bf0c607 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1902,6 +1902,8 @@
 							uint32_t id,struct amdgpu_ctx_state *state);
 
 void amdgpu_ctx_fini(struct amdgpu_fpriv *fpriv);
+struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
+int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
 
 extern int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
 						 struct drm_file *filp);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index ffbe9aa..86b9324 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -768,8 +768,13 @@
 	uint64_t seq[AMDGPU_MAX_RINGS] = {0};
 	struct amdgpu_ring *ring = NULL;
 	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
+	struct amdgpu_ctx *ctx;
 	long r;
 
+	ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
+	if (ctx == NULL)
+		return -EINVAL;
+
 	r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance,
 			       wait->in.ring, &ring);
 	if (r)
@@ -778,6 +783,7 @@
 	seq[ring->idx] = wait->in.handle;
 
 	r = amdgpu_fence_wait_seq_timeout(adev, seq, true, timeout);
+	amdgpu_ctx_put(ctx);
 	if (r < 0)
 		return r;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 0dc3a4e..bcd332e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -151,3 +151,33 @@
 
 	return r;
 }
+
+struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id)
+{
+	struct amdgpu_ctx *ctx;
+	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
+
+	mutex_lock(&mgr->lock);
+	ctx = idr_find(&mgr->ctx_handles, id);
+	if (ctx)
+		kref_get(&ctx->refcount);
+	mutex_unlock(&mgr->lock);
+	return ctx;
+}
+
+int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
+{
+	struct amdgpu_fpriv *fpriv;
+	struct amdgpu_ctx_mgr *mgr;
+
+	if (ctx == NULL)
+		return -EINVAL;
+
+	fpriv = ctx->fpriv;
+	mgr = &fpriv->ctx_mgr;
+	mutex_lock(&mgr->lock);
+	kref_put(&ctx->refcount, amdgpu_ctx_do_release);
+	mutex_unlock(&mgr->lock);
+
+	return 0;
+}