drm/amd/amdgpu/vcn_v2_0: Mark RB commands as KMD commands

Sets the CMD_SOURCE bit for VCN 2.0 Decoder Ring Buffer commands. This
bit was previously set by the RBC HW on older firmware. Newer firmware
uses a SW RBC and this bit has to be set by the driver.

Signed-off-by: Thong Thai <thong.thai@amd.com>
Reviewed-by: Leo Liu <leo.liu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index 99f14fc..19661c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -30,6 +30,7 @@
 #define AMDGPU_VCN_FIRMWARE_OFFSET	256
 #define AMDGPU_VCN_MAX_ENC_RINGS	3
 
+#define VCN_DEC_KMD_CMD 		0x80000000
 #define VCN_DEC_CMD_FENCE		0x00000000
 #define VCN_DEC_CMD_TRAP		0x00000001
 #define VCN_DEC_CMD_WRITE_REG		0x00000004
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
index 1cfc262..46593e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
@@ -1485,7 +1485,7 @@ static void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring)
 	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0));
 	amdgpu_ring_write(ring, 0);
 	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0));
-	amdgpu_ring_write(ring, VCN_DEC_CMD_PACKET_START << 1);
+	amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_START << 1));
 }
 
 /**
@@ -1498,7 +1498,7 @@ static void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring)
 static void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring)
 {
 	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0));
-	amdgpu_ring_write(ring, VCN_DEC_CMD_PACKET_END << 1);
+	amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_END << 1));
 }
 
 /**
@@ -1543,7 +1543,7 @@ static void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64
 	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff);
 
 	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0));
-	amdgpu_ring_write(ring, VCN_DEC_CMD_FENCE << 1);
+	amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_FENCE << 1));
 
 	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0));
 	amdgpu_ring_write(ring, 0);
@@ -1553,7 +1553,7 @@ static void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64
 
 	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0));
 
-	amdgpu_ring_write(ring, VCN_DEC_CMD_TRAP << 1);
+	amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_TRAP << 1));
 }
 
 /**
@@ -1597,7 +1597,7 @@ static void vcn_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring,
 
 	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0));
 
-	amdgpu_ring_write(ring, VCN_DEC_CMD_REG_READ_COND_WAIT << 1);
+	amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_REG_READ_COND_WAIT << 1));
 }
 
 static void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
@@ -1626,7 +1626,7 @@ static void vcn_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring,
 
 	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0));
 
-	amdgpu_ring_write(ring, VCN_DEC_CMD_WRITE_REG << 1);
+	amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_WRITE_REG << 1));
 }
 
 /**