drm/radeon/kms: evergreen/ni big endian fixes (v2)

Based on 6xx/7xx endian fixes from Cédric Cano.

v2: fix typo in shader

Signed-off-by: Alex Deucher <alexdeucher@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index ffdc833..d270b3f 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -1192,7 +1192,11 @@
 	radeon_ring_write(rdev, 1);
 	/* FIXME: implement */
 	radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
-	radeon_ring_write(rdev, ib->gpu_addr & 0xFFFFFFFC);
+	radeon_ring_write(rdev,
+#ifdef __BIG_ENDIAN
+			  (2 << 0) |
+#endif
+			  (ib->gpu_addr & 0xFFFFFFFC));
 	radeon_ring_write(rdev, upper_32_bits(ib->gpu_addr) & 0xFF);
 	radeon_ring_write(rdev, ib->length_dw);
 }
@@ -1207,7 +1211,11 @@
 		return -EINVAL;
 
 	r700_cp_stop(rdev);
-	WREG32(CP_RB_CNTL, RB_NO_UPDATE | (15 << 8) | (3 << 0));
+	WREG32(CP_RB_CNTL,
+#ifdef __BIG_ENDIAN
+	       BUF_SWAP_32BIT |
+#endif
+	       RB_NO_UPDATE | RB_BLKSZ(15) | RB_BUFSZ(3));
 
 	fw_data = (const __be32 *)rdev->pfp_fw->data;
 	WREG32(CP_PFP_UCODE_ADDR, 0);
@@ -1326,7 +1334,11 @@
 	WREG32(CP_RB_WPTR, 0);
 
 	/* set the wb address wether it's enabled or not */
-	WREG32(CP_RB_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
+	WREG32(CP_RB_RPTR_ADDR,
+#ifdef __BIG_ENDIAN
+	       RB_RPTR_SWAP(2) |
+#endif
+	       ((rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC));
 	WREG32(CP_RB_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
 
@@ -2627,8 +2639,8 @@
 	while (rptr != wptr) {
 		/* wptr/rptr are in bytes! */
 		ring_index = rptr / 4;
-		src_id =  rdev->ih.ring[ring_index] & 0xff;
-		src_data = rdev->ih.ring[ring_index + 1] & 0xfffffff;
+		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
+		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
 
 		switch (src_id) {
 		case 1: /* D1 vblank/vline */
diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
index a1ba4b3..a7b7a33 100644
--- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c
+++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
@@ -133,6 +133,9 @@
 
 	/* high addr, stride */
 	sq_vtx_constant_word2 = ((upper_32_bits(gpu_addr) & 0xff) | (16 << 8));
+#ifdef __BIG_ENDIAN
+	sq_vtx_constant_word2 |= (2 << 30);
+#endif
 	/* xyzw swizzles */
 	sq_vtx_constant_word3 = (0 << 3) | (1 << 6) | (2 << 9) | (3 << 12);
 
@@ -221,7 +224,11 @@
 	radeon_ring_write(rdev, DI_PT_RECTLIST);
 
 	radeon_ring_write(rdev, PACKET3(PACKET3_INDEX_TYPE, 0));
-	radeon_ring_write(rdev, DI_INDEX_SIZE_16_BIT);
+	radeon_ring_write(rdev,
+#ifdef __BIG_ENDIAN
+			  (2 << 2) |
+#endif
+			  DI_INDEX_SIZE_16_BIT);
 
 	radeon_ring_write(rdev, PACKET3(PACKET3_NUM_INSTANCES, 0));
 	radeon_ring_write(rdev, 1);
@@ -541,7 +548,7 @@
 int evergreen_blit_init(struct radeon_device *rdev)
 {
 	u32 obj_size;
-	int r, dwords;
+	int i, r, dwords;
 	void *ptr;
 	u32 packet2s[16];
 	int num_packet2s = 0;
@@ -557,7 +564,7 @@
 
 	dwords = rdev->r600_blit.state_len;
 	while (dwords & 0xf) {
-		packet2s[num_packet2s++] = PACKET2(0);
+		packet2s[num_packet2s++] = cpu_to_le32(PACKET2(0));
 		dwords++;
 	}
 
@@ -598,8 +605,10 @@
 	if (num_packet2s)
 		memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4),
 			    packet2s, num_packet2s * 4);
-	memcpy(ptr + rdev->r600_blit.vs_offset, evergreen_vs, evergreen_vs_size * 4);
-	memcpy(ptr + rdev->r600_blit.ps_offset, evergreen_ps, evergreen_ps_size * 4);
+	for (i = 0; i < evergreen_vs_size; i++)
+		*(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(evergreen_vs[i]);
+	for (i = 0; i < evergreen_ps_size; i++)
+		*(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(evergreen_ps[i]);
 	radeon_bo_kunmap(rdev->r600_blit.shader_obj);
 	radeon_bo_unreserve(rdev->r600_blit.shader_obj);
 
diff --git a/drivers/gpu/drm/radeon/evergreen_blit_shaders.c b/drivers/gpu/drm/radeon/evergreen_blit_shaders.c
index ef1d28c..3a10399e 100644
--- a/drivers/gpu/drm/radeon/evergreen_blit_shaders.c
+++ b/drivers/gpu/drm/radeon/evergreen_blit_shaders.c
@@ -311,11 +311,19 @@
 	0x00000000,
 	0x3c000000,
 	0x67961001,
+#ifdef __BIG_ENDIAN
+	0x000a0000,
+#else
 	0x00080000,
+#endif
 	0x00000000,
 	0x1c000000,
 	0x67961000,
+#ifdef __BIG_ENDIAN
+	0x00020008,
+#else
 	0x00000008,
+#endif
 	0x00000000,
 };
 
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index afec1ac..eb4acf4 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -98,6 +98,7 @@
 #define		BUF_SWAP_32BIT					(2 << 16)
 #define	CP_RB_RPTR					0x8700
 #define	CP_RB_RPTR_ADDR					0xC10C
+#define		RB_RPTR_SWAP(x)					((x) << 0)
 #define	CP_RB_RPTR_ADDR_HI				0xC110
 #define	CP_RB_RPTR_WR					0xC108
 #define	CP_RB_WPTR					0xC114