viafb: complete support for VX800/VX855 accelerated framebuffer

This patch is a painful merge of commit
a90bab567ece3e915d0ccd55ab00c9bb333fa8c0 ("viafb: Add support for 2D
accelerated framebuffer on VX800/VX855") from the OLPC tree, originally by
Harald Welte.  Harald's changelog read:

	The VX800/VX820 and the VX855/VX875 chipsets have a different 2D
	acceleration engine called "M1".  The M1 engine has some subtle
	(and some not-so-subtle) differences to the previous engines, so
	support for accelerated framebuffer on those chipsets was disabled
	so far.

This merge tries to preserve Harald's changes in the framework of the
much-changed 2.6.34 viafb code.

Cc: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Cc: ScottFang@viatech.com.cn
Cc: JosephChan@via.com.tw
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
diff --git a/drivers/video/via/accel.c b/drivers/video/via/accel.c
index 7c1d9c4..0d90c85 100644
--- a/drivers/video/via/accel.c
+++ b/drivers/video/via/accel.c
@@ -317,6 +317,7 @@
 {
 	struct viafb_par *viapar = info->par;
 	void __iomem *engine;
+	int highest_reg, i;
 	u32 vq_start_addr, vq_end_addr, vq_start_low, vq_end_low, vq_high,
 		vq_len, chip_name = viapar->shared->chip_info.gfx_chip_name;
 
@@ -328,6 +329,18 @@
 		return -ENOMEM;
 	}
 
+	/* Initialize registers to reset the 2D engine */
+	switch (viapar->shared->chip_info.twod_engine) {
+	case VIA_2D_ENG_M1:
+		highest_reg = 0x5c;
+		break;
+	default:
+		highest_reg = 0x40;
+		break;
+	}
+	for (i = 0; i <= highest_reg; i += 4)
+		writel(0x0, engine + i);
+
 	switch (chip_name) {
 	case UNICHROME_CLE266:
 	case UNICHROME_K400:
@@ -357,13 +370,12 @@
 	viapar->shared->vq_vram_addr = viapar->fbmem_free;
 	viapar->fbmem_used += VQ_SIZE;
 
-	/* Init 2D engine reg to reset 2D engine */
-	writel(0x0, engine + VIA_REG_KEYCONTROL);
-
 	/* Init AGP and VQ regs */
 	switch (chip_name) {
 	case UNICHROME_K8M890:
 	case UNICHROME_P4M900:
+	case UNICHROME_VX800:
+	case UNICHROME_VX855:
 		writel(0x00100000, engine + VIA_REG_CR_TRANSET);
 		writel(0x680A0000, engine + VIA_REG_CR_TRANSPACE);
 		writel(0x02000000, engine + VIA_REG_CR_TRANSPACE);
@@ -398,6 +410,8 @@
 	switch (chip_name) {
 	case UNICHROME_K8M890:
 	case UNICHROME_P4M900:
+	case UNICHROME_VX800:
+	case UNICHROME_VX855:
 		vq_start_low |= 0x20000000;
 		vq_end_low |= 0x20000000;
 		vq_high |= 0x20000000;
@@ -475,15 +489,25 @@
 {
 	struct viafb_par *viapar = info->par;
 	int loop = 0;
+	u32 mask;
 
-	while (!(readl(viapar->shared->engine_mmio + VIA_REG_STATUS) &
-			VIA_VR_QUEUE_BUSY) && (loop < MAXLOOP)) {
-		loop++;
-		cpu_relax();
+	switch (viapar->shared->chip_info.twod_engine) {
+	case VIA_2D_ENG_H5:
+	case VIA_2D_ENG_M1:
+		mask = VIA_CMD_RGTR_BUSY_M1 | VIA_2D_ENG_BUSY_M1 |
+			      VIA_3D_ENG_BUSY_M1;
+		break;
+	default:
+		while (!(readl(viapar->shared->engine_mmio + VIA_REG_STATUS) &
+				VIA_VR_QUEUE_BUSY) && (loop < MAXLOOP)) {
+			loop++;
+			cpu_relax();
+		}
+		mask = VIA_CMD_RGTR_BUSY | VIA_2D_ENG_BUSY | VIA_3D_ENG_BUSY;
+		break;
 	}
 
-	while ((readl(viapar->shared->engine_mmio + VIA_REG_STATUS) &
-		    (VIA_CMD_RGTR_BUSY | VIA_2D_ENG_BUSY | VIA_3D_ENG_BUSY)) &&
+	while ((readl(viapar->shared->engine_mmio + VIA_REG_STATUS) & mask) &&
 		    (loop < MAXLOOP)) {
 		loop++;
 		cpu_relax();