Merge "Fix color cube tests."
diff --git a/tests/RenderScriptTests/ImageProcessing/src/com/android/rs/image/ColorCube.java b/tests/RenderScriptTests/ImageProcessing/src/com/android/rs/image/ColorCube.java
index d03466d..d1dd47c 100644
--- a/tests/RenderScriptTests/ImageProcessing/src/com/android/rs/image/ColorCube.java
+++ b/tests/RenderScriptTests/ImageProcessing/src/com/android/rs/image/ColorCube.java
@@ -53,10 +53,10 @@
             for (int y = 0; y < sy; y++) {
                 for (int x = 0; x < sx; x++ ) {
 
-                    dat[z*sy*sx + y*sx + x] = 0xff000000 |
-                        ((x | (x<<2)) << 16) |
-                        ((y | (y<<2)) << 8) |
-                        ((z | (z<<2)) << 0);
+                    dat[z*sy*sx + y*sx + x] = //0xff000000 |
+                        (((x >> 2) | (x<<3)) << 0) |
+                        (((y >> 2) | (y<<3)) << 8) |
+                        ((z | (z<<4)) << 16);
 
 
                 }
diff --git a/tests/RenderScriptTests/ImageProcessing/src/com/android/rs/image/colorcube.rs b/tests/RenderScriptTests/ImageProcessing/src/com/android/rs/image/colorcube.rs
index 97bb429..09b3f69 100644
--- a/tests/RenderScriptTests/ImageProcessing/src/com/android/rs/image/colorcube.rs
+++ b/tests/RenderScriptTests/ImageProcessing/src/com/android/rs/image/colorcube.rs
@@ -20,36 +20,78 @@
 
 
 static rs_allocation gCube;
-static int4 gDims;
-static int4 gFracMask;
-static int4 gFracBits;
+static short4 gDims;       // per-axis cube dimension minus 1 (w is 0); filled in by setCube()
+static short4 gFracMask;   // copy of gDims, assigned in setCube()
+static short4 gFracBits;   // 32 - clz(gFracMask), computed in setCube()
+static short4 gFracShift;  // 8 - gFracBits; root() shifts each 8-bit input channel right by this
+static int4 gFinalShift;   // sum of the x, y and z entries of gFracShift; final right shift in root()
+static int4 gFinalAdd;     // ((1 << gFinalShift) - 1) >> 1; added in root() before the final shift
 
 void setCube(rs_allocation c) {
     gCube = c;
-    gDims.x = rsAllocationGetDimX(gCube);
-    gDims.y = rsAllocationGetDimY(gCube);
-    gDims.z = rsAllocationGetDimZ(gCube);
+    gDims.x = rsAllocationGetDimX(gCube) - 1;  // dimension minus 1; root() clamps the upper neighbour index to this
+    gDims.y = rsAllocationGetDimY(gCube) - 1;
+    gDims.z = rsAllocationGetDimZ(gCube) - 1;
     gDims.w = 0;
 
-    gFracMask = gDims - 1;
-    gFracBits = (int4)32 - clz(gFracMask);
+    gFracMask = gDims;  // presumably a usable bit mask only when each dimension is a power of two -- TODO confirm
+    gFracBits = (short4)32 - clz(gFracMask);  // NOTE(review): operands are now short4; 32 is only right if clz() counts over 32 bits for 16-bit lanes -- confirm
+    gFracShift = (short4)8 - gFracBits;  // bits of each 8-bit input channel left over after taking gFracBits index bits
 
     rsDebug("dims", gDims);
     rsDebug("gFracMask", gFracMask);
     rsDebug("gFracBits", gFracBits);
+
+    gFinalShift = gFracShift.x + gFracShift.y + gFracShift.z;  // root() multiplies by one weight pair per axis, so the total scale is 1 << (sum of the shifts)
+    gFinalAdd = (((int4)1 << gFinalShift) - (int4)1) >> (int4)1;  // bias of just under half the scale, added before the final shift in root()
+
+    rsDebug("gFinalShift", gFinalShift);
+    rsDebug("gFinalAdd", gFinalAdd);
+
 }
 
-void root(const uchar4 *in, uchar4 *out) {
+void root(const uchar4 *in, uchar4 *out, uint32_t x, uint32_t y) {  // maps *in through gCube by blending the 8 surrounding cube entries; x and y are read only by the disabled debug block
     //rsDebug("root", in);
 
-    int4 coord1 = convert_int4(*in);
-    int4 coord2 = min(coord1 + 1, gDims);
+    short4 baseCoord = convert_short4(*in);  // widen the 8-bit input channels to 16-bit lanes
+    short4 coord1 = baseCoord >> gFracShift;  // lower cube index on each axis
+    short4 coord2 = min(coord1 + (short4)1, gDims);  // next index up, clamped to gDims
 
-    uchar4 v1 = rsGetElementAt_uchar4(gCube, coord1.x >> 3, coord1.y >> 3, coord1.z >> 4);
+    short4 weight2 = baseCoord - (coord1 << gFracShift);  // the low bits dropped by the shift: weight given to coord2
+    short4 weight1 = ((short4)1 << gFracShift) - weight2;  // complement, so weight1 + weight2 == 1 << gFracShift
 
-    //rsDebug("coord1", coord1);
-    //rsDebug("coord2", coord2);
+    ushort4 v000 = convert_ushort4(rsGetElementAt_uchar4(gCube, coord1.x, coord1.y, coord1.z));  // vXYZ naming: digit 0 = coord1, digit 1 = coord2 on that axis
+    ushort4 v100 = convert_ushort4(rsGetElementAt_uchar4(gCube, coord2.x, coord1.y, coord1.z));
+    ushort4 v010 = convert_ushort4(rsGetElementAt_uchar4(gCube, coord1.x, coord2.y, coord1.z));
+    ushort4 v110 = convert_ushort4(rsGetElementAt_uchar4(gCube, coord2.x, coord2.y, coord1.z));
+    ushort4 v001 = convert_ushort4(rsGetElementAt_uchar4(gCube, coord1.x, coord1.y, coord2.z));
+    ushort4 v101 = convert_ushort4(rsGetElementAt_uchar4(gCube, coord2.x, coord1.y, coord2.z));
+    ushort4 v011 = convert_ushort4(rsGetElementAt_uchar4(gCube, coord1.x, coord2.y, coord2.z));
+    ushort4 v111 = convert_ushort4(rsGetElementAt_uchar4(gCube, coord2.x, coord2.y, coord2.z));
 
-    *out = v1;
+    uint4 yz00 = convert_uint4((v000 * weight1.x) + (v100 * weight2.x));  // blend along x, then widen to uint4 for the later multiplies
+    uint4 yz10 = convert_uint4((v010 * weight1.x) + (v110 * weight2.x));
+    uint4 yz01 = convert_uint4((v001 * weight1.x) + (v101 * weight2.x));
+    uint4 yz11 = convert_uint4((v011 * weight1.x) + (v111 * weight2.x));
+
+    uint4 z0 = (yz00 * weight1.y) + (yz10 * weight2.y);  // blend along y
+    uint4 z1 = (yz01 * weight1.y) + (yz11 * weight2.y);
+
+    uint4 v = (z0 * weight1.z) + (z1 * weight2.z);  // blend along z; result is still scaled by 1 << gFinalShift
+
+    #if 0
+    if (x + y < 100) {
+        rsDebug("coord1", coord1);
+        rsDebug("coord2", coord2);
+        rsDebug("weight1", weight1);
+        rsDebug("weight2", weight2);
+        rsDebug("yz00", yz00);
+        rsDebug("z0", z0);
+        rsDebug("v", v);
+    }
+    #endif
+
+    *out = convert_uchar4((v + gFinalAdd) >> gFinalShift);  // add the rounding bias, shift out the weight scale, narrow to 8 bits
+    out->a = 0xff;  // overwrite the a component with 0xff regardless of the blended value
 }