Introduce PixelBuffer API to enable PBOs

PBOs (Pixel Buffer Objects) can be used on OpenGL ES 3.0 to perform
asynchronous texture uploads to free up the CPU. This change does not
enable the use of PBOs unless a specific property is set (Adreno drivers
have issues with PBOs at the moment, Mali drivers work just fine.)

This change also cleans up Font/FontRenderer a little bit and improves
performance of drop shadow generation by using memcpy() instead of
a manual byte-by-byte copy.

On GL ES 2.0 devices, or when PBOs are disabled, a PixelBuffer instance
behaves like a simple byte array. The extra APIs introduced for PBOs
(map/unmap and bind/unbind) are pretty much no-ops for CPU pixel
buffers and won't introduce any significant overhead.

This change also fixes a bug with text drop shadows: if the drop
shadow is larger than the max texture size, the renderer would leave
the GL context in a bad state and generate 0x501 errors. This change
simply skips drop shadows if they are too large.

Change-Id: I2700aadb0c6093431dc5dee3d587d689190c4e23
diff --git a/libs/hwui/Android.mk b/libs/hwui/Android.mk
index 7b59bf2..a630ea1 100644
--- a/libs/hwui/Android.mk
+++ b/libs/hwui/Android.mk
@@ -30,6 +30,7 @@
 		PatchCache.cpp \
 		PathCache.cpp \
 		PathTessellator.cpp \
+		PixelBuffer.cpp \
 		Program.cpp \
 		ProgramCache.cpp \
 		RenderBufferCache.cpp \
diff --git a/libs/hwui/Caches.cpp b/libs/hwui/Caches.cpp
index 57d1a4f..a381a68 100644
--- a/libs/hwui/Caches.cpp
+++ b/libs/hwui/Caches.cpp
@@ -70,6 +70,7 @@
     mCurrentPositionPointer = this;
     mCurrentPositionStride = 0;
     mCurrentTexCoordsPointer = this;
+    mCurrentPixelBuffer = 0;
 
     mTexCoordsArrayEnabled = false;
 
@@ -366,6 +367,28 @@
 }
 
 ///////////////////////////////////////////////////////////////////////////////
+// PBO
+///////////////////////////////////////////////////////////////////////////////
+
+bool Caches::bindPixelBuffer(const GLuint buffer) {
+    if (mCurrentPixelBuffer != buffer) {
+        glBindBuffer(GL_PIXEL_UNPACK_BUFFER, buffer);
+        mCurrentPixelBuffer = buffer;
+        return true;
+    }
+    return false;
+}
+
+bool Caches::unbindPixelBuffer() {
+    if (mCurrentPixelBuffer) {
+        glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+        mCurrentPixelBuffer = 0;
+        return true;
+    }
+    return false;
+}
+
+///////////////////////////////////////////////////////////////////////////////
 // Meshes and textures
 ///////////////////////////////////////////////////////////////////////////////
 
diff --git a/libs/hwui/Caches.h b/libs/hwui/Caches.h
index 63836c1..91b938b 100644
--- a/libs/hwui/Caches.h
+++ b/libs/hwui/Caches.h
@@ -176,6 +176,16 @@
     bool unbindIndicesBuffer();
 
     /**
+     * Binds the specified buffer as the current GL unpack pixel buffer.
+     */
+    bool bindPixelBuffer(const GLuint buffer);
+
+    /**
+     * Resets the current unpack pixel buffer to 0 (default value.)
+     */
+    bool unbindPixelBuffer();
+
+    /**
      * Binds an attrib to the specified float vertex pointer.
      * Assumes a stride of gMeshStride and a size of 2.
      */
@@ -307,6 +317,7 @@
 
     GLuint mCurrentBuffer;
     GLuint mCurrentIndicesBuffer;
+    GLuint mCurrentPixelBuffer;
     void* mCurrentPositionPointer;
     GLsizei mCurrentPositionStride;
     void* mCurrentTexCoordsPointer;
diff --git a/libs/hwui/DeferredDisplayList.cpp b/libs/hwui/DeferredDisplayList.cpp
index fe51bf9..d5007e1 100644
--- a/libs/hwui/DeferredDisplayList.cpp
+++ b/libs/hwui/DeferredDisplayList.cpp
@@ -21,6 +21,7 @@
 
 #include <utils/Trace.h>
 
+#include "Caches.h"
 #include "Debug.h"
 #include "DisplayListOp.h"
 #include "OpenGLRenderer.h"
@@ -377,6 +378,8 @@
 
 status_t DeferredDisplayList::flush(OpenGLRenderer& renderer, Rect& dirty) {
     ATRACE_NAME("flush drawing commands");
+    Caches::getInstance().fontRenderer->endPrecaching();
+
     status_t status = DrawGlInfo::kStatusDone;
 
     if (isEmpty()) return status; // nothing to flush
diff --git a/libs/hwui/FontRenderer.cpp b/libs/hwui/FontRenderer.cpp
index 44dc731..6894ef9 100644
--- a/libs/hwui/FontRenderer.cpp
+++ b/libs/hwui/FontRenderer.cpp
@@ -33,6 +33,7 @@
 #include "Debug.h"
 #include "Extensions.h"
 #include "FontRenderer.h"
+#include "PixelBuffer.h"
 #include "Rect.h"
 
 namespace android {
@@ -133,26 +134,13 @@
     for (uint32_t i = 0; i < mCacheTextures.size(); i++) {
         mCacheTextures[i]->init();
     }
-
-#if DEBUG_FONT_RENDERER
-    uint16_t totalGlyphs = 0;
-    for (uint32_t i = 0; i < mCacheTextures.size(); i++) {
-        totalGlyphs += mCacheTextures[i]->getGlyphCount();
-        // Erase caches, just as a debugging facility
-        if (mCacheTextures[i]->getTexture()) {
-            memset(mCacheTextures[i]->getTexture(), 0,
-                    mCacheTextures[i]->getWidth() * mCacheTextures[i]->getHeight());
-        }
-    }
-    ALOGD("Flushing caches: glyphs cached = %d", totalGlyphs);
-#endif
 }
 
 void FontRenderer::flushLargeCaches() {
     // Start from 1; don't deallocate smallest/default texture
     for (uint32_t i = 1; i < mCacheTextures.size(); i++) {
         CacheTexture* cacheTexture = mCacheTextures[i];
-        if (cacheTexture->getTexture()) {
+        if (cacheTexture->getPixelBuffer()) {
             cacheTexture->init();
             LruCache<Font::FontDescription, Font*>::Iterator it(mActiveFonts);
             while (it.next()) {
@@ -226,7 +214,7 @@
 
     uint32_t cacheWidth = cacheTexture->getWidth();
 
-    if (!cacheTexture->getTexture()) {
+    if (!cacheTexture->getPixelBuffer()) {
         Caches::getInstance().activeTexture(0);
         // Large-glyph texture memory is allocated only as needed
         cacheTexture->allocateTexture();
@@ -239,7 +227,7 @@
     // or anti-aliased (8 bits per pixel)
     SkMask::Format format = static_cast<SkMask::Format>(glyph.fMaskFormat);
 
-    uint8_t* cacheBuffer = cacheTexture->getTexture();
+    uint8_t* cacheBuffer = cacheTexture->getPixelBuffer()->map();
     uint32_t cacheX = 0, bX = 0, cacheY = 0, bY = 0;
 
     // Copy the glyph image, taking the mask format into account
@@ -377,56 +365,36 @@
     Caches& caches = Caches::getInstance();
     GLuint lastTextureId = 0;
 
-    // OpenGL ES 3.0+ lets us specify the row length for unpack operations such
-    // as glTexSubImage2D(). This allows us to upload a sub-rectangle of a texture.
-    // With OpenGL ES 2.0 we have to upload entire stripes instead.
-    const bool hasUnpackRowLength = Extensions::getInstance().getMajorGlVersion() >= 3;
+    bool resetPixelStore = false;
     glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
 
     // Iterate over all the cache textures and see which ones need to be updated
     for (uint32_t i = 0; i < mCacheTextures.size(); i++) {
         CacheTexture* cacheTexture = mCacheTextures[i];
-        if (cacheTexture->isDirty() && cacheTexture->getTexture()) {
-            const Rect* dirtyRect = cacheTexture->getDirtyRect();
-            uint32_t x = hasUnpackRowLength ? dirtyRect->left : 0;
-            uint32_t y = dirtyRect->top;
-            uint32_t width = cacheTexture->getWidth();
-            uint32_t height = dirtyRect->getHeight();
-            void* textureData = cacheTexture->getTexture() + y * width + x;
-
+        if (cacheTexture->isDirty() && cacheTexture->getPixelBuffer()) {
             if (cacheTexture->getTextureId() != lastTextureId) {
                 lastTextureId = cacheTexture->getTextureId();
                 caches.activeTexture(0);
                 glBindTexture(GL_TEXTURE_2D, lastTextureId);
-
-                // The unpack row length only needs to be specified when a new
-                // texture is bound
-                if (hasUnpackRowLength) {
-                    glPixelStorei(GL_UNPACK_ROW_LENGTH, width);
-                }
             }
 
-            // If we can upload a sub-rectangle, use the dirty rect width
-            // instead of the width of the entire texture
-            if (hasUnpackRowLength) {
-                width = dirtyRect->getWidth();
+            if (cacheTexture->upload()) {
+                resetPixelStore = true;
             }
 
 #if DEBUG_FONT_RENDERER
             ALOGD("glTexSubimage for cacheTexture %d: x, y, width height = %d, %d, %d, %d",
                     i, x, y, width, height);
 #endif
-
-            glTexSubImage2D(GL_TEXTURE_2D, 0, x, y, width, height,
-                    GL_ALPHA, GL_UNSIGNED_BYTE, textureData);
-
-            cacheTexture->setDirty(false);
         }
     }
 
+    // Unbind any PBO we might have used to update textures
+    caches.unbindPixelBuffer();
+
     // Reset to default unpack row length to avoid affecting texture
     // uploads in other parts of the renderer
-    if (hasUnpackRowLength) {
+    if (resetPixelStore) {
         glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
     }
 
@@ -539,13 +507,14 @@
         uint32_t startIndex, uint32_t len, int numGlyphs, uint32_t radius, const float* positions) {
     checkInit();
 
+    DropShadow image;
+    image.width = 0;
+    image.height = 0;
+    image.image = NULL;
+    image.penX = 0;
+    image.penY = 0;
+
     if (!mCurrentFont) {
-        DropShadow image;
-        image.width = 0;
-        image.height = 0;
-        image.image = NULL;
-        image.penX = 0;
-        image.penY = 0;
         return image;
     }
 
@@ -559,6 +528,11 @@
     uint32_t paddedWidth = (uint32_t) (bounds.right - bounds.left) + 2 * radius;
     uint32_t paddedHeight = (uint32_t) (bounds.top - bounds.bottom) + 2 * radius;
 
+    uint32_t maxSize = Caches::getInstance().maxTextureSize;
+    if (paddedWidth > maxSize || paddedHeight > maxSize) {
+        return image;
+    }
+
     // Align buffers for renderscript usage
     if (paddedWidth & (RS_CPU_ALLOCATION_ALIGNMENT - 1)) {
         paddedWidth += RS_CPU_ALLOCATION_ALIGNMENT - paddedWidth % RS_CPU_ALLOCATION_ALIGNMENT;
@@ -578,10 +552,12 @@
         mCurrentFont->render(paint, text, startIndex, len, numGlyphs, penX, penY,
                 Font::BITMAP, dataBuffer, paddedWidth, paddedHeight, NULL, positions);
 
+        // Unbind any PBO we might have used
+        Caches::getInstance().unbindPixelBuffer();
+
         blurImage(&dataBuffer, paddedWidth, paddedHeight, radius);
     }
 
-    DropShadow image;
     image.width = paddedWidth;
     image.height = paddedHeight;
     image.image = dataBuffer;
@@ -612,6 +588,10 @@
     font->precache(paint, text, numGlyphs);
 }
 
+void FontRenderer::endPrecaching() {
+    checkTextureUpdate();
+}
+
 bool FontRenderer::renderPosText(SkPaint* paint, const Rect* clip, const char *text,
         uint32_t startIndex, uint32_t len, int numGlyphs, int x, int y,
         const float* positions, Rect* bounds, Functor* functor) {
@@ -690,5 +670,16 @@
     *image = outImage;
 }
 
+uint32_t FontRenderer::getCacheSize() const {
+    uint32_t size = 0;
+    for (uint32_t i = 0; i < mCacheTextures.size(); i++) {
+        CacheTexture* cacheTexture = mCacheTextures[i];
+        if (cacheTexture && cacheTexture->getPixelBuffer()) {
+            size += cacheTexture->getPixelBuffer()->getSize();
+        }
+    }
+    return size;
+}
+
 }; // namespace uirenderer
 }; // namespace android
diff --git a/libs/hwui/FontRenderer.h b/libs/hwui/FontRenderer.h
index 1da3b6c..348b7e3 100644
--- a/libs/hwui/FontRenderer.h
+++ b/libs/hwui/FontRenderer.h
@@ -61,6 +61,7 @@
     void setFont(SkPaint* paint, const mat4& matrix);
 
     void precache(SkPaint* paint, const char* text, int numGlyphs, const mat4& matrix);
+    void endPrecaching();
 
     // bounds is an out parameter
     bool renderPosText(SkPaint* paint, const Rect* clip, const char *text, uint32_t startIndex,
@@ -95,16 +96,7 @@
         mLinearFiltering = linearFiltering;
     }
 
-    uint32_t getCacheSize() const {
-        uint32_t size = 0;
-        for (uint32_t i = 0; i < mCacheTextures.size(); i++) {
-            CacheTexture* cacheTexture = mCacheTextures[i];
-            if (cacheTexture && cacheTexture->getTexture()) {
-                size += cacheTexture->getWidth() * cacheTexture->getHeight();
-            }
-        }
-        return size;
-    }
+    uint32_t getCacheSize() const;
 
 private:
     friend class Font;
diff --git a/libs/hwui/GammaFontRenderer.cpp b/libs/hwui/GammaFontRenderer.cpp
index bd0a4b3..06d2aad 100644
--- a/libs/hwui/GammaFontRenderer.cpp
+++ b/libs/hwui/GammaFontRenderer.cpp
@@ -129,6 +129,12 @@
     }
 }
 
+void ShaderGammaFontRenderer::endPrecaching() {
+    if (mRenderer) {
+        mRenderer->endPrecaching();
+    }
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 // Lookup-based renderer
 ///////////////////////////////////////////////////////////////////////////////
@@ -146,6 +152,12 @@
     mRenderer = NULL;
 }
 
+void LookupGammaFontRenderer::endPrecaching() {
+    if (mRenderer) {
+        mRenderer->endPrecaching();
+    }
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 // Lookup-based renderer, using 3 different correction tables
 ///////////////////////////////////////////////////////////////////////////////
@@ -177,6 +189,14 @@
     }
 }
 
+void Lookup3GammaFontRenderer::endPrecaching() {
+    for (int i = 0; i < kGammaCount; i++) {
+        if (mRenderers[i]) {
+            mRenderers[i]->endPrecaching();
+        }
+    }
+}
+
 void Lookup3GammaFontRenderer::clear() {
     for (int i = 0; i < kGammaCount; i++) {
         delete mRenderers[i];
diff --git a/libs/hwui/GammaFontRenderer.h b/libs/hwui/GammaFontRenderer.h
index 5c1860e..bbfa66d 100644
--- a/libs/hwui/GammaFontRenderer.h
+++ b/libs/hwui/GammaFontRenderer.h
@@ -40,6 +40,8 @@
     virtual void describe(ProgramDescription& description, const SkPaint* paint) const = 0;
     virtual void setupProgram(ProgramDescription& description, Program* program) const = 0;
 
+    virtual void endPrecaching() = 0;
+
     static GammaFontRenderer* createRenderer();
 
 protected:
@@ -86,6 +88,8 @@
     void describe(ProgramDescription& description, const SkPaint* paint) const;
     void setupProgram(ProgramDescription& description, Program* program) const;
 
+    void endPrecaching();
+
 private:
     ShaderGammaFontRenderer(bool multiGamma);
 
@@ -134,6 +138,8 @@
     void setupProgram(ProgramDescription& description, Program* program) const {
     }
 
+    void endPrecaching();
+
 private:
     LookupGammaFontRenderer();
 
@@ -171,6 +177,8 @@
     void setupProgram(ProgramDescription& description, Program* program) const {
     }
 
+    void endPrecaching();
+
 private:
     Lookup3GammaFontRenderer();
 
diff --git a/libs/hwui/OpenGLRenderer.cpp b/libs/hwui/OpenGLRenderer.cpp
index e18d922..dcd1eb8 100644
--- a/libs/hwui/OpenGLRenderer.cpp
+++ b/libs/hwui/OpenGLRenderer.cpp
@@ -2644,6 +2644,9 @@
     mCaches.dropShadowCache.setFontRenderer(fontRenderer);
     const ShadowTexture* shadow = mCaches.dropShadowCache.get(
             paint, text, bytesCount, count, mDrawModifiers.mShadowRadius, positions);
+    // If the drop shadow exceeds the max texture size or couldn't be
+    // allocated, skip drawing
+    if (!shadow) return;
     const AutoTexture autoCleanup(shadow);
 
     const float sx = x - shadow->left + mDrawModifiers.mShadowDx;
diff --git a/libs/hwui/PixelBuffer.cpp b/libs/hwui/PixelBuffer.cpp
new file mode 100644
index 0000000..8280370
--- /dev/null
+++ b/libs/hwui/PixelBuffer.cpp
@@ -0,0 +1,163 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define LOG_TAG "OpenGLRenderer"
+
+#include <utils/Log.h>
+
+#include "Caches.h"
+#include "Extensions.h"
+#include "PixelBuffer.h"
+#include "Properties.h"
+
+namespace android {
+namespace uirenderer {
+
+///////////////////////////////////////////////////////////////////////////////
+// CPU pixel buffer
+///////////////////////////////////////////////////////////////////////////////
+
+class CpuPixelBuffer: public PixelBuffer {
+public:
+    CpuPixelBuffer(GLenum format, uint32_t width, uint32_t height);
+    ~CpuPixelBuffer();
+
+    uint8_t* map(AccessMode mode = kAccessMode_ReadWrite);
+    void unmap();
+
+    uint8_t* getMappedPointer() const;
+
+    void upload(uint32_t x, uint32_t y, uint32_t width, uint32_t height, int offset);
+
+private:
+    uint8_t* mBuffer;
+};
+
+CpuPixelBuffer::CpuPixelBuffer(GLenum format, uint32_t width, uint32_t height):
+        PixelBuffer(format, width, height) {
+    mBuffer = new uint8_t[width * height * formatSize(format)];
+}
+
+CpuPixelBuffer::~CpuPixelBuffer() {
+    delete[] mBuffer;
+}
+
+uint8_t* CpuPixelBuffer::map(AccessMode mode) {
+    if (mAccessMode == kAccessMode_None) {
+        mAccessMode = mode;
+    }
+    return mBuffer;
+}
+
+void CpuPixelBuffer::unmap() {
+    mAccessMode = kAccessMode_None;
+}
+
+uint8_t* CpuPixelBuffer::getMappedPointer() const {
+    return mAccessMode == kAccessMode_None ? NULL : mBuffer;
+}
+
+void CpuPixelBuffer::upload(uint32_t x, uint32_t y, uint32_t width, uint32_t height, int offset) {
+    glTexSubImage2D(GL_TEXTURE_2D, 0, x, y, width, height,
+            mFormat, GL_UNSIGNED_BYTE, mBuffer + offset);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// GPU pixel buffer
+///////////////////////////////////////////////////////////////////////////////
+
+class GpuPixelBuffer: public PixelBuffer {
+public:
+    GpuPixelBuffer(GLenum format, uint32_t width, uint32_t height);
+    ~GpuPixelBuffer();
+
+    uint8_t* map(AccessMode mode = kAccessMode_ReadWrite);
+    void unmap();
+
+    uint8_t* getMappedPointer() const;
+
+    void upload(uint32_t x, uint32_t y, uint32_t width, uint32_t height, int offset);
+
+private:
+    GLuint mBuffer;
+    uint8_t* mMappedPointer;
+    Caches& mCaches;
+};
+
+GpuPixelBuffer::GpuPixelBuffer(GLenum format, uint32_t width, uint32_t height):
+        PixelBuffer(format, width, height), mMappedPointer(0), mCaches(Caches::getInstance()) {
+    glGenBuffers(1, &mBuffer);
+    mCaches.bindPixelBuffer(mBuffer);
+    glBufferData(GL_PIXEL_UNPACK_BUFFER, getSize(), NULL, GL_DYNAMIC_DRAW);
+    mCaches.unbindPixelBuffer();
+}
+
+GpuPixelBuffer::~GpuPixelBuffer() {
+    glDeleteBuffers(1, &mBuffer);
+}
+
+uint8_t* GpuPixelBuffer::map(AccessMode mode) {
+    if (mAccessMode == kAccessMode_None) {
+        mCaches.bindPixelBuffer(mBuffer);
+        mMappedPointer = (uint8_t*) glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, getSize(), mode);
+        mAccessMode = mode;
+    }
+
+    return mMappedPointer;
+}
+
+void GpuPixelBuffer::unmap() {
+    if (mAccessMode != kAccessMode_None) {
+        if (mMappedPointer) {
+            mCaches.bindPixelBuffer(mBuffer);
+            glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
+        }
+        mAccessMode = kAccessMode_None;
+        mMappedPointer = NULL;
+    }
+}
+
+uint8_t* GpuPixelBuffer::getMappedPointer() const {
+    return mMappedPointer;
+}
+
+void GpuPixelBuffer::upload(uint32_t x, uint32_t y, uint32_t width, uint32_t height, int offset) {
+    // If the buffer is not mapped, unmap() will not bind it
+    mCaches.bindPixelBuffer(mBuffer);
+    unmap();
+    glTexSubImage2D(GL_TEXTURE_2D, 0, x, y, width, height, mFormat,
+            GL_UNSIGNED_BYTE, (void*) offset);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Factory
+///////////////////////////////////////////////////////////////////////////////
+
+PixelBuffer* PixelBuffer::create(GLenum format, uint32_t width, uint32_t height, BufferType type) {
+    bool gpuBuffer = type == kBufferType_Auto && Extensions::getInstance().getMajorGlVersion() >= 3;
+    if (gpuBuffer) {
+        char property[PROPERTY_VALUE_MAX];
+        if (property_get(PROPERTY_ENABLE_GPU_PIXEL_BUFFERS, property, "false") > 0) {
+            if (!strcmp(property, "true")) {
+                return new GpuPixelBuffer(format, width, height);
+            }
+        }
+    }
+    return new CpuPixelBuffer(format, width, height);
+}
+
+}; // namespace uirenderer
+}; // namespace android
diff --git a/libs/hwui/PixelBuffer.h b/libs/hwui/PixelBuffer.h
new file mode 100644
index 0000000..32d5417
--- /dev/null
+++ b/libs/hwui/PixelBuffer.h
@@ -0,0 +1,180 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_HWUI_PIXEL_BUFFER_H
+#define ANDROID_HWUI_PIXEL_BUFFER_H
+
+#include <GLES3/gl3.h>
+
+namespace android {
+namespace uirenderer {
+
+/**
+ * Represents a pixel buffer. A pixel buffer will be backed either by a
+ * PBO on OpenGL ES 3.0 and higher or by an array of uint8_t on other
+ * versions. If the buffer is backed by a PBO it will be of type
+ * GL_PIXEL_UNPACK_BUFFER.
+ *
+ * To read from or write into a PixelBuffer you must first map the
+ * buffer using the map(AccessMode) method. This method returns a
+ * pointer to the beginning of the buffer.
+ *
+ * Before the buffer can be used by the GPU, for instance to upload
+ * a texture, you must first unmap the buffer. To do so, call the
+ * unmap() method.
+ *
+ * Mapping and unmapping a PixelBuffer can have the side effect of
+ * changing the currently active GL_PIXEL_UNPACK_BUFFER. It is
+ * therefore recommended to call Caches::unbindPixelBuffer() after
+ * using a PixelBuffer to upload to a texture.
+ */
+class PixelBuffer {
+public:
+    enum BufferType {
+        kBufferType_Auto,
+        kBufferType_CPU
+    };
+
+    enum AccessMode {
+        kAccessMode_None = 0,
+        kAccessMode_Read = GL_MAP_READ_BIT,
+        kAccessMode_Write = GL_MAP_WRITE_BIT,
+        kAccessMode_ReadWrite = GL_MAP_READ_BIT | GL_MAP_WRITE_BIT
+    };
+
+    /**
+     * Creates a new PixelBuffer object with the specified format and
+     * dimensions. The buffer is immediately allocated.
+     *
+     * The buffer type specifies how the buffer should be allocated.
+     * By default this method will automatically choose whether to allocate
+     * a CPU or GPU buffer.
+     */
+    static PixelBuffer* create(GLenum format, uint32_t width, uint32_t height,
+            BufferType type = kBufferType_Auto);
+
+    virtual ~PixelBuffer() {
+    }
+
+    /**
+     * Returns the format of this pixel buffer.
+     */
+    GLenum getFormat() const {
+        return mFormat;
+    }
+
+    /**
+     * Maps this buffer with the specified access mode. This method
+     * returns a pointer to the region of memory where the buffer was
+     * mapped.
+     *
+     * If the buffer is already mapped when this method is invoked,
+     * this method will return the previously mapped pointer. The
+     * access mode can only be changed by calling unmap() first.
+     *
+     * The specified access mode cannot be kAccessMode_None.
+     */
+    virtual uint8_t* map(AccessMode mode = kAccessMode_ReadWrite) = 0;
+
+    /**
+     * Unmaps this buffer, if needed. After the buffer is unmapped,
+     * the pointer previously returned by map() becomes invalid and
+     * should not be used. After calling this method, getMappedPointer()
+     * will always return NULL.
+     */
+    virtual void unmap() = 0;
+
+    /**
+     * Returns the current access mode for this buffer. If the buffer
+     * is not mapped, this method returns kAccessMode_None.
+     */
+    AccessMode getAccessMode() const {
+        return mAccessMode;
+    }
+
+    /**
+     * Returns the currently mapped pointer. Returns NULL if the buffer
+     * is not mapped.
+     */
+    virtual uint8_t* getMappedPointer() const = 0;
+
+    /**
+     * Upload the specified rectangle of this pixel buffer as a
+     * GL_TEXTURE_2D texture. Calling this method will trigger
+     * an unmap() if necessary.
+     */
+    virtual void upload(uint32_t x, uint32_t y, uint32_t width, uint32_t height, int offset) = 0;
+
+    /**
+     * Returns the width of the pixel buffer in pixels.
+     */
+    uint32_t getWidth() const {
+        return mWidth;
+    }
+
+    /**
+     * Returns the height of the pixel buffer in pixels.
+     */
+    uint32_t getHeight() const {
+        return mHeight;
+    }
+
+    /**
+     * Returns the size of this pixel buffer in bytes.
+     */
+    uint32_t getSize() const {
+        return mWidth * mHeight * formatSize(mFormat);
+    }
+
+    /**
+     * Returns the number of bytes per pixel in the specified format.
+     *
+     * Supported formats:
+     *      GL_ALPHA
+     *      GL_RGBA
+     */
+    static uint32_t formatSize(GLenum format) {
+        switch (format) {
+            case GL_ALPHA:
+                return 1;
+            case GL_RGBA:
+                return 4;
+        }
+        return 0;
+    }
+
+protected:
+    /**
+     * Creates a new pixel buffer in the specified format and dimensions.
+     * The format must be GL_ALPHA or GL_RGBA.
+     */
+    PixelBuffer(GLenum format, uint32_t width, uint32_t height):
+            mFormat(format), mWidth(width), mHeight(height), mAccessMode(kAccessMode_None) {
+    }
+
+    GLenum mFormat;
+
+    uint32_t mWidth;
+    uint32_t mHeight;
+
+    AccessMode mAccessMode;
+
+}; // class PixelBuffer
+
+}; // namespace uirenderer
+}; // namespace android
+
+#endif // ANDROID_HWUI_PIXEL_BUFFER_H
diff --git a/libs/hwui/Properties.h b/libs/hwui/Properties.h
index e4b4f3c..6eea00c 100644
--- a/libs/hwui/Properties.h
+++ b/libs/hwui/Properties.h
@@ -25,6 +25,10 @@
  * the OpenGLRenderer.
  */
 
+///////////////////////////////////////////////////////////////////////////////
+// Compile-time properties
+///////////////////////////////////////////////////////////////////////////////
+
 // If turned on, text is interpreted as glyphs instead of UTF-16
 #define RENDER_TEXT_AS_GLYPHS 1
 
@@ -39,6 +43,10 @@
 // to properly implement overdraw debugging
 #define STENCIL_BUFFER_SIZE 8
 
+///////////////////////////////////////////////////////////////////////////////
+// Debug properties
+///////////////////////////////////////////////////////////////////////////////
+
 /**
  * Debug level for app developers. The value is a numeric value defined
  * by the DebugLevel enum below.
@@ -82,6 +90,23 @@
 #define PROPERTY_DEBUG_STENCIL_CLIP "debug.hwui.show_non_rect_clip"
 
 /**
+ * Disables draw operation deferral if set to "true", forcing draw
+ * commands to be issued to OpenGL in order, and processed in sequence
+ * with state-manipulation canvas commands.
+ */
+#define PROPERTY_DISABLE_DRAW_DEFER "debug.hwui.disable_draw_defer"
+
+/**
+ * Used to disable draw operation reordering when deferring draw operations
+ * Has no effect if PROPERTY_DISABLE_DRAW_DEFER is set to "true"
+ */
+#define PROPERTY_DISABLE_DRAW_REORDER "debug.hwui.disable_draw_reorder"
+
+///////////////////////////////////////////////////////////////////////////////
+// Runtime configuration properties
+///////////////////////////////////////////////////////////////////////////////
+
+/**
  * Used to enable/disable scissor optimization. The accepted values are
  * "true" and "false". The default value is "false".
  *
@@ -97,17 +122,10 @@
 #define PROPERTY_DISABLE_SCISSOR_OPTIMIZATION "ro.hwui.disable_scissor_opt"
 
 /**
- * Disables draw operation deferral if set to "true", forcing draw
- * commands to be issued to OpenGL in order, and processed in sequence
- * with state-manipulation canvas commands.
+ * Indicates whether PBOs can be used to back pixel buffers.
+ * Accepted values are "true" and "false".
  */
-#define PROPERTY_DISABLE_DRAW_DEFER "debug.hwui.disable_draw_defer"
-
-/**
- * Used to disable draw operation reordering when deferring draw operations
- * Has no effect if PROPERTY_DISABLE_DRAW_DEFER is set to "true"
- */
-#define PROPERTY_DISABLE_DRAW_REORDER "debug.hwui.disable_draw_reorder"
+#define PROPERTY_ENABLE_GPU_PIXEL_BUFFERS "hwui.use_gpu_pixel_buffers"
 
 // These properties are defined in mega-bytes
 #define PROPERTY_TEXTURE_CACHE_SIZE "ro.hwui.texture_cache_size"
@@ -152,8 +170,9 @@
 // Lumincance threshold above which white gamma correction is applied. Range: [0..255]
 #define PROPERTY_TEXT_WHITE_GAMMA_THRESHOLD "hwui.text_gamma.white_threshold"
 
-// Converts a number of mega-bytes into bytes
-#define MB(s) s * 1024 * 1024
+///////////////////////////////////////////////////////////////////////////////
+// Default property values
+///////////////////////////////////////////////////////////////////////////////
 
 #define DEFAULT_TEXTURE_CACHE_SIZE 24.0f
 #define DEFAULT_LAYER_CACHE_SIZE 16.0f
@@ -170,6 +189,13 @@
 #define DEFAULT_TEXT_BLACK_GAMMA_THRESHOLD 64
 #define DEFAULT_TEXT_WHITE_GAMMA_THRESHOLD 192
 
+///////////////////////////////////////////////////////////////////////////////
+// Misc
+///////////////////////////////////////////////////////////////////////////////
+
+// Converts a number of mega-bytes into bytes
+#define MB(s) s * 1024 * 1024
+
 static DebugLevel readDebugLevel() {
     char property[PROPERTY_VALUE_MAX];
     if (property_get(PROPERTY_DEBUG, property, NULL) > 0) {
diff --git a/libs/hwui/TextDropShadowCache.cpp b/libs/hwui/TextDropShadowCache.cpp
index f1f35bd..6976eaa 100644
--- a/libs/hwui/TextDropShadowCache.cpp
+++ b/libs/hwui/TextDropShadowCache.cpp
@@ -178,6 +178,10 @@
         FontRenderer::DropShadow shadow = mRenderer->renderDropShadow(&paintCopy, text, 0,
                 len, numGlyphs, radius, positions);
 
+        if (!shadow.image) {
+            return NULL;
+        }
+
         texture = new ShadowTexture;
         texture->left = shadow.penX;
         texture->top = shadow.penY;
diff --git a/libs/hwui/font/CacheTexture.cpp b/libs/hwui/font/CacheTexture.cpp
index 577f463..6c5267d 100644
--- a/libs/hwui/font/CacheTexture.cpp
+++ b/libs/hwui/font/CacheTexture.cpp
@@ -18,6 +18,8 @@
 
 #include "CacheTexture.h"
 #include "../Debug.h"
+#include "../Extensions.h"
+#include "../PixelBuffer.h"
 
 namespace android {
 namespace uirenderer {
@@ -111,6 +113,11 @@
             mMesh(NULL), mCurrentQuad(0), mMaxQuadCount(maxQuadCount) {
     mCacheBlocks = new CacheBlock(TEXTURE_BORDER_SIZE, TEXTURE_BORDER_SIZE,
             mWidth - TEXTURE_BORDER_SIZE, mHeight - TEXTURE_BORDER_SIZE, true);
+
+    // OpenGL ES 3.0+ lets us specify the row length for unpack operations such
+    // as glTexSubImage2D(). This allows us to upload a sub-rectangle of a texture.
+    // With OpenGL ES 2.0 we have to upload entire stripes instead.
+    mHasES3 = Extensions::getInstance().getMajorGlVersion() >= 3;
 }
 
 CacheTexture::~CacheTexture() {
@@ -143,7 +150,7 @@
 
 void CacheTexture::releaseTexture() {
     if (mTexture) {
-        delete[] mTexture;
+        delete mTexture;
         mTexture = NULL;
     }
     if (mTextureId) {
@@ -154,6 +161,17 @@
     mCurrentQuad = 0;
 }
 
+void CacheTexture::setLinearFiltering(bool linearFiltering, bool bind) {
+    // Skip the GL calls when the requested filtering mode is already set
+    if (linearFiltering == mLinearFiltering) return;
+    mLinearFiltering = linearFiltering;
+
+    const GLenum mode = linearFiltering ? GL_LINEAR : GL_NEAREST;
+    if (bind) glBindTexture(GL_TEXTURE_2D, getTextureId());
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, mode);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, mode);
+}
+
 void CacheTexture::allocateMesh() {
     if (!mMesh) {
         mMesh = new TextureVertex[mMaxQuadCount * 4];
@@ -162,7 +180,7 @@
 
 void CacheTexture::allocateTexture() {
     if (!mTexture) {
-        mTexture = new uint8_t[mWidth * mHeight];
+        mTexture = PixelBuffer::create(GL_ALPHA, mWidth, mHeight);
     }
 
     if (!mTextureId) {
@@ -183,6 +201,34 @@
     }
 }
 
+bool CacheTexture::upload() {
+    // Uploads the dirty region of the pixel buffer and marks the cache clean
+    const Rect& dirty = mDirtyRect;
+    const uint32_t top = dirty.top;
+    const uint32_t rows = dirty.getHeight();
+    uint32_t left = 0;
+    uint32_t columns = mWidth;
+
+    if (mHasES3) {
+        // ES 3.0 can upload a sub-rectangle: narrow the region to the dirty
+        // columns and set the unpack row length to the full texture width
+        left = dirty.left;
+        columns = dirty.getWidth();
+        glPixelStorei(GL_UNPACK_ROW_LENGTH, mWidth);
+    }
+
+    mTexture->upload(left, top, columns, rows, top * mWidth + left);
+    setDirty(false);
+    return mHasES3;
+}
+
+void CacheTexture::setDirty(bool dirty) {
+    // Clearing the dirty flag also discards the accumulated dirty rect
+    if (!dirty) mDirtyRect.setEmpty();
+
+    mDirty = dirty;
+}
+
 bool CacheTexture::fitBitmap(const SkGlyph& glyph, uint32_t* retOriginX, uint32_t* retOriginY) {
     if (glyph.fHeight + TEXTURE_BORDER_SIZE * 2 > mHeight) {
         return false;
diff --git a/libs/hwui/font/CacheTexture.h b/libs/hwui/font/CacheTexture.h
index e7fb474..ddcc836 100644
--- a/libs/hwui/font/CacheTexture.h
+++ b/libs/hwui/font/CacheTexture.h
@@ -30,6 +30,8 @@
 namespace android {
 namespace uirenderer {
 
+class PixelBuffer;
+
 /**
  * CacheBlock is a node in a linked list of current free space areas in a CacheTexture.
  * Using CacheBlocks enables us to pack the cache from top to bottom as well as left to right.
@@ -83,6 +85,10 @@
     void allocateTexture();
     void allocateMesh();
 
+    // Returns true if glPixelStorei(GL_UNPACK_ROW_LENGTH) must be reset
+    // by the caller. This method also calls setDirty(false).
+    bool upload();
+
     bool fitBitmap(const SkGlyph& glyph, uint32_t* retOriginX, uint32_t* retOriginY);
 
     inline uint16_t getWidth() const {
@@ -97,7 +103,7 @@
         return &mDirtyRect;
     }
 
-    inline uint8_t* getTexture() const {
+    inline PixelBuffer* getPixelBuffer() const {
         return mTexture;
     }
 
@@ -110,13 +116,6 @@
         return mDirty;
     }
 
-    inline void setDirty(bool dirty) {
-        mDirty = dirty;
-        if (!dirty) {
-            mDirtyRect.setEmpty();
-        }
-    }
-
     inline bool getLinearFiltering() const {
         return mLinearFiltering;
     }
@@ -124,16 +123,7 @@
     /**
      * This method assumes that the proper texture unit is active.
      */
-    void setLinearFiltering(bool linearFiltering, bool bind = true) {
-        if (linearFiltering != mLinearFiltering) {
-            mLinearFiltering = linearFiltering;
-
-            const GLenum filtering = linearFiltering ? GL_LINEAR : GL_NEAREST;
-            if (bind) glBindTexture(GL_TEXTURE_2D, getTextureId());
-            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, filtering);
-            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, filtering);
-        }
-    }
+    void setLinearFiltering(bool linearFiltering, bool bind = true);
 
     inline uint16_t getGlyphCount() const {
         return mNumGlyphs;
@@ -176,7 +166,9 @@
     }
 
 private:
-    uint8_t* mTexture;
+    void setDirty(bool dirty);
+
+    PixelBuffer* mTexture;
     GLuint mTextureId;
     uint16_t mWidth;
     uint16_t mHeight;
@@ -188,6 +180,7 @@
     uint32_t mMaxQuadCount;
     CacheBlock* mCacheBlocks;
     Rect mDirtyRect;
+    bool mHasES3;
 };
 
 }; // namespace uirenderer
diff --git a/libs/hwui/font/Font.cpp b/libs/hwui/font/Font.cpp
index 02c1aa1..011cfc1 100644
--- a/libs/hwui/font/Font.cpp
+++ b/libs/hwui/font/Font.cpp
@@ -25,11 +25,12 @@
 #include <SkGlyph.h>
 #include <SkUtils.h>
 
-#include "Debug.h"
 #include "FontUtil.h"
 #include "Font.h"
-#include "FontRenderer.h"
-#include "Properties.h"
+#include "../Debug.h"
+#include "../FontRenderer.h"
+#include "../PixelBuffer.h"
+#include "../Properties.h"
 
 namespace android {
 namespace uirenderer {
@@ -200,25 +201,23 @@
             p[3].x(), p[3].y(), u1, v1, glyph->mCacheTexture);
 }
 
-void Font::drawCachedGlyphBitmap(CachedGlyphInfo* glyph, int x, int y,
-        uint8_t* bitmap, uint32_t bitmapW, uint32_t bitmapH, Rect* bounds, const float* pos) {
-    int nPenX = x + glyph->mBitmapLeft;
-    int nPenY = y + glyph->mBitmapTop;
-
-    uint32_t endX = glyph->mStartX + glyph->mBitmapWidth;
-    uint32_t endY = glyph->mStartY + glyph->mBitmapHeight;
+void Font::drawCachedGlyphBitmap(CachedGlyphInfo* glyph, int x, int y, uint8_t* bitmap,
+        uint32_t bitmapWidth, uint32_t bitmapHeight, Rect* bounds, const float* pos) {
+    int dstX = x + glyph->mBitmapLeft;
+    int dstY = y + glyph->mBitmapTop;
 
     CacheTexture* cacheTexture = glyph->mCacheTexture;
-    uint32_t cacheWidth = cacheTexture->getWidth();
-    const uint8_t* cacheBuffer = cacheTexture->getTexture();
 
-    uint32_t cacheX = 0, cacheY = 0;
-    int32_t bX = 0, bY = 0;
-    for (cacheX = glyph->mStartX, bX = nPenX; cacheX < endX; cacheX++, bX++) {
-        for (cacheY = glyph->mStartY, bY = nPenY; cacheY < endY; cacheY++, bY++) {
-            uint8_t tempCol = cacheBuffer[cacheY * cacheWidth + cacheX];
-            bitmap[bY * bitmapW + bX] = tempCol;
-        }
+    uint32_t cacheWidth = cacheTexture->getWidth();
+    uint32_t startY = glyph->mStartY * cacheWidth;
+    uint32_t endY = startY + (glyph->mBitmapHeight * cacheWidth);
+
+    PixelBuffer* pixelBuffer = cacheTexture->getPixelBuffer();
+    const uint8_t* cacheBuffer = pixelBuffer->map();
+
+    for (uint32_t cacheY = startY, bitmapY = dstY * bitmapWidth; cacheY < endY;
+            cacheY += cacheWidth, bitmapY += bitmapWidth) {
+        memcpy(&bitmap[bitmapY + dstX], &cacheBuffer[cacheY + glyph->mStartX], glyph->mBitmapWidth);
     }
 }