Merge "libcore: rewrite of UriCodec"

commit: 6d6f574e762052ccc8ed6791f2a267c66b0288d5 [log] [tgz]
author: Sergio Giro <sgiro@google.com> Tue Sep 22 10:26:50 2015 +0000
committer: Gerrit Code Review <noreply-gerritcodereview@google.com> Tue Sep 22 10:26:50 2015 +0000
tree: c9d747a3158e2fa8c7034ce4c5ae14204724c7c2
parent: 99ee6d838ae8550eba8f1009b13b2273470c2448 [diff]
parent: fda56554dbf5caf1af1982cad020a8dca5632244 [diff]
diff --git a/luni/src/main/java/libcore/net/UriCodec.java b/luni/src/main/java/libcore/net/UriCodec.java
index dd18c5f..17b875d 100644
--- a/luni/src/main/java/libcore/net/UriCodec.java
+++ b/luni/src/main/java/libcore/net/UriCodec.java

@@ -1,219 +1,379 @@
 /*
- *  Licensed to the Apache Software Foundation (ASF) under one or more
- *  contributor license agreements.  See the NOTICE file distributed with
- *  this work for additional information regarding copyright ownership.
- *  The ASF licenses this file to You under the Apache License, Version 2.0
- *  (the "License"); you may not use this file except in compliance with
- *  the License.  You may obtain a copy of the License at
+ * Copyright (C) 2015 The Android Open Source Project
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
  *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
  */
 
 package libcore.net;
 
 import java.io.ByteArrayOutputStream;
 import java.net.URISyntaxException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharacterCodingException;
 import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.CodingErrorAction;
 import java.nio.charset.StandardCharsets;
 
 /**
- * Encodes and decodes {@code application/x-www-form-urlencoded} content.
- * Subclasses define exactly which characters are legal.
+ * Encodes and decodes “application/x-www-form-urlencoded” content.
  *
- * <p>By default, UTF-8 is used to encode escaped characters. A single input
- * character like "\u0080" may be encoded to multiple octets like %C2%80.
+ * Subclasses define “isRetained”, which decides which chars need to be escaped and which don’t.
+ * Output is encoded as UTF-8 by default, i.e. each character (or surrogate pair) is converted to
+ * its equivalent UTF-8 encoded byte sequence, which is then converted to its escaped form,
+ * e.g. a 4 byte sequence might look like “%c6%ef%e0%e8”.
  */
 public abstract class UriCodec {
-
     /**
-     * Returns true if {@code c} does not need to be escaped.
+     * Returns true iff ‘c’ does not need to be escaped.
+     * ‘a’ - ‘z’, ‘A’ - ‘Z’ and ‘0’ - ‘9’ are always considered valid (i.e. don’t need to be
+     * escaped). This set is referred to as the “whitelist”.
      */
     protected abstract boolean isRetained(char c);
 
+    private static boolean isWhitelisted(char c) {
+        return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9');
+    }
+
+    private boolean isWhitelistedOrRetained(char c) {
+        return (isWhitelisted(c) || isRetained(c))
+                // Make sure % is not retained.
+                && (c != '%');
+    }
+
     /**
-     * Throws if {@code s} is invalid according to this encoder.
+     * Throw URISyntaxException if any of the characters in the range [start, end) are not valid
+     * according to this codec.
+     *  - If a char is in the whitelist or retained, it is valid both escaped and unescaped.
+     *  - All escaped octets appearing in the input are structurally valid hex, i.e convertible to
+     *  decimals.
+     *
+     * On success, the substring [start, end) is returned.
+     * {@code name} is not used, except to generate debugging info.
      */
     public final String validate(String uri, int start, int end, String name)
             throws URISyntaxException {
-        for (int i = start; i < end; ) {
-            char ch = uri.charAt(i);
-            if ((ch >= 'a' && ch <= 'z')
-                    || (ch >= 'A' && ch <= 'Z')
-                    || (ch >= '0' && ch <= '9')
-                    || isRetained(ch)) {
-                i++;
-            } else if (ch == '%') {
-                if (i + 2 >= end) {
-                    throw new URISyntaxException(uri, "Incomplete % sequence in " + name, i);
+        int i = start;
+        while (i < end) {
+            char c = uri.charAt(i++);
+            if (isWhitelistedOrRetained(c)) {
+                continue;
+            }
+            // c is either '%' or character not allowed in a uri.
+            if (c != '%') {
+                throw unexpectedCharacterException(uri, name, c, i - 1);
+            }
+            // Expect two characters representing a number in hex.
+            for (int j = 0; j < 2; j++) {
+                c = getNextCharacter(uri, i++, end, name);
+                if (hexCharToValue(c) < 0) {
+                    throw unexpectedCharacterException(uri, name, c, i - 1);
                 }
-                int d1 = hexToInt(uri.charAt(i + 1));
-                int d2 = hexToInt(uri.charAt(i + 2));
-                if (d1 == -1 || d2 == -1) {
-                    throw new URISyntaxException(uri, "Invalid % sequence: "
-                            + uri.substring(i, i + 3) + " in " + name, i);
-                }
-                i += 3;
-            } else {
-                throw new URISyntaxException(uri, "Illegal character in " + name, i);
             }
         }
         return uri.substring(start, end);
     }
 
     /**
-     * Throws if {@code s} contains characters that are not letters, digits or
+     * Interprets a char as a hex digit, returning a number from -1 (invalid char) to 15 ('f').
+     */
+    private static int hexCharToValue(char c) {
+        if('0' <= c && c <= '9') {
+            return c - '0';
+        }
+        if ('a' <= c && c <= 'f') {
+            return 10 + c - 'a';
+        }
+        if ('A' <= c && c <= 'F') {
+            return 10 + c - 'A';
+        }
+        return -1;
+    }
+
+    private static URISyntaxException unexpectedCharacterException(
+            String uri, String name, char unexpected, int index) {
+        String nameString = (name == null) ? "" :  " in [" + name + "]";
+        return new URISyntaxException(
+                uri, "Unexpected character" + nameString + ": " + unexpected, index);
+    }
+
+    private static char getNextCharacter(String uri, int index, int end, String name)
+             throws URISyntaxException {
+        if (index >= end) {
+            String nameString = (name == null) ? "" :  " in [" + name + "]";
+            throw new URISyntaxException(
+                    uri, "Unexpected end of string" + nameString, index);
+        }
+        return uri.charAt(index);
+    }
+
+    /**
+     * Throws {@link URISyntaxException} if any character in {@code uri} is neither whitelisted nor
      * in {@code legal}.
      */
-    public static void validateSimple(String s, String legal)
-            throws URISyntaxException {
-        for (int i = 0; i < s.length(); i++) {
-            char ch = s.charAt(i);
-            if (!((ch >= 'a' && ch <= 'z')
-                    || (ch >= 'A' && ch <= 'Z')
-                    || (ch >= '0' && ch <= '9')
-                    || legal.indexOf(ch) > -1)) {
-                throw new URISyntaxException(s, "Illegal character", i);
+    public static void validateSimple(String uri, String legal) throws URISyntaxException {
+        for (int i = 0; i < uri.length(); i++) {
+            char c = uri.charAt(i);
+            if (!isWhitelisted(c) && legal.indexOf(c) < 0) {
+                throw unexpectedCharacterException(uri, null /* name */, c, i);
             }
         }
     }
 
     /**
-     * Encodes {@code s} and appends the result to {@code builder}.
+     * Encodes the string {@code s} as per the rules of this encoder (see class level comment).
      *
-     * @param isPartiallyEncoded true to fix input that has already been
-     *     partially or fully encoded. For example, input of "hello%20world" is
-     *     unchanged with isPartiallyEncoded=true but would be double-escaped to
-     *     "hello%2520world" otherwise.
+     * @throws IllegalArgumentException if the encoder is unable to encode a sequence of bytes.
      */
-    private void appendEncoded(StringBuilder builder, String s, Charset charset,
-            boolean isPartiallyEncoded) {
-        if (s == null) {
-            throw new NullPointerException("s == null");
-        }
-
-        int escapeStart = -1;
-        for (int i = 0; i < s.length(); i++) {
-            char c = s.charAt(i);
-            if ((c >= 'a' && c <= 'z')
-                    || (c >= 'A' && c <= 'Z')
-                    || (c >= '0' && c <= '9')
-                    || isRetained(c)
-                    || (c == '%' && isPartiallyEncoded)) {
-                if (escapeStart != -1) {
-                    appendHex(builder, s.substring(escapeStart, i), charset);
-                    escapeStart = -1;
-                }
-                if (c == '%' && isPartiallyEncoded) {
-                    // this is an encoded 3-character sequence like "%20"
-                    builder.append(s, i, Math.min(i + 3, s.length()));
-                    i += 2;
-                } else if (c == ' ') {
-                    builder.append('+');
-                } else {
-                    builder.append(c);
-                }
-            } else if (escapeStart == -1) {
-                escapeStart = i;
-            }
-        }
-        if (escapeStart != -1) {
-            appendHex(builder, s.substring(escapeStart, s.length()), charset);
-        }
-    }
-
     public final String encode(String s, Charset charset) {
-        // Guess a bit larger for encoded form
-        StringBuilder builder = new StringBuilder(s.length() + 16);
+        StringBuilder builder = new StringBuilder(s.length());
         appendEncoded(builder, s, charset, false);
         return builder.toString();
     }
 
+    /**
+     * Encodes the string {@code s} as per the rules of this encoder (see class level comment).
+     *
+     * Encoded output is appended to {@code builder}. This uses the default output encoding (UTF-8).
+     */
     public final void appendEncoded(StringBuilder builder, String s) {
         appendEncoded(builder, s, StandardCharsets.UTF_8, false);
     }
 
+    /**
+     * Encodes the string {@code s} as per the rules of this encoder (see class level comment).
+     *
+     * Encoded output is appended to {@code builder}. This uses the default output encoding (UTF-8).
+     * This method must produce partially encoded output. What this means is that if encoded octets
+     * appear in the input string, they are passed through unmodified, instead of being double
+     * escaped. Consider a codec operating on the global whitelist dealing with a string
+     * “foo%25bar”. With this method, the output will be “foo%25bar”, but with appendEncoded, it
+     * will be double encoded into “foo%2525bar”.
+     */
     public final void appendPartiallyEncoded(StringBuilder builder, String s) {
         appendEncoded(builder, s, StandardCharsets.UTF_8, true);
     }
 
-    /**
-     * @param convertPlus true to convert '+' to ' '.
-     * @param throwOnFailure true to throw an IllegalArgumentException on
-     *     invalid escape sequences; false to replace them with the replacement
-     *     character (U+fffd).
-     */
-    public static String decode(String s, boolean convertPlus, Charset charset,
-            boolean throwOnFailure) {
-        if (s.indexOf('%') == -1 && (!convertPlus || s.indexOf('+') == -1)) {
-            return s;
-        }
-
-        StringBuilder result = new StringBuilder(s.length());
-        ByteArrayOutputStream out = new ByteArrayOutputStream();
-        for (int i = 0; i < s.length();) {
+    private void appendEncoded(
+            StringBuilder builder, String s, Charset charset, boolean partiallyEncoded) {
+        CharsetEncoder encoder = charset.newEncoder()
+                .onMalformedInput(CodingErrorAction.REPORT)
+                .onUnmappableCharacter(CodingErrorAction.REPORT);
+        CharBuffer cBuffer = CharBuffer.allocate(s.length());
+        for (int i = 0; i < s.length(); i++) {
             char c = s.charAt(i);
-            if (c == '%') {
-                do {
-                    int d1, d2;
-                    if (i + 2 < s.length()
-                            && (d1 = hexToInt(s.charAt(i + 1))) != -1
-                            && (d2 = hexToInt(s.charAt(i + 2))) != -1) {
-                        out.write((byte) ((d1 << 4) + d2));
-                    } else if (throwOnFailure) {
-                        throw new IllegalArgumentException("Invalid % sequence at " + i + ": " + s);
-                    } else {
-                        byte[] replacement = "\ufffd".getBytes(charset);
-                        out.write(replacement, 0, replacement.length);
+            if (c == '%' && partiallyEncoded) {
+                // In case there are characters waiting to be encoded.
+                flushEncodingCharBuffer(builder, encoder, cBuffer);
+                builder.append('%');
+                continue;
+            }
+
+            if (c == ' ' && isRetained(' ')) {
+                builder.append('+');
+                continue;
+            }
+
+            if (isWhitelistedOrRetained(c)) {
+                flushEncodingCharBuffer(builder, encoder, cBuffer);
+                builder.append(c);
+                continue;
+            }
+
+            // Put the character in the queue for encoding.
+            cBuffer.put(c);
+        }
+        flushEncodingCharBuffer(builder, encoder, cBuffer);
+    }
+
+    private static void flushEncodingCharBuffer(
+            StringBuilder builder,
+            CharsetEncoder encoder,
+            CharBuffer cBuffer) {
+        if (cBuffer.position() == 0) {
+            return;
+        }
+        // We are reading from the buffer now.
+        cBuffer.flip();
+        ByteBuffer byteBuffer = ByteBuffer.allocate(
+                cBuffer.remaining() * (int) Math.ceil(encoder.maxBytesPerChar()));
+        byteBuffer.position(0);
+        CoderResult result = encoder.encode(cBuffer, byteBuffer, true /* endOfInput */);
+        // According to the {@code CharsetEncoder#encode} spec, the method returns underflow
+        // and leaves the input buffer empty when all chars were processed correctly.
+        if (result != CoderResult.UNDERFLOW) {
+            throw new IllegalArgumentException(
+                    "Error encoding, unexpected result ["
+                            + result.toString()
+                            + "] using encoder for ["
+                            + encoder.charset().name()
+                            + "]");
+        }
+        if (cBuffer.hasRemaining()) {
+            throw new IllegalArgumentException(
+                    "Encoder for [" + encoder.charset().name() + "] failed with underflow with "
+                            + "remaining input [" + cBuffer + "]");
+        }
+        // Need to flush in case the encoder saves internal state.
+        encoder.flush(byteBuffer);
+        if (result != CoderResult.UNDERFLOW) {
+            throw new IllegalArgumentException(
+                    "Error encoding, unexpected result ["
+                            + result.toString()
+                            + "] flushing encoder for ["
+                            + encoder.charset().name()
+                            + "]");
+        }
+        encoder.reset();
+
+        byteBuffer.flip();
+        // Write the encoded bytes.
+        while(byteBuffer.hasRemaining()) {
+            byte b = byteBuffer.get();
+            builder.append('%');
+            builder.append(intToHexDigit((b & 0xf0) >>> 4));
+            builder.append(intToHexDigit(b & 0x0f));
+
+        }
+        // Use the character buffer to write again.
+        cBuffer.flip();
+        cBuffer.limit(cBuffer.capacity());
+    }
+
+    private static char intToHexDigit(int b) {
+        if (b < 10) {
+            return (char) ('0' + b);
+        } else {
+            return (char) ('A' + b - 10);
+        }
+    }
+
+    /**
+     * Decode a string according to the rules of this decoder.
+     *
+     * - if {@code convertPlus == true}, all ‘+’ chars in the input are converted to ‘ ’
+     *   (white space)
+     * - if {@code throwOnFailure == true}, an {@link IllegalArgumentException} is thrown for
+     *   invalid inputs. Else, U+FFFD is emitted to the output in place of invalid input octets.
+     */
+    public static String decode(
+            String s, boolean convertPlus, Charset charset, boolean throwOnFailure) {
+        StringBuilder builder = new StringBuilder(s.length());
+        appendDecoded(builder, s, convertPlus, charset, throwOnFailure);
+        return builder.toString();
+    }
+
+    /**
+     * Character to be output when there's an error decoding an input.
+     */
+    private static final char INVALID_INPUT_CHARACTER = '\ufffd';
+
+    private static void appendDecoded(
+            StringBuilder builder,
+            String s,
+            boolean convertPlus,
+            Charset charset,
+            boolean throwOnFailure) {
+        CharsetDecoder decoder = charset.newDecoder()
+                .onMalformedInput(CodingErrorAction.REPORT)
+                .onUnmappableCharacter(CodingErrorAction.REPORT);
+        // Holds the bytes corresponding to the escaped chars being read (empty if the last char
+        // wasn't an escaped char).
+        ByteBuffer byteBuffer = ByteBuffer.allocate(s.length());
+        int i = 0;
+        while (i < s.length()) {
+            char c = s.charAt(i);
+            i++;
+            switch (c) {
+                case '+':
+                    flushDecodingByteAccumulator(
+                            builder, decoder, byteBuffer, throwOnFailure);
+                    builder.append(convertPlus ? ' ' : '+');
+                    break;
+                case '%':
+                    // Expect two characters representing a number in hex.
+                    byte hexValue = 0;
+                    for (int j = 0; j < 2; j++) {
+                        try {
+                            c = getNextCharacter(s, i, s.length(), null /* name */);
+                        } catch (URISyntaxException e) {
+                            // Unexpected end of input.
+                            if (throwOnFailure) {
+                                throw new IllegalArgumentException(e);
+                            } else {
+                                flushDecodingByteAccumulator(
+                                        builder, decoder, byteBuffer, throwOnFailure);
+                                builder.append(INVALID_INPUT_CHARACTER);
+                                return;
+                            }
+                        }
+                        i++;
+                        int newDigit = hexCharToValue(c);
+                        if (newDigit < 0) {
+                            if (throwOnFailure) {
+                                throw new IllegalArgumentException(
+                                        unexpectedCharacterException(s, null /* name */, c, i - 1));
+                            } else {
+                                flushDecodingByteAccumulator(
+                                        builder, decoder, byteBuffer, throwOnFailure);
+                                builder.append(INVALID_INPUT_CHARACTER);
+                                break;
+                            }
+                        }
+                        hexValue = (byte) (hexValue * 0x10 + newDigit);
                     }
-                    i += 3;
-                } while (i < s.length() && s.charAt(i) == '%');
-                result.append(new String(out.toByteArray(), charset));
-                out.reset();
-            } else {
-                if (convertPlus && c == '+') {
-                    c = ' ';
-                }
-                result.append(c);
-                i++;
+                    byteBuffer.put(hexValue);
+                    break;
+                default:
+                    flushDecodingByteAccumulator(builder, decoder, byteBuffer, throwOnFailure);
+                    builder.append(c);
             }
         }
-        return result.toString();
+        flushDecodingByteAccumulator(builder, decoder, byteBuffer, throwOnFailure);
+    }
+
+    private static void flushDecodingByteAccumulator(
+            StringBuilder builder,
+            CharsetDecoder decoder,
+            ByteBuffer byteBuffer,
+            boolean throwOnFailure) {
+        if (byteBuffer.position() == 0) {
+            return;
+        }
+        byteBuffer.flip();
+        try {
+            builder.append(decoder.decode(byteBuffer));
+        } catch (CharacterCodingException e) {
+            if (throwOnFailure) {
+                throw new IllegalArgumentException(e);
+            } else {
+                builder.append(INVALID_INPUT_CHARACTER);
+            }
+        } finally {
+            // Use the byte buffer to write again.
+            byteBuffer.flip();
+            byteBuffer.limit(byteBuffer.capacity());
+        }
     }
 
     /**
-     * Like {@link Character#digit}, but without support for non-ASCII
-     * characters.
+     * Equivalent to {@code decode(s, false, UTF_8, true)}
      */
-    private static int hexToInt(char c) {
-        if ('0' <= c && c <= '9') {
-            return c - '0';
-        } else if ('a' <= c && c <= 'f') {
-            return 10 + (c - 'a');
-        } else if ('A' <= c && c <= 'F') {
-            return 10 + (c - 'A');
-        } else {
-            return -1;
-        }
-    }
-
     public static String decode(String s) {
-        return decode(s, false, StandardCharsets.UTF_8, true);
+        return decode(
+                s, false /* convertPlus */, StandardCharsets.UTF_8, true /* throwOnFailure */);
     }
-
-    private static void appendHex(StringBuilder builder, String s, Charset charset) {
-        for (byte b : s.getBytes(charset)) {
-            appendHex(builder, b);
-        }
-    }
-
-    private static void appendHex(StringBuilder sb, byte b) {
-        sb.append('%');
-        sb.append(Byte.toHexString(b, true));
-    }
-}
+}
\ No newline at end of file

diff --git a/luni/src/test/java/libcore/net/UriCodecTest.java b/luni/src/test/java/libcore/net/UriCodecTest.java
new file mode 100644
index 0000000..503bd8b
--- /dev/null
+++ b/luni/src/test/java/libcore/net/UriCodecTest.java

@@ -0,0 +1,294 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */
+
+package libcore.net;
+
+import junit.framework.TestCase;
+
+import java.net.URISyntaxException;
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Tests for {@link UriCodec}
+ */
+public class UriCodecTest extends TestCase {
+    private static final UriCodec CODEC = new UriCodec() {
+        @Override
+        protected boolean isRetained(char c) {
+            // Note: this is a dubious codec specifying to retain the escape character '%'.
+            // Testing that it is not treated as retained anyway.
+            return c == '$' || c == '%';
+        }
+    };
+
+    private static final String VALID_ENCODED_STRING = "a0b$CD%01a%23b%45c%67%89%abd%cd%efq";
+
+    public void testValidate_stringOK_passes() throws Exception {
+        assertEquals(
+                VALID_ENCODED_STRING,
+                CODEC.validate(
+                        VALID_ENCODED_STRING, 0, VALID_ENCODED_STRING.length(), "test OK string"));
+    }
+
+    // Hex codes in upper case are valid as well.
+    public void testValidate_stringUppercaseOK_passes() throws Exception {
+        String stringOKUpperCase = VALID_ENCODED_STRING.toUpperCase();
+        CODEC.validate(stringOKUpperCase, 0, stringOKUpperCase.length(), "test OK UC string");
+    }
+
+    // Characters before the start index are ignored.
+    public void testValidate_wrongCharsBeforeStart_passes() throws Exception {
+        assertEquals(VALID_ENCODED_STRING, CODEC.validate(
+                "%p" + VALID_ENCODED_STRING,
+                2,
+                VALID_ENCODED_STRING.length() + 2,
+                "test string"));
+    }
+
+    // Fails with character 'p', invalid after '%'
+    public void testValidate_wrongCharsAtStart_fails() throws Exception {
+        try {
+            CODEC.validate(
+                    "%p" + VALID_ENCODED_STRING,
+                    0,
+                    VALID_ENCODED_STRING.length() + 2,
+                    "test string");
+            fail("Expected URISyntaxException");
+        } catch (URISyntaxException expected) {
+            // Expected.
+        }
+    }
+
+    // Characters beyond the end index are ignored.
+    public void testValidate_wrongCharsBeyondEnd_passes() throws Exception {
+        assertEquals(VALID_ENCODED_STRING, CODEC.validate(
+                VALID_ENCODED_STRING + "%p",
+                0,
+                VALID_ENCODED_STRING.length(),
+                "test string"));
+    }
+
+    // Fails with character 'p', invalid after '%'
+    public void testValidate_wrongCharsAtEnd_fails() throws Exception {
+        try {
+            CODEC.validate(
+                    VALID_ENCODED_STRING + "%p",
+                    0,
+                    VALID_ENCODED_STRING.length() + 2,
+                    "test string");
+            fail("Expected URISyntaxException");
+        } catch (URISyntaxException expected) {
+            // Expected.
+        }
+    }
+
+    public void testValidate_secondDigitWrong_fails() throws Exception {
+        try {
+            CODEC.validate(
+                    VALID_ENCODED_STRING + "%1p",
+                    0,
+                    VALID_ENCODED_STRING.length() + 2,
+                    "test string");
+            fail("Expected URISyntaxException");
+        } catch (URISyntaxException expected) {
+            // Expected.
+        }
+    }
+
+    public void testValidate_emptyString_passes() throws Exception {
+        assertEquals("", CODEC.validate("", 0, 0, "empty string"));
+    }
+
+    public void testValidate_stringEndingWithPercent_fails() throws Exception {
+        try {
+            CODEC.validate("a%", 0, 0, "a% string");
+        } catch (URISyntaxException expected) {
+            // Expected.
+        }
+    }
+
+    public void testValidate_stringEndingWithPercentAndSingleDigit_fails() throws Exception {
+        try {
+            CODEC.validate("a%1", 0, 0, "a%1 string");
+        } catch (URISyntaxException expected) {
+            // Expected.
+        }
+    }
+
+    public void testValidateSimple_stringOK_passes() throws Exception {
+        UriCodec.validateSimple(VALID_ENCODED_STRING, "$%");
+    }
+
+    // Hex codes in upper case are valid as well.
+    public void testValidateSimple_stringUppercaseOK_passes() throws Exception {
+        UriCodec.validateSimple(VALID_ENCODED_STRING.toUpperCase(), "$%");
+    }
+
+    // Fails with character '/', which is neither whitelisted nor in the legal set.
+    public void testValidateSimple_wrongCharsAtStart_fails() throws Exception {
+        try {
+            UriCodec.validateSimple("%/" + VALID_ENCODED_STRING, "$%");
+            fail("Expected URISyntaxException");
+        } catch (URISyntaxException expected) {
+            // Expected.
+        }
+    }
+
+    // Fails with character '/', which is neither whitelisted nor in the legal set.
+    public void testValidateSimple_wrongCharsAtEnd_fails() throws Exception {
+        try {
+            UriCodec.validateSimple(VALID_ENCODED_STRING + "%/", "$%");
+            fail("Expected URISyntaxException");
+        } catch (URISyntaxException expected) {
+            // Expected.
+        }
+    }
+
+    public void testValidateSimple_emptyString_passes() throws Exception {
+        UriCodec.validateSimple("", "$%");
+    }
+
+    public void testValidateSimple_stringEndingWithPercent_passes() throws Exception {
+        UriCodec.validateSimple("a%", "$%");
+    }
+
+    public void testValidateSimple_stringEndingWithPercentAndSingleDigit_passes() throws Exception {
+        UriCodec.validateSimple("a%1", "$%");
+    }
+
+    public void testEncode_emptyString_returnsEmptyString() {
+        assertEquals("", CODEC.encode("", StandardCharsets.UTF_8));
+    }
+
+    public void testEncode() {
+        assertEquals("ab%2F$%C4%82%2512", CODEC.encode("ab/$\u0102%12", StandardCharsets.UTF_8));
+    }
+
+    public void testEncode_convertWhitespace() {
+        // Whitespace is not retained, output %20.
+        assertEquals("ab%2F$%C4%82%2512%20",
+                CODEC.encode("ab/$\u0102%12 ", StandardCharsets.UTF_8));
+
+
+        UriCodec withWhitespaceRetained = new UriCodec() {
+            @Override
+            protected boolean isRetained(char c) {
+                // Note: this is a dubious codec specifying to retain the escape character '%'.
+                // Testing that it is not treated as retained anyway.
+                return c == '$' || c == '%' || c == ' ';
+            }
+        };
+        // Whitespace is retained, convert to plus.
+        assertEquals("ab%2F$%C4%82%2512+",
+                withWhitespaceRetained.encode("ab/$\u0102%12 ", StandardCharsets.UTF_8));
+    }
+
+    public void testEncode_partially_returnsPercentUnchanged() {
+        StringBuilder stringBuilder = new StringBuilder();
+        // Check it's really appending instead of returning a new builder.
+        stringBuilder.append("pp");
+        CODEC.appendPartiallyEncoded(stringBuilder, "ab/$\u0102%");
+        // Returns % at the end instead of %25.
+        assertEquals("ppab%2F$%C4%82%", stringBuilder.toString());
+    }
+
+    public void testEncode_partially_returnsCharactersAfterPercentEncoded() {
+        StringBuilder stringBuilder = new StringBuilder();
+        // Check it's really appending instead of returning a new builder.
+        stringBuilder.append("pp");
+        CODEC.appendPartiallyEncoded(stringBuilder, "ab/$\u0102%\u0102");
+        // Returns %C4%82 at the end.
+        assertEquals("ppab%2F$%C4%82%%C4%82", stringBuilder.toString());
+    }
+
+    public void testEncode_partially_returnsDigitsAfterPercentUnchanged() {
+        StringBuilder stringBuilder = new StringBuilder();
+        // Check it's really appending instead of returning a new builder.
+        stringBuilder.append("pp");
+        CODEC.appendPartiallyEncoded(stringBuilder, "ab/$\u0102%38");
+        // Returns %38 at the end.
+        assertEquals("ppab%2F$%C4%82%38", stringBuilder.toString());
+    }
+
+    // Last character needs encoding (make sure we are flushing the buffer with chars to encode).
+    public void testEncode_lastCharacter() {
+        assertEquals("ab%2F$%C4%82%25%E0%A1%80",
+                CODEC.encode("ab/$\u0102%\u0840", StandardCharsets.UTF_8));
+    }
+
+    public void testDecode_emptyString_returnsEmptyString() {
+        assertEquals("", UriCodec.decode(""));
+    }
+
+    public void testDecode_wrongHexDigit_fails() {
+        try {
+            // %p in the end.
+            UriCodec.decode("ab%2f$%C4%82%25%e0%a1%80%p");
+            fail("Expected URISyntaxException");
+        } catch (IllegalArgumentException expected) {
+            // Expected.
+        }
+    }
+
+    public void testDecode_secondHexDigitWrong_fails() {
+        try {
+            // %1p in the end.
+            UriCodec.decode("ab%2f$%c4%82%25%e0%a1%80%1p");
+            fail("Expected URISyntaxException");
+        } catch (IllegalArgumentException expected) {
+            // Expected.
+        }
+    }
+
+    public void testDecode_endsWithPercent_fails() {
+        try {
+            // % in the end.
+            UriCodec.decode("ab%2f$%c4%82%25%e0%a1%80%");
+            fail("Expected URISyntaxException");
+        } catch (IllegalArgumentException expected) {
+            // Expected.
+        }
+    }
+
+    public void testDecode_dontThrowException_appendsUnknownCharacter() {
+        assertEquals("ab/$\u0102%\u0840\ufffd",
+                UriCodec.decode("ab%2f$%c4%82%25%e0%a1%80%",
+                        false /* convertPlus */,
+                        StandardCharsets.UTF_8,
+                        false /* throwOnFailure */));
+    }
+
+    public void testDecode_convertPlus() {
+        assertEquals("ab/$\u0102% \u0840",
+                UriCodec.decode("ab%2f$%c4%82%25+%e0%a1%80",
+                        true /* convertPlus */,
+                        StandardCharsets.UTF_8,
+                        false /* throwOnFailure */));
+    }
+
+    // Last character needs decoding (make sure we are flushing the buffer with chars to decode).
+    public void testDecode_lastCharacter() {
+        assertEquals("ab/$\u0102%\u0840",
+                UriCodec.decode("ab%2f$%c4%82%25%e0%a1%80"));
+    }
+
+    // Check that a second row of encoded characters is decoded properly (internal buffers are
+    // reset properly).
+    public void testDecode_secondRowOfEncoded() {
+        assertEquals("ab/$\u0102%\u0840aa\u0840",
+                UriCodec.decode("ab%2f$%c4%82%25%e0%a1%80aa%e0%a1%80"));
+    }
+}
commit	6d6f574e762052ccc8ed6791f2a267c66b0288d5	[log] [tgz]
author	Sergio Giro <sgiro@google.com>	Tue Sep 22 10:26:50 2015 +0000
committer	Gerrit Code Review <noreply-gerritcodereview@google.com>	Tue Sep 22 10:26:50 2015 +0000
tree	c9d747a3158e2fa8c7034ce4c5ae14204724c7c2
parent	99ee6d838ae8550eba8f1009b13b2273470c2448 [diff]
parent	fda56554dbf5caf1af1982cad020a8dca5632244 [diff]