Merge "libcore: rewrite of UriCodec"
diff --git a/luni/src/main/java/libcore/net/UriCodec.java b/luni/src/main/java/libcore/net/UriCodec.java
index dd18c5f..17b875d 100644
--- a/luni/src/main/java/libcore/net/UriCodec.java
+++ b/luni/src/main/java/libcore/net/UriCodec.java
@@ -1,219 +1,379 @@
/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
+ * Copyright (C) 2015 The Android Open Source Project
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
*
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
*/
package libcore.net;
import java.io.ByteArrayOutputStream;
import java.net.URISyntaxException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
/**
- * Encodes and decodes {@code application/x-www-form-urlencoded} content.
- * Subclasses define exactly which characters are legal.
+ * Encodes and decodes {@code application/x-www-form-urlencoded} content.
*
- * <p>By default, UTF-8 is used to encode escaped characters. A single input
- * character like "\u0080" may be encoded to multiple octets like %C2%80.
+ * Subclasses define {@code isRetained}, which decides which chars are passed through unescaped.
+ * Output is encoded as UTF-8 by default, i.e. each character (or surrogate pair) is converted to
+ * its equivalent UTF-8 encoded byte sequence, which is then converted to its escaped form.
+ * E.g. a 4 byte sequence might look like "%F0%9F%98%80".
*/
public abstract class UriCodec {
-
/**
- * Returns true if {@code c} does not need to be escaped.
+ * Returns true iff {@code c} does not need to be escaped.
+ * 'a' - 'z', 'A' - 'Z' and '0' - '9' are always considered valid (i.e. don't need to be
+ * escaped). This set is referred to as the "whitelist".
*/
protected abstract boolean isRetained(char c);
+    private static boolean isWhitelisted(char c) {  // ASCII letters and digits only.
+        return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
+    }
+
+    private boolean isWhitelistedOrRetained(char c) {
+        // '%' introduces an escape, so it never passes through, whatever isRetained says.
+        boolean passThrough = isWhitelisted(c) || isRetained(c);
+        return passThrough && c != '%';
+    }
+
/**
- * Throws if {@code s} is invalid according to this encoder.
+ * Throws {@link URISyntaxException} unless every character of {@code uri} in [start, end) is
+ * valid according to this codec, i.e. it is either:
+ * - whitelisted or retained (see {@link #isRetained}), and not '%'; or
+ * - part of an escaped octet: a '%' followed, before {@code end}, by two hex digits (either
+ * case). Any octet value is accepted.
+ *
+ * On success, the substring [start, end) is returned.
+ * {@code name} is not used, except to generate the exception message; it may be null.
*/
public final String validate(String uri, int start, int end, String name)
throws URISyntaxException {
- for (int i = start; i < end; ) {
- char ch = uri.charAt(i);
- if ((ch >= 'a' && ch <= 'z')
- || (ch >= 'A' && ch <= 'Z')
- || (ch >= '0' && ch <= '9')
- || isRetained(ch)) {
- i++;
- } else if (ch == '%') {
- if (i + 2 >= end) {
- throw new URISyntaxException(uri, "Incomplete % sequence in " + name, i);
+ int i = start;
+ while (i < end) {
+ char c = uri.charAt(i++);
+ if (isWhitelistedOrRetained(c)) {
+ continue;
+ }
+ // c is either '%' or a character that is not allowed unescaped.
+ if (c != '%') {
+ throw unexpectedCharacterException(uri, name, c, i - 1);
+ }
+ // Expect two characters representing a number in hex.
+ for (int j = 0; j < 2; j++) {
+ c = getNextCharacter(uri, i++, end, name);
+ if (hexCharToValue(c) < 0) {
+ throw unexpectedCharacterException(uri, name, c, i - 1);
}
- int d1 = hexToInt(uri.charAt(i + 1));
- int d2 = hexToInt(uri.charAt(i + 2));
- if (d1 == -1 || d2 == -1) {
- throw new URISyntaxException(uri, "Invalid % sequence: "
- + uri.substring(i, i + 3) + " in " + name, i);
- }
- i += 3;
- } else {
- throw new URISyntaxException(uri, "Illegal character in " + name, i);
}
}
return uri.substring(start, end);
}
/**
- * Throws if {@code s} contains characters that are not letters, digits or
+     * Returns the value (0 to 15) of the ASCII hex digit {@code c}, accepting both upper and
+     * lower case, or -1 if {@code c} is not a hex digit. Unlike {@link Character#digit}, non-ASCII
+     * digits are rejected.
+     */
+    private static int hexCharToValue(char c) {
+        if (c >= '0' && c <= '9') {
+            return c - '0';
+        } else if (c >= 'a' && c <= 'f') {
+            return c - 'a' + 10;
+        } else if (c >= 'A' && c <= 'F') {
+            return c - 'A' + 10;
+        } else {
+            return -1;
+        }
+    }
+
+    private static URISyntaxException unexpectedCharacterException(
+            String uri, String name, char unexpected, int index) {
+        // The section name is optional; without it the message carries no location.
+        String reason = "Unexpected character" + ((name != null) ? " in [" + name + "]" : "");
+        return new URISyntaxException(uri, reason + ": " + unexpected, index);
+    }
+
+    private static char getNextCharacter(String uri, int index, int end, String name)
+            throws URISyntaxException {
+        if (index < end) {
+            return uri.charAt(index);
+        }
+        // Ran past the end of the range being read: report where the input stopped.
+        String where = (name != null) ? " in [" + name + "]" : "";
+        throw new URISyntaxException(uri, "Unexpected end of string" + where, index);
+    }
+
+    /**
+     * Throws {@link URISyntaxException} if {@code uri} has a char that is neither whitelisted nor
* in {@code legal}.
*/
- public static void validateSimple(String s, String legal)
- throws URISyntaxException {
- for (int i = 0; i < s.length(); i++) {
- char ch = s.charAt(i);
- if (!((ch >= 'a' && ch <= 'z')
- || (ch >= 'A' && ch <= 'Z')
- || (ch >= '0' && ch <= '9')
- || legal.indexOf(ch) > -1)) {
- throw new URISyntaxException(s, "Illegal character", i);
+    public static void validateSimple(String uri, String legal) throws URISyntaxException {
+        for (int pos = 0; pos < uri.length(); pos++) {
+            char ch = uri.charAt(pos);
+            if (!(isWhitelisted(ch) || legal.indexOf(ch) >= 0)) {
+                throw unexpectedCharacterException(uri, null /* name */, ch, pos);
}
}
}
/**
- * Encodes {@code s} and appends the result to {@code builder}.
+ * Encodes the string {@code s} as per the rules of this encoder (see class level comment).
*
- * @param isPartiallyEncoded true to fix input that has already been
- * partially or fully encoded. For example, input of "hello%20world" is
- * unchanged with isPartiallyEncoded=true but would be double-escaped to
- * "hello%2520world" otherwise.
+ * @throws IllegalArgumentException if {@code charset} is unable to encode some part of {@code s}.
*/
- private void appendEncoded(StringBuilder builder, String s, Charset charset,
- boolean isPartiallyEncoded) {
- if (s == null) {
- throw new NullPointerException("s == null");
- }
-
- int escapeStart = -1;
- for (int i = 0; i < s.length(); i++) {
- char c = s.charAt(i);
- if ((c >= 'a' && c <= 'z')
- || (c >= 'A' && c <= 'Z')
- || (c >= '0' && c <= '9')
- || isRetained(c)
- || (c == '%' && isPartiallyEncoded)) {
- if (escapeStart != -1) {
- appendHex(builder, s.substring(escapeStart, i), charset);
- escapeStart = -1;
- }
- if (c == '%' && isPartiallyEncoded) {
- // this is an encoded 3-character sequence like "%20"
- builder.append(s, i, Math.min(i + 3, s.length()));
- i += 2;
- } else if (c == ' ') {
- builder.append('+');
- } else {
- builder.append(c);
- }
- } else if (escapeStart == -1) {
- escapeStart = i;
- }
- }
- if (escapeStart != -1) {
- appendHex(builder, s.substring(escapeStart, s.length()), charset);
- }
- }
-
public final String encode(String s, Charset charset) {
- // Guess a bit larger for encoded form
- StringBuilder builder = new StringBuilder(s.length() + 16);
+ StringBuilder builder = new StringBuilder(s.length()); // Lower bound; escapes add to it.
appendEncoded(builder, s, charset, false);
return builder.toString();
}
+ /**
+ * Appends the encoded form of {@code s} to {@code builder}, following the rules of this encoder.
+ *
+ * Bytes are produced with UTF-8, and a '%' in {@code s} is itself escaped (no partial encoding).
+ */
public final void appendEncoded(StringBuilder builder, String s) {
appendEncoded(builder, s, StandardCharsets.UTF_8, false);
}
+ /**
+ * Encodes the string {@code s} as per the rules of this encoder (see class level comment).
+ *
+ * Encoded output is appended to {@code builder}. This uses the default output encoding (UTF-8).
+ * Unlike {@code appendEncoded}, this method treats {@code s} as possibly already (partially)
+ * encoded: every '%' in the input is passed through unmodified instead of being escaped again,
+ * so existing escaped octets are not double escaped. Consider an encoder that retains nothing
+ * beyond the whitelist, given the string "foo%25bar". With this method, the output will be
+ * "foo%25bar", but with appendEncoded it will be double encoded into "foo%2525bar".
+ */
public final void appendPartiallyEncoded(StringBuilder builder, String s) {
appendEncoded(builder, s, StandardCharsets.UTF_8, true);
}
- /**
- * @param convertPlus true to convert '+' to ' '.
- * @param throwOnFailure true to throw an IllegalArgumentException on
- * invalid escape sequences; false to replace them with the replacement
- * character (U+fffd).
- */
- public static String decode(String s, boolean convertPlus, Charset charset,
- boolean throwOnFailure) {
- if (s.indexOf('%') == -1 && (!convertPlus || s.indexOf('+') == -1)) {
- return s;
- }
-
- StringBuilder result = new StringBuilder(s.length());
- ByteArrayOutputStream out = new ByteArrayOutputStream();
- for (int i = 0; i < s.length();) {
+    /** Appends {@code s} to {@code builder}, percent-escaping each char that is not kept as is. */
+    private void appendEncoded(
+            StringBuilder builder, String s, Charset charset, boolean partiallyEncoded) {
+        CharsetEncoder encoder = charset.newEncoder()
+                .onMalformedInput(CodingErrorAction.REPORT)
+                .onUnmappableCharacter(CodingErrorAction.REPORT);
+        CharBuffer cBuffer = CharBuffer.allocate(s.length());
+        for (int i = 0; i < s.length(); i++) {
char c = s.charAt(i);
- if (c == '%') {
- do {
- int d1, d2;
- if (i + 2 < s.length()
- && (d1 = hexToInt(s.charAt(i + 1))) != -1
- && (d2 = hexToInt(s.charAt(i + 2))) != -1) {
- out.write((byte) ((d1 << 4) + d2));
- } else if (throwOnFailure) {
- throw new IllegalArgumentException("Invalid % sequence at " + i + ": " + s);
- } else {
- byte[] replacement = "\ufffd".getBytes(charset);
- out.write(replacement, 0, replacement.length);
+            if (c == '%' && partiallyEncoded) {
+                // Existing escapes are passed through; flush pending chars first to keep order.
+                flushEncodingCharBuffer(builder, encoder, cBuffer);
+                builder.append('%');
+                continue;
+            }
+            if (c == ' ' && isRetained(' ')) {
+                // Likewise flush first, otherwise the '+' would jump ahead of pending chars.
+                flushEncodingCharBuffer(builder, encoder, cBuffer);
+                builder.append('+');
+                continue;
+            }
+            if (isWhitelistedOrRetained(c)) {
+                flushEncodingCharBuffer(builder, encoder, cBuffer);
+                builder.append(c);
+                continue;
+            }
+
+            // Queue the char; a run is encoded in one go when the next kept char (or the end) comes.
+            cBuffer.put(c);
+        }
+        flushEncodingCharBuffer(builder, encoder, cBuffer);
+    }
+
+    /**
+     * Encodes the chars queued in {@code cBuffer}, appends each resulting byte to {@code builder}
+     * as an upper-case "%XY" escape and leaves {@code cBuffer} empty and ready for writing again.
+     * @throws IllegalArgumentException if the queued chars cannot be encoded by {@code encoder}.
+     */
+    private static void flushEncodingCharBuffer(
+            StringBuilder builder, CharsetEncoder encoder, CharBuffer cBuffer) {
+        if (cBuffer.position() == 0) {
+            return;
+        }
+        // We are reading from the buffer now.
+        cBuffer.flip();
+        ByteBuffer byteBuffer = ByteBuffer.allocate(
+                cBuffer.remaining() * (int) Math.ceil(encoder.maxBytesPerChar()));
+        CoderResult result = encoder.encode(cBuffer, byteBuffer, true /* endOfInput */);
+        // According to the {@code CharsetEncoder#encode} spec, the method returns underflow
+        // once all of the input has been processed correctly.
+        if (result != CoderResult.UNDERFLOW) {
+            throw new IllegalArgumentException(
+                    "Error encoding, unexpected result ["
+                            + result.toString()
+                            + "] using encoder for ["
+                            + encoder.charset().name()
+                            + "]");
+        }
+        if (cBuffer.hasRemaining()) {
+            throw new IllegalArgumentException(
+                    "Encoder for [" + encoder.charset().name() + "] failed with underflow with "
+                            + "remaining input [" + cBuffer + "]");
+        }
+        // Need to flush in case the encoder saves internal state. The outcome of the flush itself
+        // must be checked: the result of encode() above is already known to be UNDERFLOW.
+        result = encoder.flush(byteBuffer);
+        if (result != CoderResult.UNDERFLOW) {
+            throw new IllegalArgumentException(
+                    "Error encoding, unexpected result ["
+                            + result.toString()
+                            + "] flushing encoder for ["
+                            + encoder.charset().name()
+                            + "]");
+        }
+        encoder.reset();
+        byteBuffer.flip();
+        // Write the encoded bytes.
+        while (byteBuffer.hasRemaining()) {
+            byte b = byteBuffer.get();
+            builder.append('%');
+            builder.append(intToHexDigit((b & 0xf0) >>> 4));
+            builder.append(intToHexDigit(b & 0x0f));
+        }
+        // Everything was consumed; make the character buffer writable from the start again.
+        cBuffer.clear();
+    }
+
+    /**
+     * Returns the upper-case hex digit for {@code b}; callers pass a single nibble (0 to 15).
+     */
+    private static char intToHexDigit(int b) {
+        int offset = (b < 10) ? '0' : ('A' - 10);
+        return (char) (offset + b);
+    }
+
+    /**
+     * Decodes {@code s}. Input '+' becomes ' ' iff {@code convertPlus}; "%XY" octets are decoded
+     * with {@code charset}. Invalid input throws {@link IllegalArgumentException} if
+     * {@code throwOnFailure}, else yields U+FFFD. Returns {@code s} itself if nothing to decode.
+     */
+    public static String decode(
+            String s, boolean convertPlus, Charset charset, boolean throwOnFailure) {
+        if (s.indexOf('%') < 0 && (!convertPlus || s.indexOf('+') < 0)) {
+            return s;
+        }
+        StringBuilder builder = new StringBuilder(s.length());
+        appendDecoded(builder, s, convertPlus, charset, throwOnFailure);
+        return builder.toString();
+    }
+
+ /**
+ * Replacement character (U+FFFD) appended to the output in place of undecodable input.
+ */
+ private static final char INVALID_INPUT_CHARACTER = '\ufffd';
+
+    /**
+     * Appends the decoded form of {@code s} to {@code builder}. Runs of consecutive "%XY" octets
+     * are accumulated and decoded together with {@code charset}; see {@code decode} for flags.
+     */
+    private static void appendDecoded(StringBuilder builder, String s, boolean convertPlus,
+            Charset charset, boolean throwOnFailure) {
+        CharsetDecoder decoder = charset.newDecoder()
+                .onMalformedInput(CodingErrorAction.REPORT)
+                .onUnmappableCharacter(CodingErrorAction.REPORT);
+        // Holds the bytes corresponding to the escaped chars being read (empty if the last char
+        // wasn't an escaped char).
+        ByteBuffer byteBuffer = ByteBuffer.allocate(s.length());
+        int i = 0;
+        nextChar: while (i < s.length()) {
+            char c = s.charAt(i);
+            i++;
+            switch (c) {
+                case '+':
+                    flushDecodingByteAccumulator(builder, decoder, byteBuffer, throwOnFailure);
+                    builder.append(convertPlus ? ' ' : '+');
+                    break;
+                case '%':
+                    // Expect two characters representing a number in hex.
+                    byte hexValue = 0;
+                    for (int j = 0; j < 2; j++) {
+                        try {
+                            c = getNextCharacter(s, i, s.length(), null /* name */);
+                        } catch (URISyntaxException e) {
+                            // Unexpected end of input.
+                            if (throwOnFailure) {
+                                throw new IllegalArgumentException(e);
+                            } else {
+                                flushDecodingByteAccumulator(
+                                        builder, decoder, byteBuffer, throwOnFailure);
+                                builder.append(INVALID_INPUT_CHARACTER);
+                                return;
+                            }
+                        }
+                        i++;
+                        int newDigit = hexCharToValue(c);
+                        if (newDigit < 0) {
+                            if (throwOnFailure) {
+                                throw new IllegalArgumentException(
+                                        unexpectedCharacterException(s, null /* name */, c, i - 1));
+                            } else {
+                                flushDecodingByteAccumulator(
+                                        builder, decoder, byteBuffer, throwOnFailure);
+                                builder.append(INVALID_INPUT_CHARACTER);
+                                // "break" would still put() a bogus byte.
+                                continue nextChar;
+                            }
+                        }
+                        hexValue = (byte) (hexValue * 0x10 + newDigit);
+                    }
- i += 3;
- } while (i < s.length() && s.charAt(i) == '%');
- result.append(new String(out.toByteArray(), charset));
- out.reset();
- } else {
- if (convertPlus && c == '+') {
- c = ' ';
- }
- result.append(c);
- i++;
+                    byteBuffer.put(hexValue);
+                    break;
+                default:
+                    flushDecodingByteAccumulator(builder, decoder, byteBuffer, throwOnFailure);
+                    builder.append(c);
+            }
+        }
- return result.toString();
+        flushDecodingByteAccumulator(builder, decoder, byteBuffer, throwOnFailure);
+    }
+
+    /**
+     * Decodes the bytes accumulated in {@code byteBuffer} and appends the result to
+     * {@code builder}; undecodable bytes throw or yield U+FFFD, as per {@code throwOnFailure}.
+     *
+     * The whole pending run is replaced by a single U+FFFD when any part of it fails to decode.
+     */
+    private static void flushDecodingByteAccumulator(StringBuilder builder,
+            CharsetDecoder decoder, ByteBuffer byteBuffer, boolean throwOnFailure) {
+        if (byteBuffer.position() > 0) {
+            byteBuffer.flip();
+            try {
+                builder.append(decoder.decode(byteBuffer));
+            } catch (CharacterCodingException e) {
+                if (throwOnFailure) {
+                    throw new IllegalArgumentException(e);
+                }
+                builder.append(INVALID_INPUT_CHARACTER);
+            } finally {
+                // Position 0, limit = capacity: the accumulator can be written to again.
+                byteBuffer.clear();
+            }
+        }
}
/**
- * Like {@link Character#digit}, but without support for non-ASCII
- * characters.
+ * Same as {@code decode(s, false, UTF_8, true)}: UTF-8, '+' kept as is, invalid input throws.
*/
- private static int hexToInt(char c) {
- if ('0' <= c && c <= '9') {
- return c - '0';
- } else if ('a' <= c && c <= 'f') {
- return 10 + (c - 'a');
- } else if ('A' <= c && c <= 'F') {
- return 10 + (c - 'A');
- } else {
- return -1;
- }
- }
-
public static String decode(String s) {
- return decode(s, false, StandardCharsets.UTF_8, true);
+ return decode(
+ s, false /* convertPlus */, StandardCharsets.UTF_8, true /* throwOnFailure */);
}
-
- private static void appendHex(StringBuilder builder, String s, Charset charset) {
- for (byte b : s.getBytes(charset)) {
- appendHex(builder, b);
- }
- }
-
- private static void appendHex(StringBuilder sb, byte b) {
- sb.append('%');
- sb.append(Byte.toHexString(b, true));
- }
-}
+}
\ No newline at end of file
diff --git a/luni/src/test/java/libcore/net/UriCodecTest.java b/luni/src/test/java/libcore/net/UriCodecTest.java
new file mode 100644
index 0000000..503bd8b
--- /dev/null
+++ b/luni/src/test/java/libcore/net/UriCodecTest.java
@@ -0,0 +1,294 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */
+
+package libcore.net;
+
+import junit.framework.TestCase;
+
+import java.net.URISyntaxException;
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Tests for {@link UriCodec}
+ */
+public class UriCodecTest extends TestCase {
+    private static final UriCodec CODEC = new UriCodec() {
+        @Override
+        protected boolean isRetained(char c) {
+            // Note: this is a dubious codec specifying to retain the escape character '%'.
+            // The tests check that '%' is not treated as retained anyway.
+            return c == '$' || c == '%';
+        }
+    };
+
+    private static final String VALID_ENCODED_STRING = "a0b$CD%01a%23b%45c%67%89%abd%cd%efq";
+
+    public void testValidate_stringOK_passes() throws Exception {
+        assertEquals(
+                VALID_ENCODED_STRING,
+                CODEC.validate(
+                        VALID_ENCODED_STRING, 0, VALID_ENCODED_STRING.length(), "test OK string"));
+    }
+
+    // Hex codes in upper case are valid as well.
+    public void testValidate_stringUppercaseOK_passes() throws Exception {
+        String stringOKUpperCase = VALID_ENCODED_STRING.toUpperCase();
+        CODEC.validate(stringOKUpperCase, 0, stringOKUpperCase.length(), "test OK UC string");
+    }
+
+    // Characters before the start index are ignored.
+    public void testValidate_wrongCharsBeforeStart_passes() throws Exception {
+        assertEquals(VALID_ENCODED_STRING, CODEC.validate(
+                "%p" + VALID_ENCODED_STRING,
+                2,
+                VALID_ENCODED_STRING.length() + 2,
+                "test string"));
+    }
+
+    // Fails with character 'p', invalid after '%'
+    public void testValidate_wrongCharsAtStart_fails() throws Exception {
+        try {
+            CODEC.validate(
+                    "%p" + VALID_ENCODED_STRING,
+                    0,
+                    VALID_ENCODED_STRING.length() + 2,
+                    "test string");
+            fail("Expected URISyntaxException");
+        } catch (URISyntaxException expected) {
+            // Expected.
+        }
+    }
+
+    // Characters beyond the end index are ignored.
+    public void testValidate_wrongCharsBeyondEnd_passes() throws Exception {
+        assertEquals(VALID_ENCODED_STRING, CODEC.validate(
+                VALID_ENCODED_STRING + "%p",
+                0,
+                VALID_ENCODED_STRING.length(),
+                "test string"));
+    }
+
+    // Fails with character 'p', invalid after '%'
+    public void testValidate_wrongCharsAtEnd_fails() throws Exception {
+        try {
+            CODEC.validate(
+                    VALID_ENCODED_STRING + "%p",
+                    0,
+                    VALID_ENCODED_STRING.length() + 2,
+                    "test string");
+            fail("Expected URISyntaxException");
+        } catch (URISyntaxException expected) {
+            // Expected.
+        }
+    }
+
+    public void testValidate_secondDigitWrong_fails() throws Exception {
+        try {
+            CODEC.validate(
+                    VALID_ENCODED_STRING + "%1p",
+                    0,
+                    VALID_ENCODED_STRING.length() + 3,
+                    "test string");
+            fail("Expected URISyntaxException");
+        } catch (URISyntaxException expected) {
+            // Expected.
+        }
+    }
+
+    public void testValidate_emptyString_passes() throws Exception {
+        assertEquals("", CODEC.validate("", 0, 0, "empty string"));
+    }
+
+    public void testValidate_stringEndingWithPercent_fails() throws Exception {
+        try {
+            CODEC.validate("a%", 0, 2, "a% string");
+            fail("Expected URISyntaxException");
+        } catch (URISyntaxException expected) {
+            // Expected.
+        }
+    }
+
+    public void testValidate_stringEndingWithPercentAndSingleDigit_fails() throws Exception {
+        try {
+            CODEC.validate("a%1", 0, 3, "a%1 string");
+            fail("Expected URISyntaxException");
+        } catch (URISyntaxException expected) {
+            // Expected.
+        }
+    }
+
+    public void testValidateSimple_stringOK_passes() throws Exception {
+        UriCodec.validateSimple(VALID_ENCODED_STRING, "$%");
+    }
+
+    // Hex codes in upper case are valid as well.
+    public void testValidateSimple_stringUppercaseOK_passes() throws Exception {
+        UriCodec.validateSimple(VALID_ENCODED_STRING.toUpperCase(), "$%");
+    }
+
+    // Fails with character '/', which is neither whitelisted nor legal.
+    public void testValidateSimple_wrongCharsAtStart_fails() throws Exception {
+        try {
+            UriCodec.validateSimple("%/" + VALID_ENCODED_STRING, "$%");
+            fail("Expected URISyntaxException");
+        } catch (URISyntaxException expected) {
+            // Expected.
+        }
+    }
+
+    // Fails with character '/', which is neither whitelisted nor legal.
+    public void testValidateSimple_wrongCharsAtEnd_fails() throws Exception {
+        try {
+            UriCodec.validateSimple(VALID_ENCODED_STRING + "%/", "$%");
+            fail("Expected URISyntaxException");
+        } catch (URISyntaxException expected) {
+            // Expected.
+        }
+    }
+
+    public void testValidateSimple_emptyString_passes() throws Exception {
+        UriCodec.validateSimple("", "$%");
+    }
+
+    public void testValidateSimple_stringEndingWithPercent_passes() throws Exception {
+        UriCodec.validateSimple("a%", "$%");
+    }
+
+    public void testValidateSimple_stringEndingWithPercentAndSingleDigit_passes() throws Exception {
+        UriCodec.validateSimple("a%1", "$%");
+    }
+
+    public void testEncode_emptyString_returnsEmptyString() {
+        assertEquals("", CODEC.encode("", StandardCharsets.UTF_8));
+    }
+
+    public void testEncode() {
+        assertEquals("ab%2F$%C4%82%2512", CODEC.encode("ab/$\u0102%12", StandardCharsets.UTF_8));
+    }
+
+    public void testEncode_convertWhitespace() {
+        // Whitespace is not retained, output %20.
+        assertEquals("ab%2F$%C4%82%2512%20",
+                CODEC.encode("ab/$\u0102%12 ", StandardCharsets.UTF_8));
+
+        UriCodec withWhitespaceRetained = new UriCodec() {
+            @Override
+            protected boolean isRetained(char c) {
+                // Same dubious codec as CODEC ('%' retained), but additionally retaining ' '.
+                return c == '$' || c == '%' || c == ' ';
+            }
+        };
+        // Whitespace is retained, convert to plus.
+        assertEquals("ab%2F$%C4%82%2512+",
+                withWhitespaceRetained.encode("ab/$\u0102%12 ", StandardCharsets.UTF_8));
+    }
+
+    public void testEncode_partially_returnsPercentUnchanged() {
+        StringBuilder stringBuilder = new StringBuilder();
+        // Check it's really appending instead of returning a new builder.
+        stringBuilder.append("pp");
+        CODEC.appendPartiallyEncoded(stringBuilder, "ab/$\u0102%");
+        // Returns % at the end instead of %25.
+        assertEquals("ppab%2F$%C4%82%", stringBuilder.toString());
+    }
+
+    public void testEncode_partially_returnsCharactersAfterPercentEncoded() {
+        StringBuilder stringBuilder = new StringBuilder();
+        // Check it's really appending instead of returning a new builder.
+        stringBuilder.append("pp");
+        CODEC.appendPartiallyEncoded(stringBuilder, "ab/$\u0102%\u0102");
+        // Returns %C4%82 at the end.
+        assertEquals("ppab%2F$%C4%82%%C4%82", stringBuilder.toString());
+    }
+
+    public void testEncode_partially_returnsDigitsAfterPercentUnchanged() {
+        StringBuilder stringBuilder = new StringBuilder();
+        // Check it's really appending instead of returning a new builder.
+        stringBuilder.append("pp");
+        CODEC.appendPartiallyEncoded(stringBuilder, "ab/$\u0102%38");
+        // Returns %38 at the end.
+        assertEquals("ppab%2F$%C4%82%38", stringBuilder.toString());
+    }
+
+    // Last character needs encoding (make sure we are flushing the buffer with chars to encode).
+    public void testEncode_lastCharacter() {
+        assertEquals("ab%2F$%C4%82%25%E0%A1%80",
+                CODEC.encode("ab/$\u0102%\u0840", StandardCharsets.UTF_8));
+    }
+
+    public void testDecode_emptyString_returnsEmptyString() {
+        assertEquals("", UriCodec.decode(""));
+    }
+
+    public void testDecode_wrongHexDigit_fails() {
+        try {
+            // %p in the end.
+            UriCodec.decode("ab%2f$%C4%82%25%e0%a1%80%p");
+            fail("Expected IllegalArgumentException");
+        } catch (IllegalArgumentException expected) {
+            // Expected.
+        }
+    }
+
+    public void testDecode_secondHexDigitWrong_fails() {
+        try {
+            // %1p in the end.
+            UriCodec.decode("ab%2f$%c4%82%25%e0%a1%80%1p");
+            fail("Expected IllegalArgumentException");
+        } catch (IllegalArgumentException expected) {
+            // Expected.
+        }
+    }
+
+    public void testDecode_endsWithPercent_fails() {
+        try {
+            // % in the end.
+            UriCodec.decode("ab%2f$%c4%82%25%e0%a1%80%");
+            fail("Expected IllegalArgumentException");
+        } catch (IllegalArgumentException expected) {
+            // Expected.
+        }
+    }
+
+    public void testDecode_dontThrowException_appendsUnknownCharacter() {
+        assertEquals("ab/$\u0102%\u0840\ufffd",
+                UriCodec.decode("ab%2f$%c4%82%25%e0%a1%80%",
+                        false /* convertPlus */,
+                        StandardCharsets.UTF_8,
+                        false /* throwOnFailure */));
+    }
+
+    public void testDecode_convertPlus() {
+        assertEquals("ab/$\u0102% \u0840",
+                UriCodec.decode("ab%2f$%c4%82%25+%e0%a1%80",
+                        true /* convertPlus */,
+                        StandardCharsets.UTF_8,
+                        false /* throwOnFailure */));
+    }
+
+    // Last character needs decoding (make sure we are flushing the buffer with chars to decode).
+    public void testDecode_lastCharacter() {
+        assertEquals("ab/$\u0102%\u0840",
+                UriCodec.decode("ab%2f$%c4%82%25%e0%a1%80"));
+    }
+
+    // Check that a second row of encoded characters is decoded properly (internal buffers are
+    // reset properly).
+    public void testDecode_secondRowOfEncoded() {
+        assertEquals("ab/$\u0102%\u0840aa\u0840",
+                UriCodec.decode("ab%2f$%c4%82%25%e0%a1%80aa%e0%a1%80"));
+    }
+}