Skip to content

Commit

Permalink
8316681: Rewrite URLEncoder.encode to use small reusable buffers
Browse files Browse the repository at this point in the history
Reviewed-by: dfuchs, rriggs
  • Loading branch information
cl4es committed Sep 22, 2023
1 parent bd2439f commit c24c66d
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 19 deletions.
81 changes: 64 additions & 17 deletions src/java.base/share/classes/java/net/URLEncoder.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,13 @@
package java.net;

import java.io.UnsupportedEncodingException;
import java.io.CharArrayWriter;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException ;
import java.util.BitSet;
Expand Down Expand Up @@ -138,11 +143,6 @@ public class URLEncoder {
DEFAULT_ENCODING_NAME = StaticProperty.fileEncoding();
}

/**
 * Appends one byte to {@code out} in percent-escaped form: a {@code '%'}
 * followed by the byte's two upper-case hexadecimal digits.
 *
 * @param out the builder receiving the escaped byte
 * @param b   the byte value to escape
 */
private static void encodeByte(StringBuilder out, byte b) {
    HexFormat upperCaseHex = HexFormat.of().withUpperCase();
    out.append('%');
    upperCaseHex.toHexDigits(out, b);
}

/**
* You can't call the constructor.
*/
Expand Down Expand Up @@ -205,6 +205,8 @@ public static String encode(String s, String enc)
}
}

private static final int ENCODING_CHUNK_SIZE = 8;

/**
* Translates a string into {@code application/x-www-form-urlencoded}
* format using a specific {@linkplain Charset Charset}.
Expand Down Expand Up @@ -239,11 +241,16 @@ public static String encode(String s, Charset charset) {
}

StringBuilder out = new StringBuilder(s.length() << 1);
CharArrayWriter charArrayWriter = new CharArrayWriter();
if (i > 0) {
out.append(s, 0, i);
}

CharsetEncoder ce = charset.newEncoder()
.onMalformedInput(CodingErrorAction.REPLACE)
.onUnmappableCharacter(CodingErrorAction.REPLACE);
CharBuffer cb = CharBuffer.allocate(ENCODING_CHUNK_SIZE);
ByteBuffer bb = ByteBuffer.allocate((int)(ENCODING_CHUNK_SIZE * ce.maxBytesPerChar()));

while (i < s.length()) {
char c = s.charAt(i);
if (DONT_NEED_ENCODING.test(c)) {
Expand All @@ -255,7 +262,7 @@ public static String encode(String s, Charset charset) {
} else {
// convert to external encoding before hex conversion
do {
charArrayWriter.write(c);
cb.put(c);
/*
* If this character represents the start of a Unicode
* surrogate pair, then pass in two characters. It's not
Expand All @@ -268,23 +275,63 @@ public static String encode(String s, Charset charset) {
if ((i + 1) < s.length()) {
char d = s.charAt(i + 1);
if (Character.isLowSurrogate(d)) {
charArrayWriter.write(d);
cb.put(d);
i++;
}
}
}
// Limit to ENCODING_CHUNK_SIZE - 1 so that we can always fit in
// a surrogate pair on the next iteration
if (cb.position() >= ENCODING_CHUNK_SIZE - 1) {
flushToStringBuilder(out, ce, cb, bb, false);
}
i++;
} while (i < s.length() && !DONT_NEED_ENCODING.test((c = s.charAt(i))));

String str = charArrayWriter.toString();
byte[] ba = str.getBytes(charset);
for (byte b : ba) {
encodeByte(out, b);
}
charArrayWriter.reset();
flushToStringBuilder(out, ce, cb, bb, true);
}
}

return out.toString();
}

/**
 * Encodes input chars in {@code cb} and appends the byte values in an escaped
 * format ({@code "%XX"}) to {@code out}. The temporary byte buffer, {@code bb},
 * must be able to accept {@code cb.position() * ce.maxBytesPerChar()} bytes.
 *
 * <p>When {@code endOfInput} is {@code false} the encoder may leave trailing
 * input unconsumed (for example a high surrogate at the very end of the chunk,
 * which it cannot classify until it sees the following char). Such chars are
 * kept at the start of {@code cb} so they are encoded by the next flush rather
 * than being silently dropped.
 *
 * @param out the StringBuilder to output encoded and escaped bytes to
 * @param ce charset encoder. Will be reset if endOfInput is true
 * @param cb input buffer; on return it is ready for further {@code put} calls
 *           and holds only the chars the encoder did not consume (normally none)
 * @param bb output buffer, will be cleared
 * @param endOfInput true if this is the last flush for an encoding chunk,
 *                   so that all bytes pending in ce are flushed to out and
 *                   ce is reset
 */
private static void flushToStringBuilder(StringBuilder out,
                                         CharsetEncoder ce,
                                         CharBuffer cb,
                                         ByteBuffer bb,
                                         boolean endOfInput) {
    cb.flip();
    try {
        CoderResult cr = ce.encode(cb, bb, endOfInput);
        if (!cr.isUnderflow()) {
            cr.throwException();
        }
        if (endOfInput) {
            cr = ce.flush(bb);
            if (!cr.isUnderflow()) {
                cr.throwException();
            }
            ce.reset();
        }
    } catch (CharacterCodingException x) {
        throw new Error(x); // Can't happen
    }
    HexFormat hex = HexFormat.of().withUpperCase();
    byte[] bytes = bb.array();
    int len = bb.position();
    for (int i = 0; i < len; i++) {
        out.append('%');
        hex.toHexDigits(out, bytes[i]);
    }
    // compact() rather than clear(): if the encoder stopped short of the end
    // of cb, the leftover chars must survive into the next flush. When all
    // input was consumed this leaves cb empty, exactly like clear().
    cb.compact();
    bb.clear();
}
}
3 changes: 1 addition & 2 deletions src/java.base/share/classes/java/util/HexFormat.java
Original file line number Diff line number Diff line change
Expand Up @@ -679,9 +679,8 @@ public char toHighHexDigit(int value) {
* @throws UncheckedIOException if an I/O exception occurs appending to the output
*/
public <A extends Appendable> A toHexDigits(A out, byte value) {
Objects.requireNonNull(out, "out");
try {
out.append(toHighHexDigit(value));
out.append(toHighHexDigit(value)); // implicit null-check
out.append(toLowHexDigit(value));
return out;
} catch (IOException ioe) {
Expand Down
3 changes: 3 additions & 0 deletions test/jdk/java/net/URLEncoder/SurrogatePairs.java
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ public static String[][] arguments() {
{"\uDBFF\uDC001", "%F4%8F%B0%801"},
{"\uDBFF\uDC00@", "%F4%8F%B0%80%40"},
{"\u0101\uDBFF\uDC00", "%C4%81%F4%8F%B0%80"},
{"\u0101\u0101\u0101\u0101\u0101\u0101\uDBFF\uDC00\u0101", "%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%F4%8F%B0%80%C4%81"},
{"\u0101\u0101\u0101\u0101\u0101\u0101\u0101\uDBFF\uDC00\u0101", "%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%F4%8F%B0%80%C4%81"},
{"\u0101\u0101\u0101\u0101\u0101\u0101\u0101\u0101\uDBFF\uDC00\u0101", "%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%C4%81%F4%8F%B0%80%C4%81"},
{"\uDBFF\uDC00\u0101", "%F4%8F%B0%80%C4%81"},
{"\uDE0A\uD83D", "%3F%3F"},
{"1\uDE0A\uD83D", "1%3F%3F"},
Expand Down
14 changes: 14 additions & 0 deletions test/micro/org/openjdk/bench/java/net/URLEncodeDecode.java
Original file line number Diff line number Diff line change
Expand Up @@ -190,4 +190,18 @@ public void testDecodeUTF8(Blackhole bh) throws UnsupportedEncodingException {
}


/**
 * Benchmarks {@code URLEncoder.encode} with the ISO-8859-1 charset over every
 * string in {@code testStringsEncode}, handing each result to the blackhole.
 */
@Benchmark
public void testEncodeLatin1(Blackhole bh) throws UnsupportedEncodingException {
    for (String input : testStringsEncode) {
        String encoded = java.net.URLEncoder.encode(input, StandardCharsets.ISO_8859_1);
        bh.consume(encoded);
    }
}

/**
 * Benchmarks {@code URLDecoder.decode} with the ISO-8859-1 charset over every
 * string in {@code testStringsDecode}, handing each result to the blackhole.
 */
@Benchmark
public void testDecodeLatin1(Blackhole bh) throws UnsupportedEncodingException {
    for (String input : testStringsDecode) {
        String decoded = URLDecoder.decode(input, StandardCharsets.ISO_8859_1);
        bh.consume(decoded);
    }
}

}

0 comments on commit c24c66d

Please sign in to comment.