From 2a0961250334eb6dafef94336bfa871e10582f2a Mon Sep 17 00:00:00 2001 From: Trustin Lee Date: Tue, 30 Jul 2024 15:52:49 +0900 Subject: [PATCH] Allow some control characters in a request target Motivation: RFC 3986 doesn't prohibit control characters in a request target as long as it's percent-encoded. However, `DefaultRequestTarget` doesn't allow any control characters except `\r` and `\n` in a query string. This is to reject potentially harmful paths such as `/foo\nbar` but it can be overly strict to some users. Modifications: - `DefaultRequestTarget` now allows the following control characters additionally: - TAB (0x09) - FS (0x1C) - GS (0x1D) - RS (0x1E) - US (0x1F) - However, other control characters will remain prohibited until there's a good reason to allow them. - Simplified `appendOneByte()` by introducing additional `BitSet`s of percent-encodable characters. Result: - A user can send and receive an HTTP request whose `:path` contains the following characters now: - TAB (0x09) - FS (0x1C) - GS (0x1D) - RS (0x1E) - US (0x1F) --- .../internal/common/DefaultRequestTarget.java | 82 +++++++++++++------ .../common/DefaultRequestTargetTest.java | 35 ++++++-- 2 files changed, 89 insertions(+), 28 deletions(-) diff --git a/core/src/main/java/com/linecorp/armeria/internal/common/DefaultRequestTarget.java b/core/src/main/java/com/linecorp/armeria/internal/common/DefaultRequestTarget.java index 47d0fd0800f..1b55fe4bf18 100644 --- a/core/src/main/java/com/linecorp/armeria/internal/common/DefaultRequestTarget.java +++ b/core/src/main/java/com/linecorp/armeria/internal/common/DefaultRequestTarget.java @@ -36,6 +36,36 @@ public final class DefaultRequestTarget implements RequestTarget { private static final String ALLOWED_COMMON_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~:/?@!$&'()*,;="; + private static final String ALLOWED_COMMON_ENCODED_CHARS; + + static { + // Note: + // RFC 3986 doesn't prohibit control characters when percent-encoded. + // However, Armeria does prohibit most control characters except the following for security reasons: + // + // - TAB (0x09) + // - FS (0x1C) + // - GS (0x1D) + // - RS (0x1E) + // - US (0x1F) + // - LF (0x0A, allowed only in a query string) + // - CR (0x0D, allowed only in a query string) + // + // Please open a new issue to start a discussion if you think there are harmless control characters + // we should allow additionally. + final StringBuilder buf = new StringBuilder(128); + buf.append("\t\u001c\u001d\u001e\u001f"); + + // Add all non-control ASCII characters. + // Note that we don't need to add all unicode codepoints here + // because non-ASCII characters will always be percent-encoded. + for (char ch = ' '; ch <= '~'; ch++) { + assert !Character.isISOControl(ch) : "Attempted to add a control character: " + (int) ch; + buf.append(ch); + } + ALLOWED_COMMON_ENCODED_CHARS = buf.toString(); + } + /** * The lookup table for the characters allowed in a path. */ @@ -51,6 +81,21 @@ public final class DefaultRequestTarget implements RequestTarget { */ private static final BitSet FRAGMENT_ALLOWED = PATH_ALLOWED; + /** + * The lookup table for the characters allowed in a path when percent-encoded. + */ + private static final BitSet PATH_ALLOWED_IF_ENCODED = toBitSet(ALLOWED_COMMON_ENCODED_CHARS); + + /** + * The lookup table for the characters allowed in a query when percent-encoded. + */ + private static final BitSet QUERY_ALLOWED_IF_ENCODED = toBitSet(ALLOWED_COMMON_ENCODED_CHARS + "\n\r"); + + /** + * The lookup table for the characters allowed in a fragment when percent-encoded. + */ + private static final BitSet FRAGMENT_ALLOWED_IF_ENCODED = PATH_ALLOWED_IF_ENCODED; + /** * The lookup table for the characters that whose percent encoding must be preserved * when used in a path because whether they are percent-encoded or not affects @@ -83,16 +128,18 @@ private static BitSet toBitSet(String chars) { } private enum ComponentType { - CLIENT_PATH(PATH_ALLOWED, PATH_MUST_PRESERVE_ENCODING), - SERVER_PATH(PATH_ALLOWED, PATH_MUST_PRESERVE_ENCODING), - QUERY(QUERY_ALLOWED, QUERY_MUST_PRESERVE_ENCODING), - FRAGMENT(FRAGMENT_ALLOWED, FRAGMENT_MUST_PRESERVE_ENCODING); + CLIENT_PATH(PATH_ALLOWED, PATH_ALLOWED_IF_ENCODED, PATH_MUST_PRESERVE_ENCODING), + SERVER_PATH(PATH_ALLOWED, PATH_ALLOWED_IF_ENCODED, PATH_MUST_PRESERVE_ENCODING), + QUERY(QUERY_ALLOWED, QUERY_ALLOWED_IF_ENCODED, QUERY_MUST_PRESERVE_ENCODING), + FRAGMENT(FRAGMENT_ALLOWED, FRAGMENT_ALLOWED_IF_ENCODED, FRAGMENT_MUST_PRESERVE_ENCODING); private final BitSet allowed; + private final BitSet allowedIfEncoded; private final BitSet mustPreserveEncoding; - ComponentType(BitSet allowed, BitSet mustPreserveEncoding) { + ComponentType(BitSet allowed, BitSet allowedIfEncoded, BitSet mustPreserveEncoding) { this.allowed = allowed; + this.allowedIfEncoded = allowedIfEncoded; this.mustPreserveEncoding = mustPreserveEncoding; } @@ -100,6 +147,10 @@ boolean isAllowed(int cp) { return allowed.get(cp); } + boolean isAllowedIfEncoded(int cp) { + return (cp & 0xFFFFFF80) != 0 || allowedIfEncoded.get(cp); + } + boolean mustPreserveEncoding(int cp) { return mustPreserveEncoding.get(cp); } @@ -760,23 +811,6 @@ private static Bytes decodePercentsAndEncodeToUtf8(String value, int start, int } private static boolean appendOneByte(Bytes buf, int cp, boolean wasSlash, ComponentType type) { - if (cp == 0x7F) { - // Reject the control character: 0x7F - return false; - } - - if (cp >>> 5 == 0) { - // Reject the control characters: 0x00..0x1F - if (type != ComponentType.QUERY) { - return false; - } - - if (cp != 0x0A && cp != 0x0D && cp != 0x09) { - // .. except 0x0A (LF), 0x0D (CR) and 0x09 (TAB) because they are used in a form. - return false; - } - } - if (cp == '/' && type == ComponentType.SERVER_PATH) { if (!wasSlash) { buf.ensure(1); @@ -788,8 +822,10 @@ private static boolean appendOneByte(Bytes buf, int cp, boolean wasSlash, Compon buf.ensure(1); if (type.isAllowed(cp)) { buf.add((byte) cp); - } else { + } else if (type.isAllowedIfEncoded(cp)) { buf.addEncoded((byte) cp); + } else { + return false; } } diff --git a/core/src/test/java/com/linecorp/armeria/internal/common/DefaultRequestTargetTest.java b/core/src/test/java/com/linecorp/armeria/internal/common/DefaultRequestTargetTest.java index 0eebdf1deea..f959f039552 100644 --- a/core/src/test/java/com/linecorp/armeria/internal/common/DefaultRequestTargetTest.java +++ b/core/src/test/java/com/linecorp/armeria/internal/common/DefaultRequestTargetTest.java @@ -185,18 +185,17 @@ void shouldRejectInvalidPercentEncoding(Mode mode) { void shouldRejectControlChars(Mode mode) { assertRejected(parse(mode, "/\0")); assertRejected(parse(mode, "/a\nb")); + assertRejected(parse(mode, "/a\rb")); assertRejected(parse(mode, "/a\u007fb")); // Escaped assertRejected(parse(mode, "/%00")); - assertRejected(parse(mode, "/a%09b")); assertRejected(parse(mode, "/a%0ab")); assertRejected(parse(mode, "/a%0db")); assertRejected(parse(mode, "/a%7fb")); // With query string assertRejected(parse(mode, "/\0?c")); - assertRejected(parse(mode, "/a\tb?c")); assertRejected(parse(mode, "/a\nb?c")); assertRejected(parse(mode, "/a\rb?c")); assertRejected(parse(mode, "/a\u007fb?c")); @@ -207,13 +206,33 @@ void shouldRejectControlChars(Mode mode) { assertRejected(parse(mode, "/?a\u007fb")); assertRejected(parse(mode, "/?a%7Fb")); - // However, 0x0A, 0x0D, 0x09 should be accepted in a query string. + // However, 0x09, 0x1C-0x1F should be accepted in a path. + assertAccepted(parse(mode, "/a\tb"), "/a%09b"); + assertAccepted(parse(mode, "/a\u001cb"), "/a%1Cb"); + assertAccepted(parse(mode, "/a\u001db"), "/a%1Db"); + assertAccepted(parse(mode, "/a\u001eb"), "/a%1Eb"); + assertAccepted(parse(mode, "/a\u001fb"), "/a%1Fb"); + assertAccepted(parse(mode, "/a%09b"), "/a%09b"); + assertAccepted(parse(mode, "/a%1Cb"), "/a%1Cb"); + assertAccepted(parse(mode, "/a%1Db"), "/a%1Db"); + assertAccepted(parse(mode, "/a%1Eb"), "/a%1Eb"); + assertAccepted(parse(mode, "/a%1Fb"), "/a%1Fb"); + + // However, 0x0A, 0x0D, 0x09, 0x1C-0x1F should be accepted in a query string. assertAccepted(parse(mode, "/?a\tb"), "/", "a%09b"); assertAccepted(parse(mode, "/?a\nb"), "/", "a%0Ab"); assertAccepted(parse(mode, "/?a\rb"), "/", "a%0Db"); + assertAccepted(parse(mode, "/?a\u001cb"), "/", "a%1Cb"); + assertAccepted(parse(mode, "/?a\u001db"), "/", "a%1Db"); + assertAccepted(parse(mode, "/?a\u001eb"), "/", "a%1Eb"); + assertAccepted(parse(mode, "/?a\u001fb"), "/", "a%1Fb"); assertAccepted(parse(mode, "/?a%09b"), "/", "a%09b"); assertAccepted(parse(mode, "/?a%0Ab"), "/", "a%0Ab"); assertAccepted(parse(mode, "/?a%0Db"), "/", "a%0Db"); + assertAccepted(parse(mode, "/?a%1Cb"), "/", "a%1Cb"); + assertAccepted(parse(mode, "/?a%1Db"), "/", "a%1Db"); + assertAccepted(parse(mode, "/?a%1Eb"), "/", "a%1Eb"); + assertAccepted(parse(mode, "/?a%1Fb"), "/", "a%1Fb"); if (mode == Mode.CLIENT) { // All sort of control characters should be rejected in a fragment. @@ -221,12 +240,18 @@ void shouldRejectControlChars(Mode mode) { assertRejected(forClient("/#%00")); assertRejected(forClient("/#a\u007fb")); assertRejected(forClient("/#a%7Fb")); - assertRejected(forClient("/#a\tb")); assertRejected(forClient("/#a\nb")); assertRejected(forClient("/#a\rb")); - assertRejected(forClient("/#a%09b")); assertRejected(forClient("/#a%0Ab")); assertRejected(forClient("/#a%0Db")); + + // ... except the following. + assertAccepted(forClient("/#a\tb"), "/", null, "a%09b"); + assertAccepted(forClient("/#a%09b"), "/", null, "a%09b"); + assertAccepted(forClient("/#a%1Cb"), "/", null, "a%1Cb"); + assertAccepted(forClient("/#a%1Db"), "/", null, "a%1Db"); + assertAccepted(forClient("/#a%1Eb"), "/", null, "a%1Eb"); + assertAccepted(forClient("/#a%1Fb"), "/", null, "a%1Fb"); } }