From c2e1e613fca14f97be7e5f5f98702c17b17c3798 Mon Sep 17 00:00:00 2001 From: ignace nyamagana butera Date: Fri, 27 Dec 2024 16:22:21 +0100 Subject: [PATCH] Adding new methods to UriString --- CHANGELOG.md | 3 + Idna/Converter.php | 2 +- UriString.php | 183 +++++++++++++++++++++++++++++++++++++++++++++ UriStringTest.php | 53 +++++++++++++ 4 files changed, 240 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f17069d..0953748 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,9 @@ All Notable changes to `League\Uri\Interfaces` will be documented in this file - `UriInterface::toNormalizedString` - `UriInterface::getUser` - `League\Uri\IPv6\Converter::isIpv6` +- `UriString::resolve` +- `UriString::removeDotSegments` +- `UriString::normalize` ### Fixed diff --git a/Idna/Converter.php b/Idna/Converter.php index b993e9e..6bd62a3 100644 --- a/Idna/Converter.php +++ b/Idna/Converter.php @@ -141,7 +141,7 @@ public static function toUnicode(Stringable|string $domain, Option|int|null $opt $domain = rawurldecode((string) $domain); if (false === stripos($domain, 'xn--')) { - return Result::fromIntl(['result' => $domain, 'isTransitionalDifferent' => false, 'errors' => Error::NONE->value]); + return Result::fromIntl(['result' => strtolower($domain), 'isTransitionalDifferent' => false, 'errors' => Error::NONE->value]); } FeatureDetection::supportsIdn(); diff --git a/UriString.php b/UriString.php index a791849..c831160 100644 --- a/UriString.php +++ b/UriString.php @@ -17,13 +17,20 @@ use League\Uri\Exceptions\MissingFeature; use League\Uri\Exceptions\SyntaxError; use League\Uri\Idna\Converter; +use League\Uri\Idna\Converter as IdnaConverter; +use League\Uri\IPv6\Converter as IPv6Converter; use Stringable; use function array_merge; +use function array_pop; +use function array_reduce; +use function end; use function explode; use function filter_var; +use function implode; use function inet_pton; use function preg_match; +use function 
preg_replace_callback; use function rawurldecode; use function sprintf; use function strpos; @@ -159,6 +166,16 @@ final class UriString */ private const REGEXP_IDN_PATTERN = '/[^\x20-\x7f]/'; + /** + * Percent-encoded unreserved characters: ALPHA, DIGIT, "-", ".", "_" and "~". + * + * @see https://www.rfc-editor.org/rfc/rfc3986.html#section-2.3 + */ + private const REGEXP_UNRESERVED_CHARACTERS = ',%(2[DdEe]|3[0-9]|[46][1-9A-Fa-f]|[57][0-9Aa]|5[Ff]|7[Ee]),'; + /** @var array<string, int> */ + private const DOT_SEGMENTS = ['.' => 1, '..' => 1]; + /** * Only the address block fe80::/10 can have a Zone ID attach to * let's detect the link local significant 10 bits. @@ -262,6 +279,172 @@ public static function buildAuthority(array $components): ?string return $components['user'].':'.$components['pass'].$authority; } + /** + * Parses and normalizes the URI following RFC3986 destructive and non-destructive constraints. + * + * @throws SyntaxError if the URI is not parsable + * + * @return ComponentMap + */ + public static function normalize(Stringable|string $uri): array + { + $components = self::parse($uri); + if (null !== $components['scheme']) { + $components['scheme'] = strtolower($components['scheme']); + } + + if (null !== $components['host']) { + $components['host'] = IdnaConverter::toUnicode((string)IPv6Converter::compress($components['host']))->domain(); + } + + $path = $components['path']; + if ('/' === ($path[0] ?? 
'') || '' !== $components['scheme'].self::buildAuthority($components)) { + $path = self::removeDotSegments($path); + } + + $path = (string) self::decodeUnreservedCharacters($path); + if (null !== self::buildAuthority($components) && '' === $path) { + $path = '/'; + } + + $components['path'] = $path; + $components['query'] = self::decodeUnreservedCharacters($components['query']); /* an undefined (null) component must stay distinct from an empty one */ + $components['fragment'] = self::decodeUnreservedCharacters($components['fragment']); + $components['user'] = self::decodeUnreservedCharacters($components['user']); + $components['pass'] = self::decodeUnreservedCharacters($components['pass']); + + return $components; + } + + private static function decodeUnreservedCharacters(?string $str): ?string + { + return match (true) { + null === $str, + '' === $str => $str, + default => preg_replace_callback( + self::REGEXP_UNRESERVED_CHARACTERS, + static fn (array $matches): string => rawurldecode($matches[0]), + $str + ) ?? '', + }; + } + + /** + * Resolves a URI against a base URI using RFC3986 rules. + * + * Both the URI and the base URI are parsed into component maps; the method + * returns the component map of the resolved URI, not a URI instance. + * + * This method MUST be transparent when dealing with errors and exceptions. + * It MUST not alter or silence them apart from validating its own parameters. + * + * @see https://www.rfc-editor.org/rfc/rfc3986.html#section-5 + * + * @throws SyntaxError if the BaseUri is not absolute or in absence of a BaseUri if the uri is not absolute + * + * @return ComponentMap + */ + public static function resolve(Stringable|string $uri, Stringable|string|null $baseUri = null): array + { + $uri = self::parse($uri); + $baseUri = null !== $baseUri ? 
self::parse($baseUri) : $uri; + if (null === $baseUri['scheme']) { + throw new SyntaxError('The base URI must be an absolute URI or null; If the base URI is null the URI must be an absolute URI.'); + } + + if (null !== $uri['scheme'] && '' !== $uri['scheme']) { + $uri['path'] = self::removeDotSegments($uri['path']); + + return $uri; + } + + if (null !== self::buildAuthority($uri)) { + $uri['scheme'] = $baseUri['scheme']; + $uri['path'] = self::removeDotSegments($uri['path']); + + return $uri; + } + + [$path, $query] = self::resolvePathAndQuery($uri, $baseUri); + $path = self::removeDotSegments($path); + if ('' !== $path && '/' !== $path[0] && null !== self::buildAuthority($baseUri)) { + $path = '/'.$path; + } + + $baseUri['path'] = $path; + $baseUri['query'] = $query; + $baseUri['fragment'] = $uri['fragment']; + + return $baseUri; + } + + /** + * Removes dot segments from the URI path as per RFC3986 section 5.2.4; an absolute path always stays absolute. + */ + public static function removeDotSegments(Stringable|string $path): string + { + $path = (string) $path; + if (!str_contains($path, '.')) { + return $path; + } + + $reducer = static function (array $carry, string $segment): array { + if ('..' === $segment) { + array_pop($carry); + + return $carry; + } + + if (!isset(self::DOT_SEGMENTS[$segment])) { + $carry[] = $segment; + } + + return $carry; + }; + + $oldSegments = explode('/', $path); + $newPath = implode('/', array_reduce($oldSegments, $reducer, [])); + if (isset(self::DOT_SEGMENTS[end($oldSegments)])) { + $newPath .= '/'; + } + + return str_starts_with($path, '/') && !str_starts_with($newPath, '/') ? '/'.$newPath : $newPath; + } + + /** + * Resolves a URI path and query component. + * + * @param ComponentMap $uri + * @param ComponentMap $baseUri + * + * @return array{0:string, 1:string|null} + */ + private static function resolvePathAndQuery(array $uri, array $baseUri): array + { + if (str_starts_with($uri['path'], '/')) { + return [$uri['path'], $uri['query']]; + } + + if ('' === $uri['path']) { + return [$baseUri['path'], $uri['query'] ?? 
$baseUri['query']]; + } + + $targetPath = $uri['path']; + if (null !== self::buildAuthority($baseUri) && '' === $baseUri['path']) { + $targetPath = '/'.$targetPath; + } + + if ('' !== $baseUri['path']) { + $segments = explode('/', $baseUri['path']); + array_pop($segments); + if ([] !== $segments) { + $targetPath = implode('/', $segments).'/'.$targetPath; + } + } + + return [$targetPath, $uri['query']]; + } + /** * Parse a URI string into its components. * diff --git a/UriStringTest.php b/UriStringTest.php index 87fce8b..1825d53 100644 --- a/UriStringTest.php +++ b/UriStringTest.php @@ -20,6 +20,8 @@ final class UriStringTest extends TestCase { + private const BASE_URI = 'http://a/b/c/d;p?q'; + #[DataProvider('validUriProvider')] public function testParseSucced(Stringable|string|int $uri, array $expected): void { @@ -974,4 +976,55 @@ public static function buildUriProvider(): array ], ]; } + + #[DataProvider('resolveProvider')] + public function testCreateResolve(string $baseUri, string $uri, string $expected): void + { + self::assertSame($expected, UriString::build(UriString::resolve($uri, $baseUri))); + } + + public static function resolveProvider(): array + { + return [ + 'base uri' => [self::BASE_URI, '', self::BASE_URI], + 'scheme' => [self::BASE_URI, 'http://d/e/f', 'http://d/e/f'], + 'path 1' => [self::BASE_URI, 'g', 'http://a/b/c/g'], + 'path 2' => [self::BASE_URI, './g', 'http://a/b/c/g'], + 'path 3' => [self::BASE_URI, 'g/', 'http://a/b/c/g/'], + 'path 4' => [self::BASE_URI, '/g', 'http://a/g'], + 'authority' => [self::BASE_URI, '//g', 'http://g'], + 'query' => [self::BASE_URI, '?y', 'http://a/b/c/d;p?y'], + 'path + query' => [self::BASE_URI, 'g?y', 'http://a/b/c/g?y'], + 'fragment' => [self::BASE_URI, '#s', 'http://a/b/c/d;p?q#s'], + 'path + fragment' => [self::BASE_URI, 'g#s', 'http://a/b/c/g#s'], + 'path + query + fragment' => [self::BASE_URI, 'g?y#s', 'http://a/b/c/g?y#s'], + 'single dot 1' => [self::BASE_URI, '.', 'http://a/b/c/'], + 'single dot 2' => 
[self::BASE_URI, './', 'http://a/b/c/'], + 'single dot 3' => [self::BASE_URI, './g/.', 'http://a/b/c/g/'], + 'single dot 4' => [self::BASE_URI, 'g/./h', 'http://a/b/c/g/h'], + 'double dot 1' => [self::BASE_URI, '..', 'http://a/b/'], + 'double dot 2' => [self::BASE_URI, '../', 'http://a/b/'], + 'double dot 3' => [self::BASE_URI, '../g', 'http://a/b/g'], + 'double dot 4' => [self::BASE_URI, '../..', 'http://a/'], + 'double dot 5' => [self::BASE_URI, '../../', 'http://a/'], + 'double dot 6' => [self::BASE_URI, '../../g', 'http://a/g'], + 'double dot 7' => [self::BASE_URI, '../../../g', 'http://a/g'], + 'double dot 8' => [self::BASE_URI, '../../../../g', 'http://a/g'], + 'double dot 9' => [self::BASE_URI, 'g/../h' , 'http://a/b/c/h'], + 'mulitple slashes' => [self::BASE_URI, 'foo////g', 'http://a/b/c/foo////g'], + 'complex path 1' => [self::BASE_URI, ';x', 'http://a/b/c/;x'], + 'complex path 2' => [self::BASE_URI, 'g;x', 'http://a/b/c/g;x'], + 'complex path 3' => [self::BASE_URI, 'g;x?y#s', 'http://a/b/c/g;x?y#s'], + 'complex path 4' => [self::BASE_URI, 'g;x=1/./y', 'http://a/b/c/g;x=1/y'], + 'complex path 5' => [self::BASE_URI, 'g;x=1/../y', 'http://a/b/c/y'], + 'dot segments presence 1' => [self::BASE_URI, '/./g', 'http://a/g'], + 'dot segments presence 2' => [self::BASE_URI, '/../g', 'http://a/g'], + 'dot segments presence 3' => [self::BASE_URI, 'g.', 'http://a/b/c/g.'], + 'dot segments presence 4' => [self::BASE_URI, '.g', 'http://a/b/c/.g'], + 'dot segments presence 5' => [self::BASE_URI, 'g..', 'http://a/b/c/g..'], + 'dot segments presence 6' => [self::BASE_URI, '..g', 'http://a/b/c/..g'], + 'origin uri without path' => ['http://h:b@a', 'b/../y', 'http://h:b@a/y'], + 'not same origin' => [self::BASE_URI, 'ftp://a/b/c/d', 'ftp://a/b/c/d'], + ]; + } }