annotate vendor/guzzlehttp/psr7/src/UriNormalizer.php @ 19:fa3358dc1485 tip

Add ndrum files
author Chris Cannam
date Wed, 28 Aug 2019 13:14:47 +0100
parents 4c8ae668cc8c
children
rev   line source
Chris@0 1 <?php
Chris@0 2 namespace GuzzleHttp\Psr7;
Chris@0 3
Chris@0 4 use Psr\Http\Message\UriInterface;
Chris@0 5
/**
 * Provides methods to normalize and compare URIs.
 *
 * @author Tobias Schultze
 *
 * @link https://tools.ietf.org/html/rfc3986#section-6
 */
final class UriNormalizer
{
    /**
     * Default normalizations which only include the ones that preserve semantics.
     *
     * The value 63 is the bitwise OR of the six semantics-preserving flags (1 | 2 | 4 | 8 | 16 | 32):
     *
     * self::CAPITALIZE_PERCENT_ENCODING | self::DECODE_UNRESERVED_CHARACTERS | self::CONVERT_EMPTY_PATH |
     * self::REMOVE_DEFAULT_HOST | self::REMOVE_DEFAULT_PORT | self::REMOVE_DOT_SEGMENTS
     */
    const PRESERVING_NORMALIZATIONS = 63;

    /**
     * All letters within a percent-encoding triplet (e.g., "%3A") are case-insensitive, and should be capitalized.
     *
     * Example: http://example.org/a%c2%b1b → http://example.org/a%C2%B1b
     */
    const CAPITALIZE_PERCENT_ENCODING = 1;

    /**
     * Decodes percent-encoded octets of unreserved characters.
     *
     * For consistency, percent-encoded octets in the ranges of ALPHA (%41–%5A and %61–%7A), DIGIT (%30–%39),
     * hyphen (%2D), period (%2E), underscore (%5F), or tilde (%7E) should not be created by URI producers and,
     * when found in a URI, should be decoded to their corresponding unreserved characters by URI normalizers.
     *
     * Example: http://example.org/%7Eusern%61me/ → http://example.org/~username/
     */
    const DECODE_UNRESERVED_CHARACTERS = 2;

    /**
     * Converts the empty path to "/" for http and https URIs.
     *
     * Example: http://example.org → http://example.org/
     */
    const CONVERT_EMPTY_PATH = 4;

    /**
     * Removes the default host of the given URI scheme from the URI.
     *
     * Only the "file" scheme defines the default host "localhost".
     * All of `file:/myfile`, `file:///myfile`, and `file://localhost/myfile`
     * are equivalent according to RFC 3986. The first format is not accepted
     * by PHP's stream functions and thus already normalized implicitly to the
     * second format in the Uri class. See `GuzzleHttp\Psr7\Uri::composeComponents`.
     *
     * Example: file://localhost/myfile → file:///myfile
     */
    const REMOVE_DEFAULT_HOST = 8;

    /**
     * Removes the default port of the given URI scheme from the URI.
     *
     * Example: http://example.org:80/ → http://example.org/
     */
    const REMOVE_DEFAULT_PORT = 16;

    /**
     * Removes unnecessary dot-segments.
     *
     * Dot-segments in relative-path references are not removed as it would
     * change the semantics of the URI reference.
     *
     * Example: http://example.org/../a/b/../c/./d.html → http://example.org/a/c/d.html
     */
    const REMOVE_DOT_SEGMENTS = 32;

    /**
     * Paths which include two or more adjacent slashes are converted to one.
     *
     * Webservers usually ignore duplicate slashes and treat those URIs as equivalent.
     * But in theory those URIs do not need to be equivalent. So this normalization
     * may change the semantics. Encoded slashes (%2F) are not removed.
     *
     * Example: http://example.org//foo///bar.html → http://example.org/foo/bar.html
     */
    const REMOVE_DUPLICATE_SLASHES = 64;

    /**
     * Sort query parameters with their values in alphabetical order.
     *
     * However, the order of parameters in a URI may be significant (this is not defined by the standard).
     * So this normalization is not safe and may change the semantics of the URI.
     *
     * Example: ?lang=en&article=fred → ?article=fred&lang=en
     *
     * Note: The sorting is neither locale nor Unicode aware (the URI query does not get decoded at all) as the
     * purpose is to be able to compare URIs in a reproducible way, not to have the params sorted perfectly.
     * The "&"-separated tokens are compared as plain strings, so numeric-looking tokens are not ordered by
     * their numeric value.
     */
    const SORT_QUERY_PARAMETERS = 128;

    /**
     * Returns a normalized URI.
     *
     * The scheme and host component are already normalized to lowercase per PSR-7 UriInterface.
     * This method adds additional normalizations that can be configured with the $flags parameter.
     *
     * PSR-7 UriInterface cannot distinguish between an empty component and a missing component as
     * getQuery(), getFragment() etc. always return a string. This means the URIs "/?#" and "/" are
     * treated equivalent which is not necessarily true according to RFC 3986. But that difference
     * is highly uncommon in reality. So this potential normalization is implied in PSR-7 as well.
     *
     * The enabled normalizations are applied in a fixed order: percent-encoding capitalization,
     * decoding of unreserved characters, empty path, default host, default port, dot-segments,
     * duplicate slashes and finally query parameter sorting.
     *
     * @param UriInterface $uri   The URI to normalize
     * @param int          $flags A bitmask of normalizations to apply, see constants
     *
     * @return UriInterface The normalized URI
     * @link https://tools.ietf.org/html/rfc3986#section-6.2
     */
    public static function normalize(UriInterface $uri, $flags = self::PRESERVING_NORMALIZATIONS)
    {
        if ($flags & self::CAPITALIZE_PERCENT_ENCODING) {
            $uri = self::capitalizePercentEncoding($uri);
        }

        if ($flags & self::DECODE_UNRESERVED_CHARACTERS) {
            $uri = self::decodeUnreservedCharacters($uri);
        }

        if ($flags & self::CONVERT_EMPTY_PATH && $uri->getPath() === '' &&
            ($uri->getScheme() === 'http' || $uri->getScheme() === 'https')
        ) {
            $uri = $uri->withPath('/');
        }

        if ($flags & self::REMOVE_DEFAULT_HOST && $uri->getScheme() === 'file' && $uri->getHost() === 'localhost') {
            $uri = $uri->withHost('');
        }

        if ($flags & self::REMOVE_DEFAULT_PORT && $uri->getPort() !== null && Uri::isDefaultPort($uri)) {
            $uri = $uri->withPort(null);
        }

        // Relative-path references keep their dot-segments: removing them would change what they resolve to.
        if ($flags & self::REMOVE_DOT_SEGMENTS && !Uri::isRelativePathReference($uri)) {
            $uri = $uri->withPath(UriResolver::removeDotSegments($uri->getPath()));
        }

        if ($flags & self::REMOVE_DUPLICATE_SLASHES) {
            $uri = $uri->withPath(preg_replace('#//++#', '/', $uri->getPath()));
        }

        if ($flags & self::SORT_QUERY_PARAMETERS && $uri->getQuery() !== '') {
            $queryKeyValues = explode('&', $uri->getQuery());
            // SORT_STRING is required here. The default SORT_REGULAR compares numeric-looking tokens
            // (e.g. "10" and "1e1") as numbers, so such tokens can compare equal and the comparison is
            // not transitive once numeric and non-numeric tokens are mixed ("2" < "10" < "1a" < "2").
            // The resulting order would then depend on the input order, which defeats the purpose of
            // producing a reproducible form for comparison.
            sort($queryKeyValues, SORT_STRING);
            $uri = $uri->withQuery(implode('&', $queryKeyValues));
        }

        return $uri;
    }

    /**
     * Whether two URIs can be considered equivalent.
     *
     * Both URIs are normalized automatically before comparison with the given $normalizations bitmask. The method also
     * accepts relative URI references and returns true when they are equivalent. This of course assumes they will be
     * resolved against the same base URI. If this is not the case, determination of equivalence or difference of
     * relative references does not mean anything.
     *
     * @param UriInterface $uri1           A URI to compare
     * @param UriInterface $uri2           A URI to compare
     * @param int          $normalizations A bitmask of normalizations to apply, see constants
     *
     * @return bool
     * @link https://tools.ietf.org/html/rfc3986#section-6.1
     */
    public static function isEquivalent(UriInterface $uri1, UriInterface $uri2, $normalizations = self::PRESERVING_NORMALIZATIONS)
    {
        // Equivalence is decided on the string form of the two normalized URIs.
        return (string) self::normalize($uri1, $normalizations) === (string) self::normalize($uri2, $normalizations);
    }

    /**
     * Upper-cases the hex digits of every percent-encoded triplet in the path and query.
     *
     * Other components of the URI are passed through unchanged by this helper.
     *
     * @param UriInterface $uri
     *
     * @return UriInterface
     */
    private static function capitalizePercentEncoding(UriInterface $uri)
    {
        // Matches a run of one or more consecutive "%XX" triplets so each run is upper-cased in one callback call.
        $regex = '/(?:%[A-Fa-f0-9]{2})++/';

        $callback = function (array $match) {
            return strtoupper($match[0]);
        };

        return
            $uri->withPath(
                preg_replace_callback($regex, $callback, $uri->getPath())
            )->withQuery(
                preg_replace_callback($regex, $callback, $uri->getQuery())
            );
    }

    /**
     * Decodes percent-encoded unreserved characters in the path and query.
     *
     * Only triplets for "-", ".", "_", "~", digits and ASCII letters are decoded; every other
     * percent-encoded octet is left as is.
     *
     * @param UriInterface $uri
     *
     * @return UriInterface
     */
    private static function decodeUnreservedCharacters(UriInterface $uri)
    {
        // %2D "-", %2E ".", %5F "_", %7E "~", %30-%39 digits,
        // %41-%4F / %61-%6F and %50-%5A / %70-%7A letters (case-insensitive hex digits).
        $regex = '/%(?:2D|2E|5F|7E|3[0-9]|[46][1-9A-F]|[57][0-9A])/i';

        $callback = function (array $match) {
            return rawurldecode($match[0]);
        };

        return
            $uri->withPath(
                preg_replace_callback($regex, $callback, $uri->getPath())
            )->withQuery(
                preg_replace_callback($regex, $callback, $uri->getQuery())
            );
    }

    private function __construct()
    {
        // cannot be instantiated
    }
}