Mercurial > hg > isophonics-drupal-site
comparison core/lib/Drupal/Component/Utility/UrlHelper.php @ 0:4c8ae668cc8c
Initial import (non-working)
author | Chris Cannam |
---|---|
date | Wed, 29 Nov 2017 16:09:58 +0000 |
parents | |
children | 129ea1e6d783 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4c8ae668cc8c |
---|---|
1 <?php | |
2 | |
3 namespace Drupal\Component\Utility; | |
4 | |
/**
 * Provides static helper methods for building, parsing and sanitizing URLs.
 *
 * @ingroup utility
 */
class UrlHelper {

  /**
   * The list of allowed protocols.
   *
   * Consulted by stripDangerousProtocols(); replaceable at runtime through
   * setAllowedProtocols().
   *
   * @var array
   */
  protected static $allowedProtocols = ['http', 'https'];

  /**
   * Parses an array into a valid, rawurlencoded query string.
   *
   * rawurlencode() is RFC3986 compliant, and as a consequence RFC3987
   * compliant. The latter defines the required format of "URLs" in HTML5.
   * urlencode() is almost the same as rawurlencode(), except that it encodes
   * spaces as "+" instead of "%20". This makes its result non compliant to
   * RFC3986 and as a consequence non compliant to RFC3987 and as a consequence
   * not valid as a "URL" in HTML5.
   *
   * Nested arrays are flattened into "parent[child]" keys (with the brackets
   * percent-encoded). NULL values are emitted as a bare key without "=".
   * Empty nested arrays contribute nothing to the result.
   *
   * @todo Remove this function once PHP 5.4 is required as we can use just
   *   http_build_query() directly.
   *
   * @param array $query
   *   The query parameter array to be processed; for instance,
   *   \Drupal::request()->query->all().
   * @param string $parent
   *   (optional) Internal use only. Used to build the $query array key for
   *   nested items. Defaults to an empty string.
   *
   * @return string
   *   A rawurlencoded string which can be used as or appended to the URL query
   *   string.
   *
   * @ingroup php_wrappers
   */
  public static function buildQuery(array $query, $parent = '') {
    // Only the empty string means "no parent". A truthiness test would treat
    // the perfectly valid parent key "0" as missing and drop it.
    $parent = (string) $parent;
    $params = [];

    foreach ($query as $key => $value) {
      $key = ($parent !== '' ? $parent . rawurlencode('[' . $key . ']') : rawurlencode($key));

      // Recurse into children.
      if (is_array($value)) {
        $nested = static::buildQuery($value, $key);
        // An empty child array yields an empty string; skip it so that no
        // stray '&' separator ends up in the result.
        if ($nested !== '') {
          $params[] = $nested;
        }
      }
      // If a query parameter value is NULL, only append its key.
      elseif (!isset($value)) {
        $params[] = $key;
      }
      else {
        // For better readability of paths in query strings, we decode slashes.
        $params[] = $key . '=' . str_replace('%2F', '/', rawurlencode($value));
      }
    }

    return implode('&', $params);
  }

  /**
   * Filters a URL query parameter array to remove unwanted elements.
   *
   * @param array $query
   *   An array to be processed.
   * @param array $exclude
   *   (optional) A list of $query array keys to remove. Use "parent[child]" to
   *   exclude nested items.
   * @param string $parent
   *   Internal use only. Used to build the $query array key for nested items.
   *
   * @return array
   *   An array containing query parameters.
   */
  public static function filterQueryParameters(array $query, array $exclude = [], $parent = '') {
    // If $exclude is empty, there is nothing to filter.
    if (empty($exclude)) {
      return $query;
    }

    // Only the empty string marks the top-level call. A truthiness test would
    // mistake the parent key "0" for the top level and flip $exclude twice.
    $parent = (string) $parent;
    if ($parent === '') {
      // Flip once so that membership can be tested with isset().
      $exclude = array_flip($exclude);
    }

    $params = [];
    foreach ($query as $key => $value) {
      $string_key = ($parent !== '' ? $parent . '[' . $key . ']' : (string) $key);
      if (isset($exclude[$string_key])) {
        continue;
      }

      if (is_array($value)) {
        $params[$key] = static::filterQueryParameters($value, $exclude, $string_key);
      }
      else {
        $params[$key] = $value;
      }
    }

    return $params;
  }

  /**
   * Parses a URL string into its path, query, and fragment components.
   *
   * This function splits both internal paths like @code node?b=c#d @endcode and
   * external URLs like @code https://example.com/a?b=c#d @endcode into their
   * component parts. See
   * @link http://tools.ietf.org/html/rfc3986#section-3 RFC 3986 @endlink for an
   * explanation of what the component parts are.
   *
   * Note that, unlike the RFC, when passed an external URL, this function
   * groups the scheme, authority, and path together into the path component.
   *
   * @param string $url
   *   The internal path or external URL string to parse.
   *
   * @return array
   *   An associative array containing:
   *   - path: The path component of $url. If $url is an external URL, this
   *     includes the scheme, authority, and path. NULL for an external URL
   *     that has nothing after the '://' delimiter.
   *   - query: An array of query parameters from $url, if they exist.
   *   - fragment: The fragment component from $url, if it exists.
   *
   * @see \Drupal\Core\Utility\LinkGenerator
   * @see http://tools.ietf.org/html/rfc3986
   *
   * @ingroup php_wrappers
   */
  public static function parse($url) {
    $options = [
      'path' => NULL,
      'query' => [],
      'fragment' => '',
    ];

    // External URLs: not using parse_url() here, so we do not have to rebuild
    // the scheme, host, and path without having any use for it.
    // The URL is considered external if it contains the '://' delimiter. Since
    // a URL can also be passed as a query argument, we check if this delimiter
    // appears in front of the '?' query argument delimiter.
    $scheme_delimiter_position = strpos($url, '://');
    $query_delimiter_position = strpos($url, '?');
    if ($scheme_delimiter_position !== FALSE && ($query_delimiter_position === FALSE || $scheme_delimiter_position < $query_delimiter_position)) {
      // Split off the fragment, if any.
      if (strpos($url, '#') !== FALSE) {
        list($url, $options['fragment']) = explode('#', $url, 2);
      }

      // Split off everything before the query string into 'path'. Limit the
      // split to two parts: a literal '?' is allowed inside a query string and
      // must not truncate it.
      $parts = explode('?', $url, 2);

      // Don't support URLs without a path, like 'http://'.
      list(, $path) = explode('://', $parts[0], 2);
      if ($path !== '') {
        $options['path'] = $parts[0];
      }
      // If there is a query string, transform it into keyed query parameters.
      if (isset($parts[1])) {
        parse_str($parts[1], $options['query']);
      }
    }
    // Internal URLs.
    else {
      // parse_url() does not support relative URLs, so make it absolute. For
      // instance, the relative URL "foo/bar:1" isn't properly parsed.
      $parts = parse_url('http://example.com/' . $url);
      // Strip the leading slash that was just added.
      $options['path'] = substr($parts['path'], 1);
      if (isset($parts['query'])) {
        parse_str($parts['query'], $options['query']);
      }
      if (isset($parts['fragment'])) {
        $options['fragment'] = $parts['fragment'];
      }
    }

    return $options;
  }

  /**
   * Encodes a Drupal path for use in a URL.
   *
   * For aesthetic reasons slashes are not escaped.
   *
   * @param string $path
   *   The Drupal path to encode.
   *
   * @return string
   *   The encoded path.
   */
  public static function encodePath($path) {
    return str_replace('%2F', '/', rawurlencode($path));
  }

  /**
   * Determines whether a path is external to Drupal.
   *
   * An example of an external path is http://example.com. If a path cannot be
   * assessed by Drupal's menu handler, then we must treat it as potentially
   * insecure.
   *
   * @param string $path
   *   The internal path or external URL being linked to, such as "node/34" or
   *   "http://example.com/foo".
   *
   * @return bool
   *   TRUE or FALSE, where TRUE indicates an external path.
   */
  public static function isExternal($path) {
    // Replacing "\" with "/" does not change any character offsets, so the
    // colon position may be computed before the normalization below.
    $colonpos = strpos($path, ':');
    // Some browsers treat \ as / so normalize to forward slashes.
    $path = str_replace('\\', '/', $path);
    // If the path starts with 2 slashes then it is always considered an
    // external URL without an explicit protocol part.
    return (strpos($path, '//') === 0)
      // Leading control characters may be ignored or mishandled by browsers,
      // so assume such a path may lead to an external location. The \p{C}
      // character class matches all UTF-8 control, unassigned, and private
      // characters.
      || (preg_match('/^\p{C}/u', $path) !== 0)
      // Avoid calling static::stripDangerousProtocols() if there is any slash
      // (/), hash (#) or question_mark (?) before the colon (:) occurrence -
      // if any - as this would clearly mean it is not a URL.
      || ($colonpos !== FALSE
        && !preg_match('![/?#]!', substr($path, 0, $colonpos))
        && static::stripDangerousProtocols($path) === $path);
  }

  /**
   * Determines if an external URL points to this installation.
   *
   * @param string $url
   *   A string containing an external URL, such as "http://example.com/foo".
   * @param string $base_url
   *   The base URL string to check against, such as "http://example.com/"
   *
   * @return bool
   *   TRUE if the URL has the same domain and base path. FALSE otherwise,
   *   including when $url starts with a control character.
   *
   * @throws \InvalidArgumentException
   *   Exception thrown when either $url or $base_url is not fully qualified.
   */
  public static function externalIsLocal($url, $base_url) {
    // Some browsers treat \ as / so normalize to forward slashes. Without
    // this, parse_url() and the browser can disagree about which part of the
    // URL is the host.
    $url = str_replace('\\', '/', $url);

    // Leading control characters may be ignored or mishandled by browsers, so
    // assume such a URL may lead to a non-local location. The \p{C} character
    // class matches all UTF-8 control, unassigned, and private characters.
    if (preg_match('/^\p{C}/u', $url) !== 0) {
      return FALSE;
    }

    $url_parts = parse_url($url);
    $base_parts = parse_url($base_url);

    if (empty($base_parts['host']) || empty($url_parts['host'])) {
      throw new \InvalidArgumentException('A path was passed when a fully qualified domain was expected.');
    }

    // Hosts are compared strictly: a loose comparison treats numeric strings
    // such as "1e3" and "1000" as equal.
    $same_host = ($url_parts['host'] === $base_parts['host']);

    if (!isset($url_parts['path']) || !isset($base_parts['path'])) {
      return (!isset($base_parts['path']) || $base_parts['path'] === '/')
        && $same_host;
    }
    else {
      // When comparing base paths, we need a trailing slash to make sure a
      // partial URL match isn't occurring. Since base_path() always returns
      // with a trailing slash, we don't need to add the trailing slash here.
      return ($same_host && stripos($url_parts['path'], $base_parts['path']) === 0);
    }
  }

  /**
   * Processes an HTML attribute value and strips dangerous protocols from URLs.
   *
   * @param string $string
   *   The string with the attribute value.
   *
   * @return string
   *   Cleaned up and HTML-escaped version of $string.
   */
  public static function filterBadProtocol($string) {
    // Get the plain text representation of the attribute value (i.e. its
    // meaning).
    $string = Html::decodeEntities($string);
    return Html::escape(static::stripDangerousProtocols($string));
  }

  /**
   * Gets the allowed protocols.
   *
   * @return array
   *   An array of protocols, for example http, https and irc.
   */
  public static function getAllowedProtocols() {
    return static::$allowedProtocols;
  }

  /**
   * Sets the allowed protocols.
   *
   * @param array $protocols
   *   An array of protocols, for example http, https and irc.
   */
  public static function setAllowedProtocols(array $protocols = []) {
    static::$allowedProtocols = $protocols;
  }

  /**
   * Strips dangerous protocols (for example, 'javascript:') from a URI.
   *
   * This function must be called for all URIs within user-entered input prior
   * to being output to an HTML attribute value. It is often called as part of
   * \Drupal\Component\Utility\UrlHelper::filterBadProtocol() or
   * \Drupal\Component\Utility\Xss::filter(), but those functions return an
   * HTML-encoded string, so this function can be called independently when the
   * output needs to be a plain-text string for passing to functions that will
   * call Html::escape() separately. The exact behavior depends on the value:
   * - If the value is a well-formed (per RFC 3986) relative URL or
   *   absolute URL that does not use a dangerous protocol (like
   *   "javascript:"), then the URL remains unchanged. This includes all
   *   URLs generated via Url::toString() and UrlGeneratorTrait::url().
   * - If the value is a well-formed absolute URL with a dangerous protocol,
   *   the protocol is stripped. This process is repeated on the remaining URL
   *   until it is stripped down to a safe protocol.
   * - If the value is not a well-formed URL, the same sanitization behavior as
   *   for well-formed URLs will be invoked, which strips most substrings that
   *   precede a ":". The result can be used in URL attributes such as "href"
   *   or "src" (only after calling Html::escape() separately), but this may not
   *   produce valid HTML (for example, malformed URLs within "href" attributes
   *   fail HTML validation). This can be avoided by using
   *   Url::fromUri($possibly_not_a_url)->toString(), which either throws an
   *   exception or returns a well-formed URL.
   *
   * @param string $uri
   *   A plain-text URI that might contain dangerous protocols.
   *
   * @return string
   *   A plain-text URI stripped of dangerous protocols. As with all plain-text
   *   strings, this return value must not be output to an HTML page without
   *   being sanitized first. However, it can be passed to functions
   *   expecting plain-text strings.
   *
   * @see \Drupal\Component\Utility\Html::escape()
   * @see \Drupal\Core\Url::toString()
   * @see \Drupal\Core\Routing\UrlGeneratorTrait::url()
   * @see \Drupal\Core\Url::fromUri()
   */
  public static function stripDangerousProtocols($uri) {
    // Flip once so that each protocol lookup is a cheap isset().
    $allowed_protocols = array_flip(static::$allowedProtocols);

    // Iteratively remove any invalid protocol found.
    do {
      $before = $uri;
      $colonpos = strpos($uri, ':');
      // A colon at position 0 has no scheme in front of it, so it is left
      // alone together with the "no colon at all" case.
      if ($colonpos > 0) {
        // We found a colon, possibly a protocol. Verify.
        $protocol = substr($uri, 0, $colonpos);
        // If a colon is preceded by a slash, question mark or hash, it cannot
        // possibly be part of the URL scheme. This must be a relative URL, which
        // inherits the (safe) protocol of the base document.
        if (preg_match('![/?#]!', $protocol)) {
          break;
        }
        // Check if this is a disallowed protocol. Per RFC2616, section 3.2.3
        // (URI Comparison) scheme comparison must be case-insensitive.
        if (!isset($allowed_protocols[strtolower($protocol)])) {
          $uri = substr($uri, $colonpos + 1);
        }
      }
    } while ($before !== $uri);

    return $uri;
  }

  /**
   * Verifies the syntax of the given URL.
   *
   * This function should only be used on actual URLs. It should not be used for
   * Drupal menu paths, which can contain arbitrary characters.
   * Valid values per RFC 3986.
   *
   * @param string $url
   *   The URL to verify.
   * @param bool $absolute
   *   Whether the URL is absolute (beginning with a scheme such as "http:").
   *   In that case only the ftp, http, https and feed schemes are accepted.
   *
   * @return bool
   *   TRUE if the URL is in a valid format, FALSE otherwise.
   */
  public static function isValid($url, $absolute = FALSE) {
    if ($absolute) {
      // The "x" flag makes whitespace and "#" comments inside the pattern
      // insignificant; the "i" flag makes the match case-insensitive.
      return (bool) preg_match("
        /^  # Start at the beginning of the text
        (?:ftp|https?|feed):\/\/  # Look for ftp, http, https or feed schemes
        (?:  # Userinfo (optional) which is typically
          (?:(?:[\w\.\-\+!$&'\(\)*\+,;=]|%[0-9a-f]{2})+:)*  # a username or a username and password
          (?:[\w\.\-\+%!$&'\(\)*\+,;=]|%[0-9a-f]{2})+@  # combination
        )?
        (?:
          (?:[a-z0-9\-\.]|%[0-9a-f]{2})+  # A domain name or a IPv4 address
          |(?:\[(?:[0-9a-f]{0,4}:)*(?:[0-9a-f]{0,4})\])  # or a well formed IPv6 address
        )
        (?::[0-9]+)?  # Server port number (optional)
        (?:[\/|\?]
          (?:[\w#!:\.\?\+=&@$'~*,;\/\(\)\[\]\-]|%[0-9a-f]{2})  # The path and query (optional)
        *)?
      $/xi", $url);
    }
    else {
      return (bool) preg_match("/^(?:[\w#!:\.\?\+=&@$'~*,;\/\(\)\[\]\-]|%[0-9a-f]{2})+$/i", $url);
    }
  }

}