*   query->all().
   * @param string $parent
   *   (optional) Internal use only. Used to build the $query array key for
   *   nested items. Defaults to an empty string.
   *
   * @return string
   *   A rawurlencoded string which can be used as or appended to the URL query
   *   string. Nested arrays that contain no values contribute nothing to the
   *   result.
   *
   * @ingroup php_wrappers
   */
  public static function buildQuery(array $query, $parent = '') {
    $params = [];
    // Compare against the empty string instead of relying on truthiness: the
    // string "0" is falsy in PHP, so a parent key of "0" would otherwise be
    // silently dropped from the keys of its children.
    $has_parent = ((string) $parent !== '');

    foreach ($query as $key => $value) {
      $key = ($has_parent ? $parent . rawurlencode('[' . $key . ']') : rawurlencode($key));

      // Recurse into children.
      if (is_array($value)) {
        $nested = static::buildQuery($value, $key);
        // An empty nested array has no query string representation; skipping
        // it avoids emitting a stray "&" separator.
        if ($nested !== '') {
          $params[] = $nested;
        }
      }
      // If a query parameter value is NULL, only append its key.
      elseif (!isset($value)) {
        $params[] = $key;
      }
      else {
        // For better readability of paths in query strings, we decode slashes.
        $params[] = $key . '=' . str_replace('%2F', '/', rawurlencode($value));
      }
    }

    return implode('&', $params);
  }

  /**
   * Filters a URL query parameter array to remove unwanted elements.
   *
   * @param array $query
   *   An array to be processed.
   * @param array $exclude
   *   (optional) A list of $query array keys to remove. Use "parent[child]" to
   *   exclude nested items.
   * @param string $parent
   *   Internal use only. Used to build the $query array key for nested items.
   *
   * @return array
   *   An array containing query parameters.
   */
  public static function filterQueryParameters(array $query, array $exclude = [], $parent = '') {
    // If $exclude is empty, there is nothing to filter.
    if (empty($exclude)) {
      return $query;
    }

    // Only the outermost call receives an empty $parent. A truthiness check
    // would also treat the parent key 0 / "0" as "top level", which flips the
    // already flipped $exclude map back and loses the parent prefix.
    $is_top_level = ((string) $parent === '');
    if ($is_top_level) {
      // Flip once so that membership can be tested with isset().
      $exclude = array_flip($exclude);
    }

    $params = [];
    foreach ($query as $key => $value) {
      $string_key = ($is_top_level ? $key : $parent . '[' . $key . ']');
      if (isset($exclude[$string_key])) {
        continue;
      }

      if (is_array($value)) {
        $params[$key] = static::filterQueryParameters($value, $exclude, $string_key);
      }
      else {
        $params[$key] = $value;
      }
    }

    return $params;
  }

  /**
   * Parses a URL string into its path, query, and fragment components.
   *
   * This function splits both internal paths like @code node?b=c#d @endcode and
   * external URLs like @code https://example.com/a?b=c#d @endcode into their
   * component parts. See
   * @link http://tools.ietf.org/html/rfc3986#section-3 RFC 3986 @endlink for an
   * explanation of what the component parts are.
   *
   * Note that, unlike the RFC, when passed an external URL, this function
   * groups the scheme, authority, and path together into the path component.
   *
   * @param string $url
   *   The internal path or external URL string to parse.
   *
   * @return array
   *   An associative array containing:
   *   - path: The path component of $url. If $url is an external URL, this
   *     includes the scheme, authority, and path.
   *   - query: An array of query parameters from $url, if they exist.
   *   - fragment: The fragment component from $url, if it exists.
*
   * @see \Drupal\Core\Utility\LinkGenerator
   * @see http://tools.ietf.org/html/rfc3986
   *
   * @ingroup php_wrappers
   */
  public static function parse($url) {
    $options = [
      'path' => NULL,
      'query' => [],
      'fragment' => '',
    ];

    // External URLs: not using parse_url() here, so we do not have to rebuild
    // the scheme, host, and path without having any use for it.
    // The URL is considered external if it contains the '://' delimiter. Since
    // a URL can also be passed as a query argument, we check if this delimiter
    // appears in front of the '?' query argument delimiter.
    $scheme_delimiter_position = strpos($url, '://');
    $query_delimiter_position = strpos($url, '?');
    if ($scheme_delimiter_position !== FALSE && ($query_delimiter_position === FALSE || $scheme_delimiter_position < $query_delimiter_position)) {
      // Split off the fragment, if any.
      if (strpos($url, '#') !== FALSE) {
        list($url, $options['fragment']) = explode('#', $url, 2);
      }

      // Split off everything before the query string into 'path'. Only the
      // first '?' delimits the query; any further '?' is a legal part of the
      // query string itself and must not truncate it.
      $parts = explode('?', $url, 2);

      // Don't support URLs without a path, like 'http://'.
      list(, $path) = explode('://', $parts[0], 2);
      if ($path != '') {
        $options['path'] = $parts[0];
      }
      // If there is a query string, transform it into keyed query parameters.
      if (isset($parts[1])) {
        parse_str($parts[1], $options['query']);
      }
    }
    // Internal URLs.
    else {
      // parse_url() does not support relative URLs, so make it absolute. For
      // instance, the relative URL "foo/bar:1" isn't properly parsed.
      $parts = parse_url('http://example.com/' . $url);
      // Strip the leading slash that was just added.
      $options['path'] = substr($parts['path'], 1);
      if (isset($parts['query'])) {
        parse_str($parts['query'], $options['query']);
      }
      if (isset($parts['fragment'])) {
        $options['fragment'] = $parts['fragment'];
      }
    }

    return $options;
  }

  /**
   * Encodes a Drupal path for use in a URL.
   *
   * For aesthetic reasons slashes are not escaped.
   *
   * @param string $path
   *   The Drupal path to encode.
   *
   * @return string
   *   The encoded path.
   */
  public static function encodePath($path) {
    return str_replace('%2F', '/', rawurlencode($path));
  }

  /**
   * Determines whether a path is external to Drupal.
   *
   * An example of an external path is http://example.com. If a path cannot be
   * assessed by Drupal's menu handler, then we must treat it as potentially
   * insecure.
   *
   * @param string $path
   *   The internal path or external URL being linked to, such as "node/34" or
   *   "http://example.com/foo".
   *
   * @return bool
   *   TRUE or FALSE, where TRUE indicates an external path.
   */
  public static function isExternal($path) {
    $colonpos = strpos($path, ':');
    // Some browsers treat \ as / so normalize to forward slashes.
    $path = str_replace('\\', '/', $path);
    // If the path starts with 2 slashes then it is always considered an
    // external URL without an explicit protocol part.
    return (strpos($path, '//') === 0)
      // Leading control characters may be ignored or mishandled by browsers,
      // so assume such a path may lead to an external location. The \p{C}
      // character class matches all UTF-8 control, unassigned, and private
      // characters.
      || (preg_match('/^\p{C}/u', $path) !== 0)
      // Avoid calling static::stripDangerousProtocols() if there is any slash
      // (/), hash (#) or question_mark (?) before the colon (:) occurrence -
      // if any - as this would clearly mean it is not a URL.
      || ($colonpos !== FALSE
        && !preg_match('![/?#]!', substr($path, 0, $colonpos))
        && static::stripDangerousProtocols($path) == $path);
  }

  /**
   * Determines if an external URL points to this installation.
   *
   * @param string $url
   *   A string containing an external URL, such as "http://example.com/foo".
   * @param string $base_url
   *   The base URL string to check against, such as "http://example.com/".
   *
   * @return bool
   *   TRUE if the URL has the same domain and base path.
   *
   * @throws \InvalidArgumentException
   *   Exception thrown when either $url or $base_url is not fully qualified.
   */
  public static function externalIsLocal($url, $base_url) {
    // Some browsers treat \ as / so normalize to forward slashes.
    $url = str_replace('\\', '/', $url);

    // Leading control characters may be ignored or mishandled by browsers, so
    // assume such a path may lead to a non-local location. The \p{C} character
    // class matches all UTF-8 control, unassigned, and private characters.
    if (preg_match('/^\p{C}/u', $url) !== 0) {
      return FALSE;
    }

    $url_parts = parse_url($url);
    $base_parts = parse_url($base_url);

    if (empty($base_parts['host']) || empty($url_parts['host'])) {
      throw new \InvalidArgumentException('A path was passed when a fully qualified domain was expected.');
    }

    if (!isset($url_parts['path']) || !isset($base_parts['path'])) {
      return (!isset($base_parts['path']) || $base_parts['path'] == '/')
        && ($url_parts['host'] == $base_parts['host']);
    }
    else {
      // When comparing base paths, we need a trailing slash to make sure a
      // partial URL match isn't occurring. Since base_path() always returns
      // with a trailing slash, we don't need to add the trailing slash here.
      return ($url_parts['host'] == $base_parts['host'] && stripos($url_parts['path'], $base_parts['path']) === 0);
    }
  }

  /**
   * Processes an HTML attribute value and strips dangerous protocols from URLs.
   *
   * @param string $string
   *   The string with the attribute value.
   *
   * @return string
   *   Cleaned up and HTML-escaped version of $string.
   */
  public static function filterBadProtocol($string) {
    // Get the plain text representation of the attribute value (i.e. its
    // meaning).
    $string = Html::decodeEntities($string);
    return Html::escape(static::stripDangerousProtocols($string));
  }

  /**
   * Gets the allowed protocols.
   *
   * @return array
   *   An array of protocols, for example http, https and irc.
   */
  public static function getAllowedProtocols() {
    return static::$allowedProtocols;
  }

  /**
   * Sets the allowed protocols.
*
   * @param array $protocols
   *   An array of protocols, for example http, https and irc.
   */
  public static function setAllowedProtocols(array $protocols = []) {
    static::$allowedProtocols = $protocols;
  }

  /**
   * Removes dangerous protocols (such as 'javascript:') from a URI.
   *
   * Every URI found in user-entered input has to pass through this function
   * before it is written to an HTML attribute value. It usually runs as part
   * of \Drupal\Component\Utility\UrlHelper::filterBadProtocol() or
   * \Drupal\Component\Utility\Xss::filter(). Those return an HTML-encoded
   * string, so call this function directly when a plain-text result is needed
   * for code that calls Html::escape() on its own. What happens depends on the
   * value:
   * - A well-formed (per RFC 3986) relative URL, or an absolute URL that does
   *   not use a dangerous protocol (like "javascript:"), is returned
   *   unchanged. This includes all URLs generated via Url::toString() and
   *   UrlGeneratorTrait::url().
   * - A well-formed absolute URL with a dangerous protocol has that protocol
   *   stripped. The remainder is processed again until a safe protocol is
   *   reached.
   * - A value that is not a well-formed URL receives the same sanitization as
   *   a well-formed one, which strips most substrings that precede a ":". The
   *   result can be used in URL attributes such as "href" or "src" (only after
   *   calling Html::escape() separately), but this may not produce valid HTML
   *   (for example, malformed URLs within "href" attributes fail HTML
   *   validation). This can be avoided by using
   *   Url::fromUri($possibly_not_a_url)->toString(), which either throws an
   *   exception or returns a well-formed URL.
   *
   * @param string $uri
   *   A plain-text URI that might contain dangerous protocols.
   *
   * @return string
   *   A plain-text URI stripped of dangerous protocols. As with all plain-text
   *   strings, this return value must not be output to an HTML page without
   *   being sanitized first. However, it can be passed to functions
   *   expecting plain-text strings.
   *
   * @see \Drupal\Component\Utility\Html::escape()
   * @see \Drupal\Core\Url::toString()
   * @see \Drupal\Core\Routing\UrlGeneratorTrait::url()
   * @see \Drupal\Core\Url::fromUri()
   */
  public static function stripDangerousProtocols($uri) {
    // Keyed by protocol name so membership can be tested with isset().
    $permitted = array_flip(static::$allowedProtocols);

    // Peel off one leading "scheme:" per pass; every exit below returns the
    // URI as it stands at that point.
    while (TRUE) {
      $separator = strpos($uri, ':');
      // No colon, or a colon in the very first position: nothing to verify.
      if (!($separator > 0)) {
        return $uri;
      }

      $scheme = substr($uri, 0, $separator);
      // A colon preceded by a slash, question mark or hash cannot be part of
      // the URL scheme. This must be a relative URL, which inherits the (safe)
      // protocol of the base document.
      if (preg_match('![/?#]!', $scheme)) {
        return $uri;
      }
      // Per RFC2616, section 3.2.3 (URI Comparison) scheme comparison must be
      // case-insensitive.
      if (isset($permitted[strtolower($scheme)])) {
        return $uri;
      }

      // Disallowed protocol: drop it together with its colon and re-check.
      $uri = substr($uri, $separator + 1);
    }
  }

  /**
   * Verifies the syntax of the given URL.
*
   * Use this only for real URLs. Drupal menu paths can contain arbitrary
   * characters and must not be checked with it.
   * Valid values per RFC 3986.
   *
   * @param string $url
   *   The URL to verify.
   * @param bool $absolute
   *   Whether the URL is absolute (beginning with a scheme such as "http:").
   *
   * @return bool
   *   TRUE if the URL is in a valid format, FALSE otherwise.
   */
  public static function isValid($url, $absolute = FALSE) {
    // Relative URLs only need every character to be from the permitted set or
    // a percent-encoded octet.
    if (!$absolute) {
      return (bool) preg_match("/^(?:[\w#!:\.\?\+=&@$'~*,;\/\(\)\[\]\-]|%[0-9a-f]{2})+$/i", $url);
    }

    // The pattern uses the "x" modifier, so the layout and the trailing
    // "# ..." notes inside the string are ignored by the regex engine.
    $absolute_pattern = "
        /^                                                      # Start at the beginning of the text
        (?:ftp|https?|feed):\/\/                                # Look for ftp, http, https or feed schemes
        (?:                                                     # Userinfo (optional) which is typically
          (?:(?:[\w\.\-\+!$&'\(\)*\+,;=]|%[0-9a-f]{2})+:)*      # a username or a username and password
          (?:[\w\.\-\+%!$&'\(\)*\+,;=]|%[0-9a-f]{2})+@          # combination
        )?
        (?:
          (?:[a-z0-9\-\.]|%[0-9a-f]{2})+                        # A domain name or a IPv4 address
          |(?:\[(?:[0-9a-f]{0,4}:)*(?:[0-9a-f]{0,4})\])         # or a well formed IPv6 address
        )
        (?::[0-9]+)?                                            # Server port number (optional)
        (?:[\/|\?]
          (?:[\w#!:\.\?\+=&@$'~*,;\/\(\)\[\]\-]|%[0-9a-f]{2})   # The path and query (optional)
        *)?
      $/xi";

    return (bool) preg_match($absolute_pattern, $url);
  }

}