# a comment
      |               # or
      <[^>]*(>|$)     # a string that starts with a <, up until the > or the end of the string
      |               # or
      >               # just a >
      )%x', $splitter, $string);
  }

  /**
   * Applies a very permissive XSS/HTML filter for admin-only use.
   *
   * Use only for fields where it is impractical to use the
   * whole filter system, but where some (mainly inline) mark-up
   * is desired (so \Drupal\Component\Utility\Html::escape() is
   * not acceptable).
   *
   * Allows all tags that can be used inside an HTML body, save
   * for scripts and styles.
   *
   * @param string $string
   *   The string to apply the filter to.
   *
   * @return string
   *   The filtered string.
   *
   * @ingroup sanitization
   *
   * @see \Drupal\Component\Utility\Xss::getAdminTagList()
   */
  public static function filterAdmin($string) {
    return static::filter($string, static::$adminTags);
  }

  /**
   * Processes an HTML tag.
   *
   * Lone angle brackets are converted to entities, disallowed elements are
   * dropped, allowed closing tags and comments are passed through, and the
   * attributes of allowed opening tags are cleaned via static::attributes().
   *
   * @param string $string
   *   The HTML tag to process.
   * @param array $html_tags
   *   An array where the keys are the allowed tags and the values are not
   *   used.
   * @param string $class
   *   The called class. This method is called from an anonymous function which
   *   breaks late static binding. See https://bugs.php.net/bug.php?id=66622 for
   *   more information.
   *
   * @return string
   *   If the element isn't allowed, an empty string. Otherwise, the cleaned up
   *   version of the HTML element.
   */
  protected static function split($string, $html_tags, $class) {
    if (substr($string, 0, 1) != '<') {
      // We matched a lone ">" character; it must never be emitted raw.
      return '&gt;';
    }
    elseif (strlen($string) == 1) {
      // We matched a lone "<" character; it must never be emitted raw.
      return '&lt;';
    }

    // Group 1: optional closing slash, group 2: element name, group 3: raw
    // attribute list, group 4: a complete HTML comment.
    if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9\-]+)\s*([^>]*)>?|(<!--.*?-->)$%', $string, $matches)) {
      // Seriously malformed.
      return '';
    }
    $slash = trim($matches[1]);
    // References are used so that a group missing from $matches (preg_match()
    // omits trailing unmatched groups) yields NULL instead of a notice.
    $elem = &$matches[2];
    $attrlist = &$matches[3];
    $comment = &$matches[4];

    if ($comment) {
      // Comments are looked up in the tag list under the pseudo-element "!--".
      $elem = '!--';
    }

    // When in whitelist mode, an element is disallowed when not listed.
    if ($class::needsRemoval($html_tags, $elem)) {
      return '';
    }

    if ($comment) {
      return $comment;
    }

    if ($slash != '') {
      // Allowed closing tag: emit it normalized, without any attributes.
      return "</$elem>";
    }

    // Is there a closing XHTML slash at the end of the attributes?
    $attrlist = preg_replace('%(\s?)/\s*$%', '\1', $attrlist, -1, $count);
    $xhtml_slash = $count ? ' /' : '';

    // Clean up attributes.
    $attr2 = implode(' ', $class::attributes($attrlist));
    // Angle brackets surviving inside attribute values are stripped so they
    // cannot open or close a tag.
    $attr2 = preg_replace('/[<>]/', '', $attr2);
    $attr2 = strlen($attr2) ? ' ' . $attr2 : '';

    return "<$elem$attr2$xhtml_slash>";
  }

  /**
   * Processes a string of HTML attributes.
   *
   * The string is consumed from the left by a small state machine:
   * mode 0 expects an attribute name, mode 1 an equals sign (or whitespace
   * ending a valueless attribute), and mode 2 an attribute value. "style" and
   * "on*" attributes are parsed but not emitted.
   *
   * @param string $attributes
   *   The html attribute to process.
   *
   * @return string[]
   *   The cleaned up attributes, one array item per attribute.
   */
  protected static function attributes($attributes) {
    $attributes_array = [];
    $mode = 0;
    $attribute_name = '';
    $skip = FALSE;
    $skip_protocol_filtering = FALSE;

    while (strlen($attributes) != 0) {
      // Was the last operation successful?
      $working = 0;

      switch ($mode) {
        case 0:
          // Attribute name, href for instance.
          if (preg_match('/^([-a-zA-Z][-a-zA-Z0-9]*)/', $attributes, $match)) {
            $attribute_name = strtolower($match[1]);
            $skip = ($attribute_name == 'style' || substr($attribute_name, 0, 2) == 'on');

            // Values for attributes of type URI should be filtered for
            // potentially malicious protocols (for example, an href-attribute
            // starting with "javascript:"). However, for some non-URI
            // attributes performing this filtering causes valid and safe data
            // to be mangled. We prevent this by skipping protocol filtering on
            // such attributes.
            // @see \Drupal\Component\Utility\UrlHelper::filterBadProtocol()
            // @see http://www.w3.org/TR/html4/index/attributes.html
            $skip_protocol_filtering = substr($attribute_name, 0, 5) === 'data-' || in_array($attribute_name, [
              'title',
              'alt',
              'rel',
              'property',
            ], TRUE);

            $working = $mode = 1;
            $attributes = preg_replace('/^[-a-zA-Z][-a-zA-Z0-9]*/', '', $attributes);
          }
          break;

        case 1:
          // Equals sign or valueless ("selected").
          if (preg_match('/^\s*=\s*/', $attributes)) {
            $working = 1;
            $mode = 2;
            $attributes = preg_replace('/^\s*=\s*/', '', $attributes);
            break;
          }

          if (preg_match('/^\s+/', $attributes)) {
            $working = 1;
            $mode = 0;
            if (!$skip) {
              $attributes_array[] = $attribute_name;
            }
            $attributes = preg_replace('/^\s+/', '', $attributes);
          }
          break;

        case 2:
          // Attribute value, a URL after href= for instance.
          // Double-quoted value.
          if (preg_match('/^"([^"]*)"(\s+|$)/', $attributes, $match)) {
            $thisval = $skip_protocol_filtering ? $match[1] : UrlHelper::filterBadProtocol($match[1]);

            if (!$skip) {
              $attributes_array[] = "$attribute_name=\"$thisval\"";
            }
            $working = 1;
            $mode = 0;
            $attributes = preg_replace('/^"[^"]*"(\s+|$)/', '', $attributes);
            break;
          }

          // Single-quoted value.
          if (preg_match("/^'([^']*)'(\s+|$)/", $attributes, $match)) {
            $thisval = $skip_protocol_filtering ? $match[1] : UrlHelper::filterBadProtocol($match[1]);

            if (!$skip) {
              $attributes_array[] = "$attribute_name='$thisval'";
            }
            $working = 1;
            $mode = 0;
            $attributes = preg_replace("/^'[^']*'(\s+|$)/", '', $attributes);
            break;
          }

          // Unquoted value; it is emitted wrapped in double quotes.
          if (preg_match("%^([^\s\"']+)(\s+|$)%", $attributes, $match)) {
            $thisval = $skip_protocol_filtering ? $match[1] : UrlHelper::filterBadProtocol($match[1]);

            if (!$skip) {
              $attributes_array[] = "$attribute_name=\"$thisval\"";
            }
            $working = 1;
            $mode = 0;
            $attributes = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attributes);
          }
          break;
      }

      if ($working == 0) {
        // Not well formed; remove and try again.
        // NOTE(review): the third alternative in the group below is empty (one
        // line ends with "|" and the next begins with "|"). It is kept as-is
        // to preserve behavior; confirm whether it is intended.
        $attributes = preg_replace('/
          ^
          (
          "[^"]*("|$)     # - a string that starts with a double quote, up until the next double quote or the end of the string
          |               # or
          \'[^\']*(\'|$)| # - a string that starts with a quote, up until the next quote or the end of the string
          |               # or
          \S              # - a non-whitespace character
          )*              # any number of the above three
          \s*             # any number of whitespaces
          /x', '', $attributes);
        $mode = 0;
      }
    }

    // The attribute list ends with a valueless attribute like "selected".
    if ($mode == 1 && !$skip) {
      $attributes_array[] = $attribute_name;
    }
    return $attributes_array;
  }

  /**
   * Whether this element needs to be removed altogether.
   *
   * The lookup is case-insensitive: the element name is lowercased before it
   * is checked against the keys of the tag list.
   *
   * @param array $html_tags
   *   The list of HTML tags, keyed by tag name.
   * @param string $elem
   *   The name of the HTML element.
   *
   * @return bool
   *   TRUE if this element needs to be removed.
   */
  protected static function needsRemoval($html_tags, $elem) {
    return !isset($html_tags[strtolower($elem)]);
  }

  /**
   * Gets the list of HTML tags allowed by Xss::filterAdmin().
   *
   * @return array
   *   The list of HTML tags allowed by filterAdmin().
   */
  public static function getAdminTagList() {
    return static::$adminTags;
  }

  /**
   * Gets the standard list of HTML tags allowed by Xss::filter().
   *
   * @return array
   *   The list of HTML tags allowed by Xss::filter().
   */
  public static function getHtmlTagList() {
    return static::$htmlTags;
  }

}