Mercurial > hg > cmmr2012-drupal-site
diff vendor/zendframework/zend-escaper/src/Escaper.php @ 0:c75dbcec494b
Initial commit from drush-created site
author | Chris Cannam |
---|---|
date | Thu, 05 Jul 2018 14:24:15 +0000 |
parents | |
children | 5311817fb629 |
line wrap: on
line diff
<?php
/**
 * Zend Framework (http://framework.zend.com/)
 *
 * @link      http://github.com/zendframework/zf2 for the canonical source repository
 * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license   http://framework.zend.com/license/new-bsd New BSD License
 */

namespace Zend\Escaper;

/**
 * Context specific methods for use in secure output escaping.
 *
 * Each public escape*() method targets exactly one output context (HTML body,
 * HTML attribute, Javascript, CSS, URL component). The attribute, Javascript
 * and CSS escapers work on UTF-8 internally: input is converted from the
 * configured encoding, escaped, and converted back.
 */
class Escaper
{
    /**
     * Entity Map mapping Unicode codepoints to any available named HTML entities.
     *
     * While HTML supports far more named entities, the lowest common denominator
     * has become HTML5's XML Serialisation which is restricted to those named
     * entities that XML supports. Using HTML entities would result in this error:
     *     XML Parsing Error: undefined entity
     *
     * @var array
     */
    protected static $htmlNamedEntityMap = [
        34 => 'quot', // quotation mark
        38 => 'amp',  // ampersand
        60 => 'lt',   // less-than sign
        62 => 'gt',   // greater-than sign
    ];

    /**
     * Current encoding for escaping. If not UTF-8, we convert strings from this encoding
     * pre-escaping and back to this encoding post-escaping.
     *
     * @var string
     */
    protected $encoding = 'utf-8';

    /**
     * Holds the value of the special flags passed as second parameter to
     * htmlspecialchars().
     *
     * @var int
     */
    protected $htmlSpecialCharsFlags;

    /**
     * Static Matcher which escapes characters for HTML Attribute contexts
     *
     * @var callable
     */
    protected $htmlAttrMatcher;

    /**
     * Static Matcher which escapes characters for Javascript contexts
     *
     * @var callable
     */
    protected $jsMatcher;

    /**
     * Static Matcher which escapes characters for CSS Attribute contexts
     *
     * @var callable
     */
    protected $cssMatcher;

    /**
     * List of all encodings supported by this class (lower-case spellings).
     *
     * @var array
     */
    protected $supportedEncodings = [
        'iso-8859-1',   'iso8859-1',    'iso-8859-5',   'iso8859-5',
        'iso-8859-15',  'iso8859-15',   'utf-8',        'cp866',
        'ibm866',       '866',          'cp1251',       'windows-1251',
        'win-1251',     '1251',         'cp1252',       'windows-1252',
        '1252',         'koi8-r',       'koi8-ru',      'koi8r',
        'big5',         '950',          'gb2312',       '936',
        'big5-hkscs',   'shift_jis',    'sjis',         'sjis-win',
        'cp932',        '932',          'euc-jp',       'eucjp',
        'eucjp-win',    'macroman'
    ];

    /**
     * Constructor: Single parameter allows setting of global encoding for use by
     * the current object.
     *
     * The encoding is lower-cased and must match one of $supportedEncodings
     * exactly; when null is given the default ('utf-8') is kept.
     *
     * @param string $encoding
     * @throws Exception\InvalidArgumentException if the encoding is blank or unsupported
     */
    public function __construct($encoding = null)
    {
        if ($encoding !== null) {
            $encoding = (string) $encoding;
            if ($encoding === '') {
                throw new Exception\InvalidArgumentException(
                    get_class($this) . ' constructor parameter does not allow a blank value'
                );
            }

            $encoding = strtolower($encoding);
            // Strict comparison is required here: several supported names are
            // numeric strings ('866', '1251', ...) and a loose comparison would
            // also accept look-alikes such as '1251.0' or ' 866'.
            if (!in_array($encoding, $this->supportedEncodings, true)) {
                throw new Exception\InvalidArgumentException(
                    'Value of \'' . $encoding . '\' passed to ' . get_class($this)
                    . ' constructor parameter is invalid. Provide an encoding supported by htmlspecialchars()'
                );
            }

            $this->encoding = $encoding;
        }

        // We take advantage of ENT_SUBSTITUTE flag to correctly deal with invalid UTF-8 sequences.
        $this->htmlSpecialCharsFlags = ENT_QUOTES | ENT_SUBSTITUTE;

        // set matcher callbacks
        $this->htmlAttrMatcher = [$this, 'htmlAttrMatcher'];
        $this->jsMatcher       = [$this, 'jsMatcher'];
        $this->cssMatcher      = [$this, 'cssMatcher'];
    }

    /**
     * Return the encoding that all output/input is expected to be encoded in.
     *
     * @return string
     */
    public function getEncoding()
    {
        return $this->encoding;
    }

    /**
     * Escape a string for the HTML Body context where there are very few characters
     * of special meaning. Internally this will use htmlspecialchars().
     *
     * @param string $string
     * @return string
     */
    public function escapeHtml($string)
    {
        return htmlspecialchars($string, $this->htmlSpecialCharsFlags, $this->encoding);
    }

    /**
     * Escape a string for the HTML Attribute context. We use an extended set of characters
     * to escape that are not covered by htmlspecialchars() to cover cases where an attribute
     * might be unquoted or quoted illegally (e.g. backticks are valid quotes for IE).
     *
     * Empty strings and strings made only of digits are returned unchanged.
     *
     * @param string $string
     * @throws Exception\RuntimeException if the input is not valid in the configured encoding
     * @return string
     */
    public function escapeHtmlAttr($string)
    {
        $string = $this->toUtf8($string);
        if ($string === '' || ctype_digit($string)) {
            return $string;
        }

        // Everything except ASCII alphanumerics and , . - _ goes through the matcher.
        $result = preg_replace_callback('/[^a-z0-9,\.\-_]/iSu', $this->htmlAttrMatcher, $string);
        return $this->fromUtf8($result);
    }

    /**
     * Escape a string for the Javascript context. This does not use json_encode(). An extended
     * set of characters are escaped beyond ECMAScript's rules for Javascript literal string
     * escaping in order to prevent misinterpretation of Javascript as HTML leading to the
     * injection of special characters and entities. The escaping used should be tolerant
     * of cases where HTML escaping was not applied on top of Javascript escaping correctly.
     * Backslash escaping is not used as it still leaves the escaped character as-is and so
     * is not useful in a HTML context.
     *
     * Empty strings and strings made only of digits are returned unchanged.
     *
     * @param string $string
     * @throws Exception\RuntimeException if the input is not valid in the configured encoding
     * @return string
     */
    public function escapeJs($string)
    {
        $string = $this->toUtf8($string);
        if ($string === '' || ctype_digit($string)) {
            return $string;
        }

        // Everything except ASCII alphanumerics and , . _ goes through the matcher.
        $result = preg_replace_callback('/[^a-z0-9,\._]/iSu', $this->jsMatcher, $string);
        return $this->fromUtf8($result);
    }

    /**
     * Escape a string for the URI or Parameter contexts. This should not be used to escape
     * an entire URI - only a subcomponent being inserted. The function is a simple proxy
     * to rawurlencode() which now implements RFC 3986 since PHP 5.3 completely.
     *
     * @param string $string
     * @return string
     */
    public function escapeUrl($string)
    {
        return rawurlencode($string);
    }

    /**
     * Escape a string for the CSS context. CSS escaping can be applied to any string being
     * inserted into CSS and escapes everything except alphanumerics.
     *
     * Empty strings and strings made only of digits are returned unchanged.
     *
     * @param string $string
     * @throws Exception\RuntimeException if the input is not valid in the configured encoding
     * @return string
     */
    public function escapeCss($string)
    {
        $string = $this->toUtf8($string);
        if ($string === '' || ctype_digit($string)) {
            return $string;
        }

        $result = preg_replace_callback('/[^a-z0-9]/iSu', $this->cssMatcher, $string);
        return $this->fromUtf8($result);
    }

    /**
     * Callback function for preg_replace_callback that applies HTML Attribute
     * escaping to all matches.
     *
     * @param array $matches Match data; index 0 holds the single UTF-8 character to escape
     * @return string The named or hexadecimal entity replacing the character
     */
    protected function htmlAttrMatcher($matches)
    {
        $chr = $matches[0];
        // ord() reads only the first byte of $chr, so the range checks below
        // are evaluated against that byte.
        $ord = ord($chr);

        /**
         * The following replaces characters undefined in HTML with the
         * hex entity for the Unicode replacement character.
         *
         * The entity is emitted in its ASCII form (not as the raw U+FFFD
         * character) so that the result can still be converted back to
         * non-UTF-8 target encodings by fromUtf8().
         */
        if (($ord <= 0x1f && $chr !== "\t" && $chr !== "\n" && $chr !== "\r")
            || ($ord >= 0x7f && $ord <= 0x9f)
        ) {
            return '&#xFFFD;';
        }

        /**
         * Check if the current character to escape has a name entity we should
         * replace it with while grabbing the integer value of the character.
         */
        if (strlen($chr) > 1) {
            // Multi-byte character: UTF-32BE bytes read as hex give the codepoint.
            $chr = $this->convertEncoding($chr, 'UTF-32BE', 'UTF-8');
        }

        $hex = bin2hex($chr);
        $ord = hexdec($hex);
        if (isset(static::$htmlNamedEntityMap[$ord])) {
            return '&' . static::$htmlNamedEntityMap[$ord] . ';';
        }

        /**
         * Per OWASP recommendations, we'll use upper hex entities
         * for any other characters where a named entity does not exist.
         */
        if ($ord > 255) {
            return sprintf('&#x%04X;', $ord);
        }
        return sprintf('&#x%02X;', $ord);
    }

    /**
     * Callback function for preg_replace_callback that applies Javascript
     * escaping to all matches.
     *
     * Single-byte characters become \xHH; multi-byte characters become one
     * \uHHHH escape, or two when the UTF-16BE form is longer than four hex digits.
     *
     * @param array $matches Match data; index 0 holds the single UTF-8 character to escape
     * @return string
     */
    protected function jsMatcher($matches)
    {
        $chr = $matches[0];
        if (strlen($chr) === 1) {
            return sprintf('\\x%02X', ord($chr));
        }

        $chr = $this->convertEncoding($chr, 'UTF-16BE', 'UTF-8');
        $hex = strtoupper(bin2hex($chr));
        if (strlen($hex) <= 4) {
            return sprintf('\\u%04s', $hex);
        }

        // More than one UTF-16 code unit: emit each half as its own escape.
        $highSurrogate = substr($hex, 0, 4);
        $lowSurrogate  = substr($hex, 4, 4);
        return sprintf('\\u%04s\\u%04s', $highSurrogate, $lowSurrogate);
    }

    /**
     * Callback function for preg_replace_callback that applies CSS
     * escaping to all matches.
     *
     * The result is a backslash, the upper-case hex codepoint, and a trailing space.
     *
     * @param array $matches Match data; index 0 holds the single UTF-8 character to escape
     * @return string
     */
    protected function cssMatcher($matches)
    {
        $chr = $matches[0];
        if (strlen($chr) === 1) {
            $ord = ord($chr);
        } else {
            $chr = $this->convertEncoding($chr, 'UTF-32BE', 'UTF-8');
            $ord = hexdec(bin2hex($chr));
        }
        return sprintf('\\%X ', $ord);
    }

    /**
     * Converts a string to UTF-8 from the base encoding. The base encoding is set via this
     * class' constructor.
     *
     * @param string $string
     * @throws Exception\RuntimeException if the (converted) string is not valid UTF-8
     * @return string
     */
    protected function toUtf8($string)
    {
        if ($this->getEncoding() === 'utf-8') {
            $result = $string;
        } else {
            $result = $this->convertEncoding($string, 'UTF-8', $this->getEncoding());
        }

        if (!$this->isUtf8($result)) {
            throw new Exception\RuntimeException(
                sprintf('String to be escaped was not valid UTF-8 or could not be converted: %s', $result)
            );
        }

        return $result;
    }

    /**
     * Converts a string from UTF-8 to the base encoding. The base encoding is set via this
     * class' constructor.
     *
     * @param string $string
     * @return string
     */
    protected function fromUtf8($string)
    {
        if ($this->getEncoding() === 'utf-8') {
            return $string;
        }

        return $this->convertEncoding($string, $this->getEncoding(), 'UTF-8');
    }

    /**
     * Checks if a given string appears to be valid UTF-8 or not.
     *
     * An empty string counts as valid; otherwise the check relies on
     * preg_match() with the "u" modifier accepting the string.
     *
     * @param string $string
     * @return bool
     */
    protected function isUtf8($string)
    {
        return ($string === '' || preg_match('/^./su', $string));
    }

    /**
     * Encoding conversion helper which wraps iconv and mbstring where they exist or throws
     * an exception where neither is available.
     *
     * @param string $string
     * @param string $to
     * @param array|string $from
     * @throws Exception\RuntimeException if neither iconv nor mbstring is available
     * @return string The converted string, or '' when the conversion reports failure
     */
    protected function convertEncoding($string, $to, $from)
    {
        if (function_exists('iconv')) {
            $result = iconv($from, $to, $string);
        } elseif (function_exists('mb_convert_encoding')) {
            $result = mb_convert_encoding($string, $to, $from);
        } else {
            throw new Exception\RuntimeException(
                get_class($this)
                . ' requires either the iconv or mbstring extension to be installed'
                . ' when escaping for non UTF-8 strings.'
            );
        }

        if ($result === false) {
            return ''; // return non-fatal blank string on encoding errors from users
        }
        return $result;
    }
}