annotate vendor/zendframework/zend-escaper/src/Escaper.php @ 19:fa3358dc1485 tip

Add ndrum files
author Chris Cannam
date Wed, 28 Aug 2019 13:14:47 +0100
parents e200cb7efeb3
children
rev   line source
Chris@0 1 <?php
Chris@0 2 /**
Chris@0 3 * Zend Framework (http://framework.zend.com/)
Chris@0 4 *
Chris@0 5 * @link http://github.com/zendframework/zf2 for the canonical source repository
Chris@0 6 * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
Chris@0 7 * @license http://framework.zend.com/license/new-bsd New BSD License
Chris@0 8 */
Chris@0 9
Chris@0 10 namespace Zend\Escaper;
Chris@0 11
Chris@0 12 /**
Chris@0 13 * Context specific methods for use in secure output escaping
Chris@0 14 */
class Escaper
{
    /**
     * Entity Map mapping Unicode codepoints to any available named HTML entities.
     *
     * While HTML supports far more named entities, the lowest common denominator
     * has become HTML5's XML Serialisation which is restricted to those named
     * entities that XML supports. Using HTML entities would result in this error:
     *     XML Parsing Error: undefined entity
     *
     * @var array
     */
    protected static $htmlNamedEntityMap = [
        34 => 'quot', // quotation mark
        38 => 'amp',  // ampersand
        60 => 'lt',   // less-than sign
        62 => 'gt',   // greater-than sign
    ];

    /**
     * Current encoding for escaping. If not UTF-8, we convert strings from this encoding
     * pre-escaping and back to this encoding post-escaping.
     *
     * @var string
     */
    protected $encoding = 'utf-8';

    /**
     * Holds the value of the special flags passed as second parameter to
     * htmlspecialchars().
     *
     * @var int
     */
    protected $htmlSpecialCharsFlags;

    /**
     * Callback passed to preg_replace_callback() by escapeHtmlAttr().
     *
     * @var callable
     */
    protected $htmlAttrMatcher;

    /**
     * Callback passed to preg_replace_callback() by escapeJs().
     *
     * @var callable
     */
    protected $jsMatcher;

    /**
     * Callback passed to preg_replace_callback() by escapeCss().
     *
     * @var callable
     */
    protected $cssMatcher;

    /**
     * List of all encodings accepted by the constructor (lower-case names).
     *
     * @var array
     */
    protected $supportedEncodings = [
        'iso-8859-1',   'iso8859-1',    'iso-8859-5',   'iso8859-5',
        'iso-8859-15',  'iso8859-15',   'utf-8',        'cp866',
        'ibm866',       '866',          'cp1251',       'windows-1251',
        'win-1251',     '1251',         'cp1252',       'windows-1252',
        '1252',         'koi8-r',       'koi8-ru',      'koi8r',
        'big5',         '950',          'gb2312',       '936',
        'big5-hkscs',   'shift_jis',    'sjis',         'sjis-win',
        'cp932',        '932',          'euc-jp',       'eucjp',
        'eucjp-win',    'macroman'
    ];

    /**
     * Constructor: Single parameter allows setting of global encoding for use by
     * the current object. When omitted (or null) the default 'utf-8' is kept.
     *
     * The encoding name is matched case-insensitively against the list of
     * supported encodings and stored in lower case.
     *
     * @param string|null $encoding
     * @throws Exception\InvalidArgumentException if $encoding is not a string,
     *     is blank, or is not one of the supported encodings
     */
    public function __construct($encoding = null)
    {
        if ($encoding !== null) {
            if (! is_string($encoding)) {
                throw new Exception\InvalidArgumentException(
                    get_class($this) . ' constructor parameter must be a string, received ' . gettype($encoding)
                );
            }
            if ($encoding === '') {
                throw new Exception\InvalidArgumentException(
                    get_class($this) . ' constructor parameter does not allow a blank value'
                );
            }

            $encoding = strtolower($encoding);
            // Strict comparison is required: a loose in_array() compares numeric-looking
            // strings by value, so e.g. '9.5e2' or '950.0' would be accepted as '950'.
            if (! in_array($encoding, $this->supportedEncodings, true)) {
                throw new Exception\InvalidArgumentException(
                    'Value of \'' . $encoding . '\' passed to ' . get_class($this)
                    . ' constructor parameter is invalid. Provide an encoding supported by htmlspecialchars()'
                );
            }

            $this->encoding = $encoding;
        }

        // We take advantage of ENT_SUBSTITUTE flag to correctly deal with invalid UTF-8 sequences.
        $this->htmlSpecialCharsFlags = ENT_QUOTES | ENT_SUBSTITUTE;

        // set matcher callbacks
        $this->htmlAttrMatcher = [$this, 'htmlAttrMatcher'];
        $this->jsMatcher       = [$this, 'jsMatcher'];
        $this->cssMatcher      = [$this, 'cssMatcher'];
    }

    /**
     * Return the encoding that all output/input is expected to be encoded in.
     *
     * @return string
     */
    public function getEncoding()
    {
        return $this->encoding;
    }

    /**
     * Escape a string for the HTML Body context where there are very few characters
     * of special meaning. Internally this will use htmlspecialchars().
     *
     * @param string $string
     * @return string
     */
    public function escapeHtml($string)
    {
        return htmlspecialchars($string, $this->htmlSpecialCharsFlags, $this->encoding);
    }

    /**
     * Escape a string for the HTML Attribute context. We use an extended set of characters
     * to escape that are not covered by htmlspecialchars() to cover cases where an attribute
     * might be unquoted or quoted illegally (e.g. backticks are valid quotes for IE).
     *
     * Empty strings and strings made up only of digits are returned unchanged.
     *
     * @param string $string
     * @throws Exception\RuntimeException if the input is not valid in the configured encoding
     * @return string
     */
    public function escapeHtmlAttr($string)
    {
        $string = $this->toUtf8($string);
        if ($string === '' || ctype_digit($string)) {
            return $string;
        }

        $result = preg_replace_callback('/[^a-z0-9,\.\-_]/iSu', $this->htmlAttrMatcher, $string);
        return $this->fromUtf8($result);
    }

    /**
     * Escape a string for the Javascript context. This does not use json_encode(). An extended
     * set of characters are escaped beyond ECMAScript's rules for Javascript literal string
     * escaping in order to prevent misinterpretation of Javascript as HTML leading to the
     * injection of special characters and entities. The escaping used should be tolerant
     * of cases where HTML escaping was not applied on top of Javascript escaping correctly.
     * Backslash escaping is not used as it still leaves the escaped character as-is and so
     * is not useful in a HTML context.
     *
     * Empty strings and strings made up only of digits are returned unchanged.
     *
     * @param string $string
     * @throws Exception\RuntimeException if the input is not valid in the configured encoding
     * @return string
     */
    public function escapeJs($string)
    {
        $string = $this->toUtf8($string);
        if ($string === '' || ctype_digit($string)) {
            return $string;
        }

        $result = preg_replace_callback('/[^a-z0-9,\._]/iSu', $this->jsMatcher, $string);
        return $this->fromUtf8($result);
    }

    /**
     * Escape a string for the URI or Parameter contexts. This should not be used to escape
     * an entire URI - only a subcomponent being inserted. The function is a simple proxy
     * to rawurlencode() which now implements RFC 3986 since PHP 5.3 completely.
     *
     * @param string $string
     * @return string
     */
    public function escapeUrl($string)
    {
        return rawurlencode($string);
    }

    /**
     * Escape a string for the CSS context. CSS escaping can be applied to any string being
     * inserted into CSS and escapes everything except alphanumerics.
     *
     * Empty strings and strings made up only of digits are returned unchanged.
     *
     * @param string $string
     * @throws Exception\RuntimeException if the input is not valid in the configured encoding
     * @return string
     */
    public function escapeCss($string)
    {
        $string = $this->toUtf8($string);
        if ($string === '' || ctype_digit($string)) {
            return $string;
        }

        $result = preg_replace_callback('/[^a-z0-9]/iSu', $this->cssMatcher, $string);
        return $this->fromUtf8($result);
    }

    /**
     * Callback function for preg_replace_callback that applies HTML Attribute
     * escaping to all matches.
     *
     * Each match is a single UTF-8 character. It is replaced by, in order of
     * precedence: the replacement-character entity for control characters that
     * are undefined in HTML, a named entity from $htmlNamedEntityMap, or an
     * upper-case hexadecimal numeric entity.
     *
     * @param array $matches
     * @return string
     */
    protected function htmlAttrMatcher($matches)
    {
        $chr = $matches[0];

        /**
         * Resolve the full Unicode code point before any range check. The C1
         * controls (U+0080 - U+009F) are two bytes long in UTF-8, so testing
         * only the first byte would never recognise them.
         */
        if (strlen($chr) > 1) {
            $ord = hexdec(bin2hex($this->convertEncoding($chr, 'UTF-32BE', 'UTF-8')));
        } else {
            $ord = ord($chr);
        }

        /**
         * The following replaces characters undefined in HTML with the
         * hex entity for the Unicode replacement character.
         */
        if (($ord <= 0x1f && $chr !== "\t" && $chr !== "\n" && $chr !== "\r")
            || ($ord >= 0x7f && $ord <= 0x9f)
        ) {
            return '&#xFFFD;';
        }

        /**
         * Check if the current character to escape has a name entity we should
         * replace it with.
         */
        if (isset(static::$htmlNamedEntityMap[$ord])) {
            return '&' . static::$htmlNamedEntityMap[$ord] . ';';
        }

        /**
         * Per OWASP recommendations, we'll use upper hex entities
         * for any other characters where a named entity does not exist.
         */
        if ($ord > 255) {
            return sprintf('&#x%04X;', $ord);
        }
        return sprintf('&#x%02X;', $ord);
    }

    /**
     * Callback function for preg_replace_callback that applies Javascript
     * escaping to all matches.
     *
     * Single-byte characters become \xHH; everything else is converted to
     * UTF-16BE and emitted as one \uHHHH unit, or as two such units (a
     * surrogate pair) when the UTF-16 form is longer than two bytes.
     *
     * @param array $matches
     * @return string
     */
    protected function jsMatcher($matches)
    {
        $chr = $matches[0];
        if (strlen($chr) === 1) {
            return sprintf('\\x%02X', ord($chr));
        }

        $chr = $this->convertEncoding($chr, 'UTF-16BE', 'UTF-8');
        $hex = strtoupper(bin2hex($chr));
        if (strlen($hex) <= 4) {
            return sprintf('\\u%04s', $hex);
        }

        $highSurrogate = substr($hex, 0, 4);
        $lowSurrogate  = substr($hex, 4, 4);
        return sprintf('\\u%04s\\u%04s', $highSurrogate, $lowSurrogate);
    }

    /**
     * Callback function for preg_replace_callback that applies CSS
     * escaping to all matches.
     *
     * The character is written as a backslash, its code point in upper-case
     * hexadecimal, and a trailing space.
     *
     * @param array $matches
     * @return string
     */
    protected function cssMatcher($matches)
    {
        $chr = $matches[0];
        if (strlen($chr) === 1) {
            $ord = ord($chr);
        } else {
            $chr = $this->convertEncoding($chr, 'UTF-32BE', 'UTF-8');
            $ord = hexdec(bin2hex($chr));
        }
        return sprintf('\\%X ', $ord);
    }

    /**
     * Converts a string to UTF-8 from the base encoding. The base encoding is set via this
     * class' constructor.
     *
     * @param string $string
     * @throws Exception\RuntimeException if the (converted) string is not valid UTF-8
     * @return string
     */
    protected function toUtf8($string)
    {
        if ($this->getEncoding() === 'utf-8') {
            $result = $string;
        } else {
            $result = $this->convertEncoding($string, 'UTF-8', $this->getEncoding());
        }

        if (! $this->isUtf8($result)) {
            throw new Exception\RuntimeException(
                sprintf('String to be escaped was not valid UTF-8 or could not be converted: %s', $result)
            );
        }

        return $result;
    }

    /**
     * Converts a string from UTF-8 to the base encoding. The base encoding is set via this
     * class' constructor.
     *
     * @param string $string
     * @return string
     */
    protected function fromUtf8($string)
    {
        if ($this->getEncoding() === 'utf-8') {
            return $string;
        }

        return $this->convertEncoding($string, $this->getEncoding(), 'UTF-8');
    }

    /**
     * Checks if a given string appears to be valid UTF-8 or not.
     *
     * An empty string is accepted; otherwise the check relies on PCRE's "u"
     * modifier failing to match when the subject is not valid UTF-8.
     *
     * @param string $string
     * @return bool
     */
    protected function isUtf8($string)
    {
        return ($string === '' || preg_match('/^./su', $string));
    }

    /**
     * Encoding conversion helper which wraps iconv and mbstring where they exist or throws
     * an exception where neither is available.
     *
     * iconv is preferred when both extensions are present. A conversion that
     * reports failure (false) yields an empty string rather than an error.
     *
     * @param string $string
     * @param string $to
     * @param array|string $from
     * @throws Exception\RuntimeException if neither iconv nor mbstring is available
     * @return string
     */
    protected function convertEncoding($string, $to, $from)
    {
        if (function_exists('iconv')) {
            $result = iconv($from, $to, $string);
        } elseif (function_exists('mb_convert_encoding')) {
            $result = mb_convert_encoding($string, $to, $from);
        } else {
            throw new Exception\RuntimeException(
                get_class($this)
                . ' requires either the iconv or mbstring extension to be installed'
                . ' when escaping for non UTF-8 strings.'
            );
        }

        if ($result === false) {
            return ''; // return non-fatal blank string on encoding errors from users
        }
        return $result;
    }
}