To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.
root / vendor / zendframework / zend-escaper / src / Escaper.php @ 15:e200cb7efeb3
History | View | Annotate | Download (12.2 KB)
| 1 |
<?php
|
|---|---|
| 2 |
/**
|
| 3 |
* Zend Framework (http://framework.zend.com/)
|
| 4 |
*
|
| 5 |
* @link http://github.com/zendframework/zf2 for the canonical source repository
|
| 6 |
* @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
|
| 7 |
* @license http://framework.zend.com/license/new-bsd New BSD License
|
| 8 |
*/
|
| 9 |
|
| 10 |
namespace Zend\Escaper; |
| 11 |
|
| 12 |
/**
 * Context specific methods for use in secure output escaping.
 *
 * Each public escape*() method targets exactly one output context (HTML body,
 * HTML attribute, Javascript, URL component, CSS). Input is expected in the
 * encoding given to the constructor (UTF-8 by default); the attribute, JS and
 * CSS escapers convert to UTF-8 internally and convert the result back.
 */
class Escaper
{
    /**
     * Entity Map mapping Unicode codepoints to any available named HTML entities.
     *
     * While HTML supports far more named entities, the lowest common denominator
     * has become HTML5's XML Serialisation which is restricted to those named
     * entities that XML supports. Using HTML entities would result in this error:
     *     XML Parsing Error: undefined entity
     *
     * @var array
     */
    protected static $htmlNamedEntityMap = [
        34 => 'quot', // quotation mark
        38 => 'amp',  // ampersand
        60 => 'lt',   // less-than sign
        62 => 'gt',   // greater-than sign
    ];

    /**
     * Current encoding for escaping. If not UTF-8, we convert strings from this encoding
     * pre-escaping and back to this encoding post-escaping.
     *
     * @var string
     */
    protected $encoding = 'utf-8';

    /**
     * Holds the value of the special flags passed as second parameter to
     * htmlspecialchars().
     *
     * @var int
     */
    protected $htmlSpecialCharsFlags;

    /**
     * Static Matcher which escapes characters for HTML Attribute contexts
     *
     * @var callable
     */
    protected $htmlAttrMatcher;

    /**
     * Static Matcher which escapes characters for Javascript contexts
     *
     * @var callable
     */
    protected $jsMatcher;

    /**
     * Static Matcher which escapes characters for CSS Attribute contexts
     *
     * @var callable
     */
    protected $cssMatcher;

    /**
     * List of all encodings supported by this class (lower-case spellings).
     *
     * @var array
     */
    protected $supportedEncodings = [
        'iso-8859-1',   'iso8859-1',    'iso-8859-5',   'iso8859-5',
        'iso-8859-15',  'iso8859-15',   'utf-8',        'cp866',
        'ibm866',       '866',          'cp1251',       'windows-1251',
        'win-1251',     '1251',         'cp1252',       'windows-1252',
        '1252',         'koi8-r',       'koi8-ru',      'koi8r',
        'big5',         '950',          'gb2312',       '936',
        'big5-hkscs',   'shift_jis',    'sjis',         'sjis-win',
        'cp932',        '932',          'euc-jp',       'eucjp',
        'eucjp-win',    'macroman'
    ];

    /**
     * Constructor: Single parameter allows setting of global encoding for use by
     * the current object.
     *
     * The encoding name is lower-cased and must match one entry of
     * $supportedEncodings exactly; when omitted the object stays on UTF-8.
     *
     * @param string $encoding
     * @throws Exception\InvalidArgumentException if $encoding is not a string,
     *     is blank, or is not one of the supported encodings
     */
    public function __construct($encoding = null)
    {
        if ($encoding !== null) {
            if (! is_string($encoding)) {
                throw new Exception\InvalidArgumentException(
                    get_class($this) . ' constructor parameter must be a string, received ' . gettype($encoding)
                );
            }
            if ($encoding === '') {
                throw new Exception\InvalidArgumentException(
                    get_class($this) . ' constructor parameter does not allow a blank value'
                );
            }

            $encoding = strtolower($encoding);
            // Strict comparison: several supported names are numeric strings
            // ('866', '1252', ...), and a loose in_array() would accept
            // numerically-equal spellings such as '1252.0' that
            // htmlspecialchars() does not understand.
            if (! in_array($encoding, $this->supportedEncodings, true)) {
                throw new Exception\InvalidArgumentException(
                    'Value of \'' . $encoding . '\' passed to ' . get_class($this)
                    . ' constructor parameter is invalid. Provide an encoding supported by htmlspecialchars()'
                );
            }

            $this->encoding = $encoding;
        }

        // We take advantage of ENT_SUBSTITUTE flag to correctly deal with invalid UTF-8 sequences.
        $this->htmlSpecialCharsFlags = ENT_QUOTES | ENT_SUBSTITUTE;

        // set matcher callbacks
        $this->htmlAttrMatcher = [$this, 'htmlAttrMatcher'];
        $this->jsMatcher       = [$this, 'jsMatcher'];
        $this->cssMatcher      = [$this, 'cssMatcher'];
    }

    /**
     * Return the encoding that all output/input is expected to be encoded in.
     *
     * @return string
     */
    public function getEncoding()
    {
        return $this->encoding;
    }

    /**
     * Escape a string for the HTML Body context where there are very few characters
     * of special meaning. Internally this will use htmlspecialchars().
     *
     * @param string $string
     * @return string
     */
    public function escapeHtml($string)
    {
        return htmlspecialchars($string, $this->htmlSpecialCharsFlags, $this->encoding);
    }

    /**
     * Escape a string for the HTML Attribute context. We use an extended set of characters
     * to escape that are not covered by htmlspecialchars() to cover cases where an attribute
     * might be unquoted or quoted illegally (e.g. backticks are valid quotes for IE).
     *
     * Empty strings and strings consisting only of digits are returned unchanged.
     *
     * @param string $string
     * @throws Exception\RuntimeException if the input is not valid in the configured encoding
     * @return string
     */
    public function escapeHtmlAttr($string)
    {
        $string = $this->toUtf8($string);
        if ($string === '' || ctype_digit($string)) {
            return $string;
        }

        // Everything except ASCII alphanumerics and , . - _ is passed to the matcher.
        $result = preg_replace_callback('/[^a-z0-9,\.\-_]/iSu', $this->htmlAttrMatcher, $string);
        return $this->fromUtf8($result);
    }

    /**
     * Escape a string for the Javascript context. This does not use json_encode(). An extended
     * set of characters are escaped beyond ECMAScript's rules for Javascript literal string
     * escaping in order to prevent misinterpretation of Javascript as HTML leading to the
     * injection of special characters and entities. The escaping used should be tolerant
     * of cases where HTML escaping was not applied on top of Javascript escaping correctly.
     * Backslash escaping is not used as it still leaves the escaped character as-is and so
     * is not useful in a HTML context.
     *
     * Empty strings and strings consisting only of digits are returned unchanged.
     *
     * @param string $string
     * @throws Exception\RuntimeException if the input is not valid in the configured encoding
     * @return string
     */
    public function escapeJs($string)
    {
        $string = $this->toUtf8($string);
        if ($string === '' || ctype_digit($string)) {
            return $string;
        }

        // Everything except ASCII alphanumerics and , . _ is passed to the matcher.
        $result = preg_replace_callback('/[^a-z0-9,\._]/iSu', $this->jsMatcher, $string);
        return $this->fromUtf8($result);
    }

    /**
     * Escape a string for the URI or Parameter contexts. This should not be used to escape
     * an entire URI - only a subcomponent being inserted. The function is a simple proxy
     * to rawurlencode() which now implements RFC 3986 since PHP 5.3 completely.
     *
     * @param string $string
     * @return string
     */
    public function escapeUrl($string)
    {
        return rawurlencode($string);
    }

    /**
     * Escape a string for the CSS context. CSS escaping can be applied to any string being
     * inserted into CSS and escapes everything except alphanumerics.
     *
     * Empty strings and strings consisting only of digits are returned unchanged.
     *
     * @param string $string
     * @throws Exception\RuntimeException if the input is not valid in the configured encoding
     * @return string
     */
    public function escapeCss($string)
    {
        $string = $this->toUtf8($string);
        if ($string === '' || ctype_digit($string)) {
            return $string;
        }

        $result = preg_replace_callback('/[^a-z0-9]/iSu', $this->cssMatcher, $string);
        return $this->fromUtf8($result);
    }

    /**
     * Callback function for preg_replace_callback that applies HTML Attribute
     * escaping to all matches.
     *
     * Resolution order for the matched character:
     *   1. control characters undefined in HTML -> "&#xFFFD;"
     *   2. characters listed in $htmlNamedEntityMap -> named entity
     *   3. anything else -> upper-case hexadecimal numeric entity
     *
     * @param array $matches $matches[0] holds one UTF-8 encoded character
     * @return string
     */
    protected function htmlAttrMatcher($matches)
    {
        $chr = $matches[0];

        // Resolve the complete Unicode codepoint before any range test. ord()
        // only inspects the first byte, so for a multi-byte UTF-8 sequence it
        // can never report a value in 0x80-0x9F (C1 controls are encoded as
        // 0xC2 0x80 .. 0xC2 0x9F); the codepoint is read from UTF-32BE instead.
        if (strlen($chr) > 1) {
            $ord = hexdec(bin2hex($this->convertEncoding($chr, 'UTF-32BE', 'UTF-8')));
        } else {
            $ord = ord($chr);
        }

        // The following replaces characters undefined in HTML with the
        // hex entity for the Unicode replacement character. The entity (not
        // the raw U+FFFD character) is emitted so the result stays pure ASCII
        // and survives conversion back to any non UTF-8 target encoding.
        if (($ord <= 0x1f && $chr !== "\t" && $chr !== "\n" && $chr !== "\r")
            || ($ord >= 0x7f && $ord <= 0x9f)
        ) {
            return '&#xFFFD;';
        }

        // Use the named entity when the codepoint has one.
        if (isset(static::$htmlNamedEntityMap[$ord])) {
            return '&' . static::$htmlNamedEntityMap[$ord] . ';';
        }

        // Per OWASP recommendations, we'll use upper hex entities
        // for any other characters where a named entity does not exist.
        if ($ord > 255) {
            return sprintf('&#x%04X;', $ord);
        }
        return sprintf('&#x%02X;', $ord);
    }

    /**
     * Callback function for preg_replace_callback that applies Javascript
     * escaping to all matches.
     *
     * Single-byte characters become \xHH; multi-byte characters are converted
     * to UTF-16BE and emitted as one \uHHHH unit, or as two \uHHHH units when
     * the conversion yields more than four hex digits (a surrogate pair).
     *
     * @param array $matches $matches[0] holds one UTF-8 encoded character
     * @return string
     */
    protected function jsMatcher($matches)
    {
        $chr = $matches[0];
        if (strlen($chr) === 1) {
            return sprintf('\\x%02X', ord($chr));
        }

        $chr = $this->convertEncoding($chr, 'UTF-16BE', 'UTF-8');
        $hex = strtoupper(bin2hex($chr));
        if (strlen($hex) <= 4) {
            return sprintf('\\u%04s', $hex);
        }

        $highSurrogate = substr($hex, 0, 4);
        $lowSurrogate  = substr($hex, 4, 4);
        return sprintf('\\u%04s\\u%04s', $highSurrogate, $lowSurrogate);
    }

    /**
     * Callback function for preg_replace_callback that applies CSS
     * escaping to all matches.
     *
     * The character is emitted as a backslash, its codepoint in upper-case
     * hexadecimal, and a trailing space that terminates the escape sequence.
     *
     * @param array $matches $matches[0] holds one UTF-8 encoded character
     * @return string
     */
    protected function cssMatcher($matches)
    {
        $chr = $matches[0];
        if (strlen($chr) === 1) {
            $ord = ord($chr);
        } else {
            // Multi-byte character: read the codepoint from its UTF-32BE form.
            $chr = $this->convertEncoding($chr, 'UTF-32BE', 'UTF-8');
            $ord = hexdec(bin2hex($chr));
        }
        return sprintf('\\%X ', $ord);
    }

    /**
     * Converts a string to UTF-8 from the base encoding. The base encoding is set via this
     * class' constructor.
     *
     * @param string $string
     * @throws Exception\RuntimeException if the (converted) string is not valid UTF-8
     * @return string
     */
    protected function toUtf8($string)
    {
        if ($this->getEncoding() === 'utf-8') {
            $result = $string;
        } else {
            $result = $this->convertEncoding($string, 'UTF-8', $this->getEncoding());
        }

        if (! $this->isUtf8($result)) {
            throw new Exception\RuntimeException(
                sprintf('String to be escaped was not valid UTF-8 or could not be converted: %s', $result)
            );
        }

        return $result;
    }

    /**
     * Converts a string from UTF-8 to the base encoding. The base encoding is set via this
     * class' constructor.
     *
     * @param string $string
     * @return string
     */
    protected function fromUtf8($string)
    {
        if ($this->getEncoding() === 'utf-8') {
            return $string;
        }

        return $this->convertEncoding($string, $this->getEncoding(), 'UTF-8');
    }

    /**
     * Checks if a given string appears to be valid UTF-8 or not.
     *
     * Relies on the "u" pattern modifier: preg_match() does not report a match
     * when the subject is not valid UTF-8.
     *
     * @param string $string
     * @return bool
     */
    protected function isUtf8($string)
    {
        return ($string === '' || preg_match('/^./su', $string));
    }

    /**
     * Encoding conversion helper which wraps iconv and mbstring where they exist or throws
     * an exception where neither is available.
     *
     * @param string $string
     * @param string $to
     * @param array|string $from
     * @throws Exception\RuntimeException if neither iconv nor mbstring is available
     * @return string the converted string, or '' when the conversion fails
     */
    protected function convertEncoding($string, $to, $from)
    {
        if (function_exists('iconv')) {
            $result = iconv($from, $to, $string);
        } elseif (function_exists('mb_convert_encoding')) {
            $result = mb_convert_encoding($string, $to, $from);
        } else {
            throw new Exception\RuntimeException(
                get_class($this)
                . ' requires either the iconv or mbstring extension to be installed'
                . ' when escaping for non UTF-8 strings.'
            );
        }

        if ($result === false) {
            return ''; // return non-fatal blank string on encoding errors from users
        }
        return $result;
    }
}