To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Revision:

root / vendor / zendframework / zend-escaper / src / Escaper.php @ 15:e200cb7efeb3

History | View | Annotate | Download (12.2 KB)

1
<?php
2
/**
3
 * Zend Framework (http://framework.zend.com/)
4
 *
5
 * @link      http://github.com/zendframework/zf2 for the canonical source repository
6
 * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
7
 * @license   http://framework.zend.com/license/new-bsd New BSD License
8
 */
9

    
10
namespace Zend\Escaper;
11

    
12
/**
13
 * Context-specific methods for use in secure output escaping
14
 */
15
class Escaper
{
    /**
     * Entity Map mapping Unicode codepoints to any available named HTML entities.
     *
     * While HTML supports far more named entities, the lowest common denominator
     * has become HTML5's XML Serialisation which is restricted to those named
     * entities that XML supports. Using HTML entities would result in this error:
     *     XML Parsing Error: undefined entity
     *
     * @var array
     */
    protected static $htmlNamedEntityMap = [
        34 => 'quot',         // quotation mark
        38 => 'amp',          // ampersand
        60 => 'lt',           // less-than sign
        62 => 'gt',           // greater-than sign
    ];

    /**
     * Current encoding for escaping. If not UTF-8, we convert strings from this encoding
     * pre-escaping and back to this encoding post-escaping.
     *
     * @var string
     */
    protected $encoding = 'utf-8';

    /**
     * Holds the value of the special flags passed as second parameter to
     * htmlspecialchars().
     *
     * @var int
     */
    protected $htmlSpecialCharsFlags;

    /**
     * Callback handed to preg_replace_callback() by escapeHtmlAttr(); escapes a
     * single matched character for the HTML Attribute context.
     *
     * @var callable
     */
    protected $htmlAttrMatcher;

    /**
     * Callback handed to preg_replace_callback() by escapeJs(); escapes a single
     * matched character for the Javascript context.
     *
     * @var callable
     */
    protected $jsMatcher;

    /**
     * Callback handed to preg_replace_callback() by escapeCss(); escapes a single
     * matched character for the CSS context.
     *
     * @var callable
     */
    protected $cssMatcher;

    /**
     * List of all encodings supported by this class (lower-case spellings).
     *
     * @var array
     */
    protected $supportedEncodings = [
        'iso-8859-1',   'iso8859-1',    'iso-8859-5',   'iso8859-5',
        'iso-8859-15',  'iso8859-15',   'utf-8',        'cp866',
        'ibm866',       '866',          'cp1251',       'windows-1251',
        'win-1251',     '1251',         'cp1252',       'windows-1252',
        '1252',         'koi8-r',       'koi8-ru',      'koi8r',
        'big5',         '950',          'gb2312',       '936',
        'big5-hkscs',   'shift_jis',    'sjis',         'sjis-win',
        'cp932',        '932',          'euc-jp',       'eucjp',
        'eucjp-win',    'macroman'
    ];

    /**
     * Constructor: Single parameter allows setting of global encoding for use by
     * the current object.
     *
     * The encoding is lower-cased and must exactly match one of the entries in
     * $supportedEncodings. When omitted (null) the default 'utf-8' is kept.
     *
     * @param string|null $encoding
     * @throws Exception\InvalidArgumentException if $encoding is not a string, is
     *     blank, or is not one of the supported encodings
     */
    public function __construct($encoding = null)
    {
        if ($encoding !== null) {
            if (! is_string($encoding)) {
                throw new Exception\InvalidArgumentException(
                    get_class($this) . ' constructor parameter must be a string, received ' . gettype($encoding)
                );
            }
            if ($encoding === '') {
                throw new Exception\InvalidArgumentException(
                    get_class($this) . ' constructor parameter does not allow a blank value'
                );
            }

            $encoding = strtolower($encoding);
            // Strict comparison is required: with loose comparison numeric strings
            // are compared by value, so e.g. '1251.0' or '9.5e2' would be accepted
            // as if they were '1251' / '950'.
            if (! in_array($encoding, $this->supportedEncodings, true)) {
                throw new Exception\InvalidArgumentException(
                    'Value of \'' . $encoding . '\' passed to ' . get_class($this)
                    . ' constructor parameter is invalid. Provide an encoding supported by htmlspecialchars()'
                );
            }

            $this->encoding = $encoding;
        }

        // We take advantage of ENT_SUBSTITUTE flag to correctly deal with invalid UTF-8 sequences.
        $this->htmlSpecialCharsFlags = ENT_QUOTES | ENT_SUBSTITUTE;

        // set matcher callbacks
        $this->htmlAttrMatcher = [$this, 'htmlAttrMatcher'];
        $this->jsMatcher       = [$this, 'jsMatcher'];
        $this->cssMatcher      = [$this, 'cssMatcher'];
    }

    /**
     * Return the encoding that all output/input is expected to be encoded in.
     *
     * @return string
     */
    public function getEncoding()
    {
        return $this->encoding;
    }

    /**
     * Escape a string for the HTML Body context where there are very few characters
     * of special meaning. Internally this will use htmlspecialchars().
     *
     * @param string $string
     * @return string
     */
    public function escapeHtml($string)
    {
        return htmlspecialchars($string, $this->htmlSpecialCharsFlags, $this->encoding);
    }

    /**
     * Escape a string for the HTML Attribute context. We use an extended set of characters
     * to escape that are not covered by htmlspecialchars() to cover cases where an attribute
     * might be unquoted or quoted illegally (e.g. backticks are valid quotes for IE).
     *
     * Empty strings and strings consisting only of digits are returned unchanged
     * (after conversion to UTF-8).
     *
     * @param string $string
     * @throws Exception\RuntimeException if the input is not valid in the configured encoding
     * @return string
     */
    public function escapeHtmlAttr($string)
    {
        $string = $this->toUtf8($string);
        if ($string === '' || ctype_digit($string)) {
            return $string;
        }

        $result = preg_replace_callback('/[^a-z0-9,\.\-_]/iSu', $this->htmlAttrMatcher, $string);
        return $this->fromUtf8($result);
    }

    /**
     * Escape a string for the Javascript context. This does not use json_encode(). An extended
     * set of characters are escaped beyond ECMAScript's rules for Javascript literal string
     * escaping in order to prevent misinterpretation of Javascript as HTML leading to the
     * injection of special characters and entities. The escaping used should be tolerant
     * of cases where HTML escaping was not applied on top of Javascript escaping correctly.
     * Backslash escaping is not used as it still leaves the escaped character as-is and so
     * is not useful in a HTML context.
     *
     * @param string $string
     * @throws Exception\RuntimeException if the input is not valid in the configured encoding
     * @return string
     */
    public function escapeJs($string)
    {
        $string = $this->toUtf8($string);
        if ($string === '' || ctype_digit($string)) {
            return $string;
        }

        $result = preg_replace_callback('/[^a-z0-9,\._]/iSu', $this->jsMatcher, $string);
        return $this->fromUtf8($result);
    }

    /**
     * Escape a string for the URI or Parameter contexts. This should not be used to escape
     * an entire URI - only a subcomponent being inserted. The function is a simple proxy
     * to rawurlencode() which now implements RFC 3986 since PHP 5.3 completely.
     *
     * @param string $string
     * @return string
     */
    public function escapeUrl($string)
    {
        return rawurlencode($string);
    }

    /**
     * Escape a string for the CSS context. CSS escaping can be applied to any string being
     * inserted into CSS and escapes everything except alphanumerics.
     *
     * @param string $string
     * @throws Exception\RuntimeException if the input is not valid in the configured encoding
     * @return string
     */
    public function escapeCss($string)
    {
        $string = $this->toUtf8($string);
        if ($string === '' || ctype_digit($string)) {
            return $string;
        }

        $result = preg_replace_callback('/[^a-z0-9]/iSu', $this->cssMatcher, $string);
        return $this->fromUtf8($result);
    }

    /**
     * Callback function for preg_replace_callback that applies HTML Attribute
     * escaping to all matches.
     *
     * @param array $matches $matches[0] holds the single UTF-8 character to escape
     * @return string
     */
    protected function htmlAttrMatcher($matches)
    {
        $chr = $matches[0];

        /**
         * Resolve the full Unicode code point first. ord() only looks at the
         * first byte, which for a multi-byte UTF-8 sequence is a lead byte
         * (>= 0xC2) and would let the C1 controls U+0080..U+009F slip past the
         * range check below.
         */
        if (strlen($chr) > 1) {
            // UTF-32BE is the code point as a big-endian integer, so its hex
            // dump decodes directly to the code point value.
            $ord = hexdec(bin2hex($this->convertEncoding($chr, 'UTF-32BE', 'UTF-8')));
        } else {
            $ord = ord($chr);
        }

        /**
         * The following replaces characters undefined in HTML with the
         * hex entity for the Unicode replacement character.
         */
        if (($ord <= 0x1f && $chr !== "\t" && $chr !== "\n" && $chr !== "\r")
            || ($ord >= 0x7f && $ord <= 0x9f)
        ) {
            return '&#xFFFD;';
        }

        /**
         * Check if the current character to escape has a name entity we should
         * replace it with.
         */
        if (isset(static::$htmlNamedEntityMap[$ord])) {
            return '&' . static::$htmlNamedEntityMap[$ord] . ';';
        }

        /**
         * Per OWASP recommendations, we'll use upper hex entities
         * for any other characters where a named entity does not exist.
         */
        if ($ord > 255) {
            return sprintf('&#x%04X;', $ord);
        }
        return sprintf('&#x%02X;', $ord);
    }

    /**
     * Callback function for preg_replace_callback that applies Javascript
     * escaping to all matches.
     *
     * Single-byte characters become \xHH; everything else is converted to
     * UTF-16BE and emitted as one \uHHHH unit, or two units (a surrogate pair)
     * when the conversion yields more than four hex digits.
     *
     * @param array $matches $matches[0] holds the single UTF-8 character to escape
     * @return string
     */
    protected function jsMatcher($matches)
    {
        $chr = $matches[0];
        if (strlen($chr) == 1) {
            return sprintf('\\x%02X', ord($chr));
        }
        $chr = $this->convertEncoding($chr, 'UTF-16BE', 'UTF-8');
        $hex = strtoupper(bin2hex($chr));
        if (strlen($hex) <= 4) {
            return sprintf('\\u%04s', $hex);
        }
        $highSurrogate = substr($hex, 0, 4);
        $lowSurrogate = substr($hex, 4, 4);
        return sprintf('\\u%04s\\u%04s', $highSurrogate, $lowSurrogate);
    }

    /**
     * Callback function for preg_replace_callback that applies CSS
     * escaping to all matches.
     *
     * The result is a backslash, the code point in upper-case hex, and a
     * trailing space that terminates the escape sequence.
     *
     * @param array $matches $matches[0] holds the single UTF-8 character to escape
     * @return string
     */
    protected function cssMatcher($matches)
    {
        $chr = $matches[0];
        if (strlen($chr) == 1) {
            $ord = ord($chr);
        } else {
            $chr = $this->convertEncoding($chr, 'UTF-32BE', 'UTF-8');
            $ord = hexdec(bin2hex($chr));
        }
        return sprintf('\\%X ', $ord);
    }

    /**
     * Converts a string to UTF-8 from the base encoding. The base encoding is set via this
     * class' constructor.
     *
     * @param string $string
     * @throws Exception\RuntimeException if the (converted) string is not valid UTF-8
     * @return string
     */
    protected function toUtf8($string)
    {
        if ($this->getEncoding() === 'utf-8') {
            $result = $string;
        } else {
            $result = $this->convertEncoding($string, 'UTF-8', $this->getEncoding());
        }

        if (! $this->isUtf8($result)) {
            throw new Exception\RuntimeException(
                sprintf('String to be escaped was not valid UTF-8 or could not be converted: %s', $result)
            );
        }

        return $result;
    }

    /**
     * Converts a string from UTF-8 to the base encoding. The base encoding is set via this
     * class' constructor.
     *
     * @param string $string
     * @return string
     */
    protected function fromUtf8($string)
    {
        if ($this->getEncoding() === 'utf-8') {
            return $string;
        }

        return $this->convertEncoding($string, $this->getEncoding(), 'UTF-8');
    }

    /**
     * Checks if a given string appears to be valid UTF-8 or not.
     *
     * The check relies on PCRE's "u" modifier, which makes preg_match() fail
     * to match when the subject is not valid UTF-8.
     *
     * @param string $string
     * @return bool
     */
    protected function isUtf8($string)
    {
        return ($string === '' || preg_match('/^./su', $string));
    }

    /**
     * Encoding conversion helper which wraps iconv and mbstring where they exist or throws
     * an exception where neither is available.
     *
     * iconv is preferred when both extensions are loaded. A failed conversion
     * (false result) is reported as an empty string rather than an error.
     *
     * @param string $string
     * @param string $to
     * @param array|string $from
     * @throws Exception\RuntimeException if neither iconv nor mbstring is available
     * @return string
     */
    protected function convertEncoding($string, $to, $from)
    {
        if (function_exists('iconv')) {
            $result = iconv($from, $to, $string);
        } elseif (function_exists('mb_convert_encoding')) {
            $result = mb_convert_encoding($string, $to, $from);
        } else {
            throw new Exception\RuntimeException(
                get_class($this)
                . ' requires either the iconv or mbstring extension to be installed'
                . ' when escaping for non UTF-8 strings.'
            );
        }

        if ($result === false) {
            return ''; // return non-fatal blank string on encoding errors from users
        }
        return $result;
    }
}