Mercurial > hg > isophonics-drupal-site
comparison vendor/nikic/php-parser/lib/PhpParser/Lexer.php @ 13:5fb285c0d0e3
Update Drupal core to 8.4.7 via Composer. Security update; I *think* we've
been lucky to get away with this so far, as we don't support self-registration,
which seems to be used by the so-called "drupalgeddon 2" attack that 8.4.5
was vulnerable to.
author | Chris Cannam |
---|---|
date | Mon, 23 Apr 2018 09:33:26 +0100 |
parents | 7a779792577d |
children |
comparison
equal
deleted
inserted
replaced
12:7a779792577d | 13:5fb285c0d0e3 |
---|---|
1 <?php | 1 <?php declare(strict_types=1); |
2 | 2 |
3 namespace PhpParser; | 3 namespace PhpParser; |
4 | 4 |
5 use PhpParser\Parser\Tokens; | 5 use PhpParser\Parser\Tokens; |
6 | 6 |
25 * which is an array of attributes to add to the AST nodes. Possible | 25 * which is an array of attributes to add to the AST nodes. Possible |
26 * attributes are: 'comments', 'startLine', 'endLine', 'startTokenPos', | 26 * attributes are: 'comments', 'startLine', 'endLine', 'startTokenPos', |
27 * 'endTokenPos', 'startFilePos', 'endFilePos'. The option defaults to the | 27 * 'endTokenPos', 'startFilePos', 'endFilePos'. The option defaults to the |
28 * first three. For more info see getNextToken() docs. | 28 * first three. For more info see getNextToken() docs. |
29 */ | 29 */ |
30 public function __construct(array $options = array()) { | 30 public function __construct(array $options = []) { |
31 // map from internal tokens to PhpParser tokens | 31 // map from internal tokens to PhpParser tokens |
32 $this->tokenMap = $this->createTokenMap(); | 32 $this->tokenMap = $this->createTokenMap(); |
33 | 33 |
34 // map of tokens to drop while lexing (the map is only used for isset lookup, | 34 // map of tokens to drop while lexing (the map is only used for isset lookup, |
35 // that's why the value is simply set to 1; the value is never actually used.) | 35 // that's why the value is simply set to 1; the value is never actually used.) |
36 $this->dropTokens = array_fill_keys( | 36 $this->dropTokens = array_fill_keys( |
37 array(T_WHITESPACE, T_OPEN_TAG, T_COMMENT, T_DOC_COMMENT), 1 | 37 [\T_WHITESPACE, \T_OPEN_TAG, \T_COMMENT, \T_DOC_COMMENT], 1 |
38 ); | 38 ); |
39 | 39 |
40 // the usedAttributes member is a map of the used attribute names to a dummy | 40 // the usedAttributes member is a map of the used attribute names to a dummy |
41 // value (here "true") | 41 // value (here "true") |
42 $options += array( | 42 $options += [ |
43 'usedAttributes' => array('comments', 'startLine', 'endLine'), | 43 'usedAttributes' => ['comments', 'startLine', 'endLine'], |
44 ); | 44 ]; |
45 $this->usedAttributes = array_fill_keys($options['usedAttributes'], true); | 45 $this->usedAttributes = array_fill_keys($options['usedAttributes'], true); |
46 } | 46 } |
47 | 47 |
48 /** | 48 /** |
49 * Initializes the lexer for lexing the provided source code. | 49 * Initializes the lexer for lexing the provided source code. |
53 * | 53 * |
54 * @param string $code The source code to lex | 54 * @param string $code The source code to lex |
55 * @param ErrorHandler|null $errorHandler Error handler to use for lexing errors. Defaults to | 55 * @param ErrorHandler|null $errorHandler Error handler to use for lexing errors. Defaults to |
56 * ErrorHandler\Throwing | 56 * ErrorHandler\Throwing |
57 */ | 57 */ |
58 public function startLexing($code, ErrorHandler $errorHandler = null) { | 58 public function startLexing(string $code, ErrorHandler $errorHandler = null) { |
59 if (null === $errorHandler) { | 59 if (null === $errorHandler) { |
60 $errorHandler = new ErrorHandler\Throwing(); | 60 $errorHandler = new ErrorHandler\Throwing(); |
61 } | 61 } |
62 | 62 |
63 $this->code = $code; // keep the code around for __halt_compiler() handling | 63 $this->code = $code; // keep the code around for __halt_compiler() handling |
69 // This ensures proper composability, because having a newline is the "safe" assumption. | 69 // This ensures proper composability, because having a newline is the "safe" assumption. |
70 $this->prevCloseTagHasNewline = true; | 70 $this->prevCloseTagHasNewline = true; |
71 | 71 |
72 $scream = ini_set('xdebug.scream', '0'); | 72 $scream = ini_set('xdebug.scream', '0'); |
73 | 73 |
74 $this->resetErrors(); | 74 error_clear_last(); |
75 $this->tokens = @token_get_all($code); | 75 $this->tokens = @token_get_all($code); |
76 $this->handleErrors($errorHandler); | 76 $this->handleErrors($errorHandler); |
77 | 77 |
78 if (false !== $scream) { | 78 if (false !== $scream) { |
79 ini_set('xdebug.scream', $scream); | 79 ini_set('xdebug.scream', $scream); |
80 } | |
81 } | |
82 | |
83 protected function resetErrors() { | |
84 if (function_exists('error_clear_last')) { | |
85 error_clear_last(); | |
86 } else { | |
87 // set error_get_last() to defined state by forcing an undefined variable error | |
88 set_error_handler(function() { return false; }, 0); | |
89 @$undefinedVariable; | |
90 restore_error_handler(); | |
91 } | 80 } |
92 } | 81 } |
93 | 82 |
94 private function handleInvalidCharacterRange($start, $end, $line, ErrorHandler $errorHandler) { | 83 private function handleInvalidCharacterRange($start, $end, $line, ErrorHandler $errorHandler) { |
95 for ($i = $start; $i < $end; $i++) { | 84 for ($i = $start; $i < $end; $i++) { |
115 'endFilePos' => $i, | 104 'endFilePos' => $i, |
116 ])); | 105 ])); |
117 } | 106 } |
118 } | 107 } |
119 | 108 |
120 private function isUnterminatedComment($token) { | 109 /** |
121 return ($token[0] === T_COMMENT || $token[0] === T_DOC_COMMENT) | 110 * Check whether comment token is unterminated. |
111 * | |
112 * @return bool | |
113 */ | |
114 private function isUnterminatedComment($token) : bool { | |
115 return ($token[0] === \T_COMMENT || $token[0] === \T_DOC_COMMENT) | |
122 && substr($token[1], 0, 2) === '/*' | 116 && substr($token[1], 0, 2) === '/*' |
123 && substr($token[1], -2) !== '*/'; | 117 && substr($token[1], -2) !== '*/'; |
124 } | 118 } |
125 | 119 |
126 private function errorMayHaveOccurred() { | 120 /** |
121 * Check whether an error *may* have occurred during tokenization. | |
122 * | |
123 * @return bool | |
124 */ | |
125 private function errorMayHaveOccurred() : bool { | |
127 if (defined('HHVM_VERSION')) { | 126 if (defined('HHVM_VERSION')) { |
128 // In HHVM token_get_all() does not throw warnings, so we need to conservatively | 127 // In HHVM token_get_all() does not throw warnings, so we need to conservatively |
129 // assume that an error occurred | 128 // assume that an error occurred |
130 return true; | 129 return true; |
131 } | 130 } |
132 | 131 |
133 $error = error_get_last(); | 132 return null !== error_get_last(); |
134 return null !== $error | |
135 && false === strpos($error['message'], 'Undefined variable'); | |
136 } | 133 } |
137 | 134 |
138 protected function handleErrors(ErrorHandler $errorHandler) { | 135 protected function handleErrors(ErrorHandler $errorHandler) { |
139 if (!$this->errorMayHaveOccurred()) { | 136 if (!$this->errorMayHaveOccurred()) { |
140 return; | 137 return; |
145 // detected by finding "gaps" in the token array. Unterminated comments are detected | 142 // detected by finding "gaps" in the token array. Unterminated comments are detected |
146 // by checking if a trailing comment has a "*/" at the end. | 143 // by checking if a trailing comment has a "*/" at the end. |
147 | 144 |
148 $filePos = 0; | 145 $filePos = 0; |
149 $line = 1; | 146 $line = 1; |
150 foreach ($this->tokens as $i => $token) { | 147 foreach ($this->tokens as $token) { |
151 $tokenValue = \is_string($token) ? $token : $token[1]; | 148 $tokenValue = \is_string($token) ? $token : $token[1]; |
152 $tokenLen = \strlen($tokenValue); | 149 $tokenLen = \strlen($tokenValue); |
153 | 150 |
154 if (substr($this->code, $filePos, $tokenLen) !== $tokenValue) { | 151 if (substr($this->code, $filePos, $tokenLen) !== $tokenValue) { |
155 // Something is missing, must be an invalid character | 152 // Something is missing, must be an invalid character |
156 $nextFilePos = strpos($this->code, $tokenValue, $filePos); | 153 $nextFilePos = strpos($this->code, $tokenValue, $filePos); |
157 $this->handleInvalidCharacterRange( | 154 $this->handleInvalidCharacterRange( |
158 $filePos, $nextFilePos, $line, $errorHandler); | 155 $filePos, $nextFilePos, $line, $errorHandler); |
159 $filePos = $nextFilePos; | 156 $filePos = (int) $nextFilePos; |
160 } | 157 } |
161 | 158 |
162 $filePos += $tokenLen; | 159 $filePos += $tokenLen; |
163 $line += substr_count($tokenValue, "\n"); | 160 $line += substr_count($tokenValue, "\n"); |
164 } | 161 } |
174 'endFilePos' => $filePos + \strlen($comment), | 171 'endFilePos' => $filePos + \strlen($comment), |
175 ])); | 172 ])); |
176 | 173 |
177 // Emulate the PHP behavior | 174 // Emulate the PHP behavior |
178 $isDocComment = isset($comment[3]) && $comment[3] === '*'; | 175 $isDocComment = isset($comment[3]) && $comment[3] === '*'; |
179 $this->tokens[] = [$isDocComment ? T_DOC_COMMENT : T_COMMENT, $comment, $line]; | 176 $this->tokens[] = [$isDocComment ? \T_DOC_COMMENT : \T_COMMENT, $comment, $line]; |
180 } else { | 177 } else { |
181 // Invalid characters at the end of the input | 178 // Invalid characters at the end of the input |
182 $this->handleInvalidCharacterRange( | 179 $this->handleInvalidCharacterRange( |
183 $filePos, \strlen($this->code), $line, $errorHandler); | 180 $filePos, \strlen($this->code), $line, $errorHandler); |
184 } | 181 } |
219 * @param mixed $startAttributes Variable to store start attributes in | 216 * @param mixed $startAttributes Variable to store start attributes in |
220 * @param mixed $endAttributes Variable to store end attributes in | 217 * @param mixed $endAttributes Variable to store end attributes in |
221 * | 218 * |
222 * @return int Token id | 219 * @return int Token id |
223 */ | 220 */ |
224 public function getNextToken(&$value = null, &$startAttributes = null, &$endAttributes = null) { | 221 public function getNextToken(&$value = null, &$startAttributes = null, &$endAttributes = null) : int { |
225 $startAttributes = array(); | 222 $startAttributes = []; |
226 $endAttributes = array(); | 223 $endAttributes = []; |
227 | 224 |
228 while (1) { | 225 while (1) { |
229 if (isset($this->tokens[++$this->pos])) { | 226 if (isset($this->tokens[++$this->pos])) { |
230 $token = $this->tokens[$this->pos]; | 227 $token = $this->tokens[$this->pos]; |
231 } else { | 228 } else { |
254 $id = ord($token); | 251 $id = ord($token); |
255 } | 252 } |
256 } elseif (!isset($this->dropTokens[$token[0]])) { | 253 } elseif (!isset($this->dropTokens[$token[0]])) { |
257 $value = $token[1]; | 254 $value = $token[1]; |
258 $id = $this->tokenMap[$token[0]]; | 255 $id = $this->tokenMap[$token[0]]; |
259 if (T_CLOSE_TAG === $token[0]) { | 256 if (\T_CLOSE_TAG === $token[0]) { |
260 $this->prevCloseTagHasNewline = false !== strpos($token[1], "\n"); | 257 $this->prevCloseTagHasNewline = false !== strpos($token[1], "\n"); |
261 } else if (T_INLINE_HTML === $token[0]) { | 258 } elseif (\T_INLINE_HTML === $token[0]) { |
262 $startAttributes['hasLeadingNewline'] = $this->prevCloseTagHasNewline; | 259 $startAttributes['hasLeadingNewline'] = $this->prevCloseTagHasNewline; |
263 } | 260 } |
264 | 261 |
265 $this->line += substr_count($value, "\n"); | 262 $this->line += substr_count($value, "\n"); |
266 $this->filePos += \strlen($value); | 263 $this->filePos += \strlen($value); |
267 } else { | 264 } else { |
268 if (T_COMMENT === $token[0] || T_DOC_COMMENT === $token[0]) { | 265 if (\T_COMMENT === $token[0] || \T_DOC_COMMENT === $token[0]) { |
269 if (isset($this->usedAttributes['comments'])) { | 266 if (isset($this->usedAttributes['comments'])) { |
270 $comment = T_DOC_COMMENT === $token[0] | 267 $comment = \T_DOC_COMMENT === $token[0] |
271 ? new Comment\Doc($token[1], $this->line, $this->filePos) | 268 ? new Comment\Doc($token[1], $this->line, $this->filePos, $this->pos) |
272 : new Comment($token[1], $this->line, $this->filePos); | 269 : new Comment($token[1], $this->line, $this->filePos, $this->pos); |
273 $startAttributes['comments'][] = $comment; | 270 $startAttributes['comments'][] = $comment; |
274 } | 271 } |
275 } | 272 } |
276 | 273 |
277 $this->line += substr_count($token[1], "\n"); | 274 $this->line += substr_count($token[1], "\n"); |
303 * whitespace and comments are included). The token position | 300 * whitespace and comments are included). The token position |
304 * attributes are against this token array. | 301 * attributes are against this token array. |
305 * | 302 * |
306 * @return array Array of tokens in token_get_all() format | 303 * @return array Array of tokens in token_get_all() format |
307 */ | 304 */ |
308 public function getTokens() { | 305 public function getTokens() : array { |
309 return $this->tokens; | 306 return $this->tokens; |
310 } | 307 } |
311 | 308 |
312 /** | 309 /** |
313 * Handles __halt_compiler() by returning the text after it. | 310 * Handles __halt_compiler() by returning the text after it. |
314 * | 311 * |
315 * @return string Remaining text | 312 * @return string Remaining text |
316 */ | 313 */ |
317 public function handleHaltCompiler() { | 314 public function handleHaltCompiler() : string { |
318 // text after T_HALT_COMPILER, still including (); | 315 // text after T_HALT_COMPILER, still including (); |
319 $textAfter = substr($this->code, $this->filePos); | 316 $textAfter = substr($this->code, $this->filePos); |
320 | 317 |
321 // ensure that it is followed by (); | 318 // ensure that it is followed by (); |
322 // this simplifies the situation, by not allowing any comments | 319 // this simplifies the situation, by not allowing any comments |
327 | 324 |
328 // prevent the lexer from returning any further tokens | 325 // prevent the lexer from returning any further tokens |
329 $this->pos = count($this->tokens); | 326 $this->pos = count($this->tokens); |
330 | 327 |
331 // return with (); removed | 328 // return with (); removed |
332 return (string) substr($textAfter, strlen($matches[0])); // (string) converts false to '' | 329 return substr($textAfter, strlen($matches[0])); |
333 } | 330 } |
334 | 331 |
335 /** | 332 /** |
336 * Creates the token map. | 333 * Creates the token map. |
337 * | 334 * |
339 * to the identifiers used by the Parser. Additionally it | 336 * to the identifiers used by the Parser. Additionally it |
340 * maps T_OPEN_TAG_WITH_ECHO to T_ECHO and T_CLOSE_TAG to ';'. | 337 * maps T_OPEN_TAG_WITH_ECHO to T_ECHO and T_CLOSE_TAG to ';'. |
341 * | 338 * |
342 * @return array The token map | 339 * @return array The token map |
343 */ | 340 */ |
344 protected function createTokenMap() { | 341 protected function createTokenMap() : array { |
345 $tokenMap = array(); | 342 $tokenMap = []; |
346 | 343 |
347 // 256 is the minimum possible token number, as everything below | 344 // 256 is the minimum possible token number, as everything below |
348 // it is an ASCII value | 345 // it is an ASCII value |
349 for ($i = 256; $i < 1000; ++$i) { | 346 for ($i = 256; $i < 1000; ++$i) { |
350 if (T_DOUBLE_COLON === $i) { | 347 if (\T_DOUBLE_COLON === $i) { |
351 // T_DOUBLE_COLON is equivalent to T_PAAMAYIM_NEKUDOTAYIM | 348 // T_DOUBLE_COLON is equivalent to T_PAAMAYIM_NEKUDOTAYIM |
352 $tokenMap[$i] = Tokens::T_PAAMAYIM_NEKUDOTAYIM; | 349 $tokenMap[$i] = Tokens::T_PAAMAYIM_NEKUDOTAYIM; |
353 } elseif(T_OPEN_TAG_WITH_ECHO === $i) { | 350 } elseif(\T_OPEN_TAG_WITH_ECHO === $i) { |
354 // T_OPEN_TAG_WITH_ECHO with dropped T_OPEN_TAG results in T_ECHO | 351 // T_OPEN_TAG_WITH_ECHO with dropped T_OPEN_TAG results in T_ECHO |
355 $tokenMap[$i] = Tokens::T_ECHO; | 352 $tokenMap[$i] = Tokens::T_ECHO; |
356 } elseif(T_CLOSE_TAG === $i) { | 353 } elseif(\T_CLOSE_TAG === $i) { |
357 // T_CLOSE_TAG is equivalent to ';' | 354 // T_CLOSE_TAG is equivalent to ';' |
358 $tokenMap[$i] = ord(';'); | 355 $tokenMap[$i] = ord(';'); |
359 } elseif ('UNKNOWN' !== $name = token_name($i)) { | 356 } elseif ('UNKNOWN' !== $name = token_name($i)) { |
360 if ('T_HASHBANG' === $name) { | 357 if ('T_HASHBANG' === $name) { |
361 // HHVM uses a special token for #! hashbang lines | 358 // HHVM uses a special token for #! hashbang lines |
362 $tokenMap[$i] = Tokens::T_INLINE_HTML; | 359 $tokenMap[$i] = Tokens::T_INLINE_HTML; |
363 } else if (defined($name = Tokens::class . '::' . $name)) { | 360 } elseif (defined($name = Tokens::class . '::' . $name)) { |
364 // Other tokens can be mapped directly | 361 // Other tokens can be mapped directly |
365 $tokenMap[$i] = constant($name); | 362 $tokenMap[$i] = constant($name); |
366 } | 363 } |
367 } | 364 } |
368 } | 365 } |
369 | 366 |
370 // HHVM uses a special token for numbers that overflow to double | 367 // HHVM uses a special token for numbers that overflow to double |
371 if (defined('T_ONUMBER')) { | 368 if (defined('T_ONUMBER')) { |
372 $tokenMap[T_ONUMBER] = Tokens::T_DNUMBER; | 369 $tokenMap[\T_ONUMBER] = Tokens::T_DNUMBER; |
373 } | 370 } |
374 // HHVM also has a separate token for the __COMPILER_HALT_OFFSET__ constant | 371 // HHVM also has a separate token for the __COMPILER_HALT_OFFSET__ constant |
375 if (defined('T_COMPILER_HALT_OFFSET')) { | 372 if (defined('T_COMPILER_HALT_OFFSET')) { |
376 $tokenMap[T_COMPILER_HALT_OFFSET] = Tokens::T_STRING; | 373 $tokenMap[\T_COMPILER_HALT_OFFSET] = Tokens::T_STRING; |
377 } | 374 } |
378 | 375 |
379 return $tokenMap; | 376 return $tokenMap; |
380 } | 377 } |
381 } | 378 } |