Mercurial > hg > isophonics-drupal-site
comparison vendor/squizlabs/php_codesniffer/CodeSniffer/Tokenizers/PHP.php @ 0:4c8ae668cc8c
Initial import (non-working)
author | Chris Cannam |
---|---|
date | Wed, 29 Nov 2017 16:09:58 +0000 |
parents | |
children | 7a779792577d |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4c8ae668cc8c |
---|---|
1 <?php | |
2 /** | |
3 * Tokenizes PHP code. | |
4 * | |
5 * PHP version 5 | |
6 * | |
7 * @category PHP | |
8 * @package PHP_CodeSniffer | |
9 * @author Greg Sherwood <gsherwood@squiz.net> | |
10 * @copyright 2006-2014 Squiz Pty Ltd (ABN 77 084 670 600) | |
11 * @license https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence | |
12 * @link http://pear.php.net/package/PHP_CodeSniffer | |
13 */ | |
14 | |
15 /** | |
16 * Tokenizes PHP code. | |
17 * | |
18 * @category PHP | |
19 * @package PHP_CodeSniffer | |
20 * @author Greg Sherwood <gsherwood@squiz.net> | |
21 * @copyright 2006-2014 Squiz Pty Ltd (ABN 77 084 670 600) | |
22 * @license https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence | |
23 * @version Release: @package_version@ | |
24 * @link http://pear.php.net/package/PHP_CodeSniffer | |
25 */ | |
26 class PHP_CodeSniffer_Tokenizers_PHP | |
27 { | |
28 | |
29 /** | |
30 * If TRUE, files that appear to be minified will not be processed. | |
31 * | |
32 * @var boolean | |
33 */ | |
34 public $skipMinified = false; | |
35 | |
36 /** | |
37 * A list of tokens that are allowed to open a scope. | |
38 * | |
39 * This array also contains information about what kind of token the scope | |
40 * opener uses to open and close the scope, if the token strictly requires | |
41 * an opener, if the token can share a scope closer, and who it can be shared | |
42 * with. An example of a token that shares a scope closer is a CASE scope. | |
43 * | |
44 * @var array | |
45 */ | |
46 public $scopeOpeners = array( | |
47 T_IF => array( | |
48 'start' => array( | |
49 T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET, | |
50 T_COLON => T_COLON, | |
51 ), | |
52 'end' => array( | |
53 T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET, | |
54 T_ENDIF => T_ENDIF, | |
55 T_ELSE => T_ELSE, | |
56 T_ELSEIF => T_ELSEIF, | |
57 ), | |
58 'strict' => false, | |
59 'shared' => false, | |
60 'with' => array( | |
61 T_ELSE => T_ELSE, | |
62 T_ELSEIF => T_ELSEIF, | |
63 ), | |
64 ), | |
65 T_TRY => array( | |
66 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET), | |
67 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET), | |
68 'strict' => true, | |
69 'shared' => false, | |
70 'with' => array(), | |
71 ), | |
72 T_CATCH => array( | |
73 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET), | |
74 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET), | |
75 'strict' => true, | |
76 'shared' => false, | |
77 'with' => array(), | |
78 ), | |
79 T_FINALLY => array( | |
80 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET), | |
81 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET), | |
82 'strict' => true, | |
83 'shared' => false, | |
84 'with' => array(), | |
85 ), | |
86 T_ELSE => array( | |
87 'start' => array( | |
88 T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET, | |
89 T_COLON => T_COLON, | |
90 ), | |
91 'end' => array( | |
92 T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET, | |
93 T_ENDIF => T_ENDIF, | |
94 ), | |
95 'strict' => false, | |
96 'shared' => false, | |
97 'with' => array( | |
98 T_IF => T_IF, | |
99 T_ELSEIF => T_ELSEIF, | |
100 ), | |
101 ), | |
102 T_ELSEIF => array( | |
103 'start' => array( | |
104 T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET, | |
105 T_COLON => T_COLON, | |
106 ), | |
107 'end' => array( | |
108 T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET, | |
109 T_ENDIF => T_ENDIF, | |
110 T_ELSE => T_ELSE, | |
111 T_ELSEIF => T_ELSEIF, | |
112 ), | |
113 'strict' => false, | |
114 'shared' => false, | |
115 'with' => array( | |
116 T_IF => T_IF, | |
117 T_ELSE => T_ELSE, | |
118 ), | |
119 ), | |
120 T_FOR => array( | |
121 'start' => array( | |
122 T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET, | |
123 T_COLON => T_COLON, | |
124 ), | |
125 'end' => array( | |
126 T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET, | |
127 T_ENDFOR => T_ENDFOR, | |
128 ), | |
129 'strict' => false, | |
130 'shared' => false, | |
131 'with' => array(), | |
132 ), | |
133 T_FOREACH => array( | |
134 'start' => array( | |
135 T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET, | |
136 T_COLON => T_COLON, | |
137 ), | |
138 'end' => array( | |
139 T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET, | |
140 T_ENDFOREACH => T_ENDFOREACH, | |
141 ), | |
142 'strict' => false, | |
143 'shared' => false, | |
144 'with' => array(), | |
145 ), | |
146 T_INTERFACE => array( | |
147 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET), | |
148 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET), | |
149 'strict' => true, | |
150 'shared' => false, | |
151 'with' => array(), | |
152 ), | |
153 T_FUNCTION => array( | |
154 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET), | |
155 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET), | |
156 'strict' => true, | |
157 'shared' => false, | |
158 'with' => array(), | |
159 ), | |
160 T_CLASS => array( | |
161 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET), | |
162 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET), | |
163 'strict' => true, | |
164 'shared' => false, | |
165 'with' => array(), | |
166 ), | |
167 T_TRAIT => array( | |
168 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET), | |
169 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET), | |
170 'strict' => true, | |
171 'shared' => false, | |
172 'with' => array(), | |
173 ), | |
174 T_USE => array( | |
175 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET), | |
176 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET), | |
177 'strict' => false, | |
178 'shared' => false, | |
179 'with' => array(), | |
180 ), | |
181 T_DECLARE => array( | |
182 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET), | |
183 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET), | |
184 'strict' => false, | |
185 'shared' => false, | |
186 'with' => array(), | |
187 ), | |
188 T_NAMESPACE => array( | |
189 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET), | |
190 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET), | |
191 'strict' => false, | |
192 'shared' => false, | |
193 'with' => array(), | |
194 ), | |
195 T_WHILE => array( | |
196 'start' => array( | |
197 T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET, | |
198 T_COLON => T_COLON, | |
199 ), | |
200 'end' => array( | |
201 T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET, | |
202 T_ENDWHILE => T_ENDWHILE, | |
203 ), | |
204 'strict' => false, | |
205 'shared' => false, | |
206 'with' => array(), | |
207 ), | |
208 T_DO => array( | |
209 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET), | |
210 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET), | |
211 'strict' => true, | |
212 'shared' => false, | |
213 'with' => array(), | |
214 ), | |
215 T_SWITCH => array( | |
216 'start' => array( | |
217 T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET, | |
218 T_COLON => T_COLON, | |
219 ), | |
220 'end' => array( | |
221 T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET, | |
222 T_ENDSWITCH => T_ENDSWITCH, | |
223 ), | |
224 'strict' => true, | |
225 'shared' => false, | |
226 'with' => array(), | |
227 ), | |
228 T_CASE => array( | |
229 'start' => array( | |
230 T_COLON => T_COLON, | |
231 T_SEMICOLON => T_SEMICOLON, | |
232 ), | |
233 'end' => array( | |
234 T_BREAK => T_BREAK, | |
235 T_RETURN => T_RETURN, | |
236 T_CONTINUE => T_CONTINUE, | |
237 T_THROW => T_THROW, | |
238 T_EXIT => T_EXIT, | |
239 ), | |
240 'strict' => true, | |
241 'shared' => true, | |
242 'with' => array( | |
243 T_DEFAULT => T_DEFAULT, | |
244 T_CASE => T_CASE, | |
245 T_SWITCH => T_SWITCH, | |
246 ), | |
247 ), | |
248 T_DEFAULT => array( | |
249 'start' => array( | |
250 T_COLON => T_COLON, | |
251 T_SEMICOLON => T_SEMICOLON, | |
252 ), | |
253 'end' => array( | |
254 T_BREAK => T_BREAK, | |
255 T_RETURN => T_RETURN, | |
256 T_CONTINUE => T_CONTINUE, | |
257 T_THROW => T_THROW, | |
258 T_EXIT => T_EXIT, | |
259 ), | |
260 'strict' => true, | |
261 'shared' => true, | |
262 'with' => array( | |
263 T_CASE => T_CASE, | |
264 T_SWITCH => T_SWITCH, | |
265 ), | |
266 ), | |
267 T_START_HEREDOC => array( | |
268 'start' => array(T_START_HEREDOC => T_START_HEREDOC), | |
269 'end' => array(T_END_HEREDOC => T_END_HEREDOC), | |
270 'strict' => true, | |
271 'shared' => false, | |
272 'with' => array(), | |
273 ), | |
274 ); | |
275 | |
276 /** | |
277 * A list of tokens that end the scope. | |
278 * | |
279 * This array is just a unique collection of the end tokens | |
280 * from the $scopeOpeners array. The data is duplicated here to | |
281 * save time during parsing of the file. | |
282 * | |
283 * @var array | |
284 */ | |
285 public $endScopeTokens = array( | |
286 T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET, | |
287 T_ENDIF => T_ENDIF, | |
288 T_ENDFOR => T_ENDFOR, | |
289 T_ENDFOREACH => T_ENDFOREACH, | |
290 T_ENDWHILE => T_ENDWHILE, | |
291 T_ENDSWITCH => T_ENDSWITCH, | |
292 T_BREAK => T_BREAK, | |
293 T_END_HEREDOC => T_END_HEREDOC, | |
294 ); | |
295 | |
296 /** | |
297 * A cache of different token types, resolved into arrays. | |
298 * | |
299 * @var array | |
300 * @see standardiseToken() | |
301 */ | |
302 private static $_resolveTokenCache = array(); | |
303 | |
304 | |
305 /** | |
306 * Creates an array of tokens when given some PHP code. | |
307 * | |
308 * Starts by using token_get_all() but does a lot of extra processing | |
309 * to insert information about the context of the token. | |
310 * | |
311 * @param string $string The string to tokenize. | |
312 * @param string $eolChar The EOL character to use for splitting strings. | |
313 * | |
314 * @return array | |
315 */ | |
316 public function tokenizeString($string, $eolChar='\n') | |
317 { | |
318 if (PHP_CODESNIFFER_VERBOSITY > 1) { | |
319 echo "\t*** START PHP TOKENIZING ***".PHP_EOL; | |
320 $isWin = false; | |
321 if (strtoupper(substr(PHP_OS, 0, 3)) === 'WIN') { | |
322 $isWin = true; | |
323 } | |
324 } | |
325 | |
326 $tokens = @token_get_all($string); | |
327 $finalTokens = array(); | |
328 | |
329 $newStackPtr = 0; | |
330 $numTokens = count($tokens); | |
331 $lastNotEmptyToken = 0; | |
332 | |
333 $insideInlineIf = array(); | |
334 $insideUseGroup = false; | |
335 | |
336 $commentTokenizer = new PHP_CodeSniffer_Tokenizers_Comment(); | |
337 | |
338 for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) { | |
339 $token = (array) $tokens[$stackPtr]; | |
340 $tokenIsArray = isset($token[1]); | |
341 | |
342 if (PHP_CODESNIFFER_VERBOSITY > 1) { | |
343 if ($tokenIsArray === true) { | |
344 $type = token_name($token[0]); | |
345 $content = PHP_CodeSniffer::prepareForOutput($token[1]); | |
346 } else { | |
347 $newToken = self::resolveSimpleToken($token[0]); | |
348 $type = $newToken['type']; | |
349 $content = PHP_CodeSniffer::prepareForOutput($token[0]); | |
350 } | |
351 | |
352 echo "\tProcess token "; | |
353 if ($tokenIsArray === true) { | |
354 echo "[$stackPtr]"; | |
355 } else { | |
356 echo " $stackPtr "; | |
357 } | |
358 | |
359 echo ": $type => $content"; | |
360 }//end if | |
361 | |
362 if ($newStackPtr > 0 && $finalTokens[($newStackPtr - 1)]['code'] !== T_WHITESPACE) { | |
363 $lastNotEmptyToken = ($newStackPtr - 1); | |
364 } | |
365 | |
366 /* | |
367 If we are using \r\n newline characters, the \r and \n are sometimes | |
368 split over two tokens. This normally occurs after comments. We need | |
369 to merge these two characters together so that our line endings are | |
370 consistent for all lines. | |
371 */ | |
372 | |
373 if ($tokenIsArray === true && substr($token[1], -1) === "\r") { | |
374 if (isset($tokens[($stackPtr + 1)]) === true | |
375 && is_array($tokens[($stackPtr + 1)]) === true | |
376 && $tokens[($stackPtr + 1)][1][0] === "\n" | |
377 ) { | |
378 $token[1] .= "\n"; | |
379 if (PHP_CODESNIFFER_VERBOSITY > 1) { | |
380 if ($isWin === true) { | |
381 echo '\n'; | |
382 } else { | |
383 echo "\033[30;1m\\n\033[0m"; | |
384 } | |
385 } | |
386 | |
387 if ($tokens[($stackPtr + 1)][1] === "\n") { | |
388 // This token's content has been merged into the previous, | |
389 // so we can skip it. | |
390 $tokens[($stackPtr + 1)] = ''; | |
391 } else { | |
392 $tokens[($stackPtr + 1)][1] = substr($tokens[($stackPtr + 1)][1], 1); | |
393 } | |
394 } | |
395 }//end if | |
396 | |
397 if (PHP_CODESNIFFER_VERBOSITY > 1) { | |
398 echo PHP_EOL; | |
399 } | |
400 | |
401 /* | |
402 Parse doc blocks into something that can be easily iterated over. | |
403 */ | |
404 | |
405 if ($tokenIsArray === true && $token[0] === T_DOC_COMMENT) { | |
406 $commentTokens = $commentTokenizer->tokenizeString($token[1], $eolChar, $newStackPtr); | |
407 foreach ($commentTokens as $commentToken) { | |
408 $finalTokens[$newStackPtr] = $commentToken; | |
409 $newStackPtr++; | |
410 } | |
411 | |
412 continue; | |
413 } | |
414 | |
415 /* | |
416 If this is a double quoted string, PHP will tokenize the whole | |
417 thing which causes problems with the scope map when braces are | |
418 within the string. So we need to merge the tokens together to | |
419 provide a single string. | |
420 */ | |
421 | |
422 if ($tokenIsArray === false && ($token[0] === '"' || $token[0] === 'b"')) { | |
423 // Binary casts need a special token. | |
424 if ($token[0] === 'b"') { | |
425 $finalTokens[$newStackPtr] = array( | |
426 'code' => T_BINARY_CAST, | |
427 'type' => 'T_BINARY_CAST', | |
428 'content' => 'b', | |
429 ); | |
430 $newStackPtr++; | |
431 } | |
432 | |
433 $tokenContent = '"'; | |
434 $nestedVars = array(); | |
435 for ($i = ($stackPtr + 1); $i < $numTokens; $i++) { | |
436 $subToken = (array) $tokens[$i]; | |
437 $subTokenIsArray = isset($subToken[1]); | |
438 | |
439 if ($subTokenIsArray === true) { | |
440 $tokenContent .= $subToken[1]; | |
441 if ($subToken[1] === '{' | |
442 && $subToken[0] !== T_ENCAPSED_AND_WHITESPACE | |
443 ) { | |
444 $nestedVars[] = $i; | |
445 } | |
446 } else { | |
447 $tokenContent .= $subToken[0]; | |
448 if ($subToken[0] === '}') { | |
449 array_pop($nestedVars); | |
450 } | |
451 } | |
452 | |
453 if ($subTokenIsArray === false | |
454 && $subToken[0] === '"' | |
455 && empty($nestedVars) === true | |
456 ) { | |
457 // We found the other end of the double quoted string. | |
458 break; | |
459 } | |
460 }//end for | |
461 | |
462 $stackPtr = $i; | |
463 | |
464 // Convert each line within the double quoted string to a | |
465 // new token, so it conforms with other multiple line tokens. | |
466 $tokenLines = explode($eolChar, $tokenContent); | |
467 $numLines = count($tokenLines); | |
468 $newToken = array(); | |
469 | |
470 for ($j = 0; $j < $numLines; $j++) { | |
471 $newToken['content'] = $tokenLines[$j]; | |
472 if ($j === ($numLines - 1)) { | |
473 if ($tokenLines[$j] === '') { | |
474 break; | |
475 } | |
476 } else { | |
477 $newToken['content'] .= $eolChar; | |
478 } | |
479 | |
480 $newToken['code'] = T_DOUBLE_QUOTED_STRING; | |
481 $newToken['type'] = 'T_DOUBLE_QUOTED_STRING'; | |
482 $finalTokens[$newStackPtr] = $newToken; | |
483 $newStackPtr++; | |
484 } | |
485 | |
486 // Continue, as we're done with this token. | |
487 continue; | |
488 }//end if | |
489 | |
490 /* | |
491 If this is a heredoc, PHP will tokenize the whole | |
492 thing which causes problems because heredocs almost | |
493 never contain real PHP code. | |
494 We want to leave the start and end heredoc tokens | |
495 alone though. | |
496 */ | |
497 | |
498 if ($tokenIsArray === true && $token[0] === T_START_HEREDOC) { | |
499 // Add the start heredoc token to the final array. | |
500 $finalTokens[$newStackPtr] = self::standardiseToken($token); | |
501 | |
502 // Check if this is actually a nowdoc and use a different token | |
503 // to help the sniffs. | |
504 $nowdoc = false; | |
505 if ($token[1][3] === "'") { | |
506 $finalTokens[$newStackPtr]['code'] = T_START_NOWDOC; | |
507 $finalTokens[$newStackPtr]['type'] = 'T_START_NOWDOC'; | |
508 $nowdoc = true; | |
509 } | |
510 | |
511 $tokenContent = ''; | |
512 for ($i = ($stackPtr + 1); $i < $numTokens; $i++) { | |
513 $subTokenIsArray = is_array($tokens[$i]); | |
514 if ($subTokenIsArray === true | |
515 && $tokens[$i][0] === T_END_HEREDOC | |
516 ) { | |
517 // We found the other end of the heredoc. | |
518 break; | |
519 } | |
520 | |
521 if ($subTokenIsArray === true) { | |
522 $tokenContent .= $tokens[$i][1]; | |
523 } else { | |
524 $tokenContent .= $tokens[$i]; | |
525 } | |
526 } | |
527 | |
528 if ($i === $numTokens) { | |
529 // We got to the end of the file and never | |
530 // found the closing token, so this probably wasn't | |
531 // a heredoc. | |
532 if (PHP_CODESNIFFER_VERBOSITY > 1) { | |
533 $type = $finalTokens[$newStackPtr]['type']; | |
534 echo "\t\t* failed to find the end of the here/nowdoc".PHP_EOL; | |
535 echo "\t\t* token $stackPtr changed from $type to T_STRING".PHP_EOL; | |
536 } | |
537 | |
538 $finalTokens[$newStackPtr]['code'] = T_STRING; | |
539 $finalTokens[$newStackPtr]['type'] = 'T_STRING'; | |
540 $newStackPtr++; | |
541 continue; | |
542 } | |
543 | |
544 $stackPtr = $i; | |
545 $newStackPtr++; | |
546 | |
547 // Convert each line within the heredoc to a | |
548 // new token, so it conforms with other multiple line tokens. | |
549 $tokenLines = explode($eolChar, $tokenContent); | |
550 $numLines = count($tokenLines); | |
551 $newToken = array(); | |
552 | |
553 for ($j = 0; $j < $numLines; $j++) { | |
554 $newToken['content'] = $tokenLines[$j]; | |
555 if ($j === ($numLines - 1)) { | |
556 if ($tokenLines[$j] === '') { | |
557 break; | |
558 } | |
559 } else { | |
560 $newToken['content'] .= $eolChar; | |
561 } | |
562 | |
563 if ($nowdoc === true) { | |
564 $newToken['code'] = T_NOWDOC; | |
565 $newToken['type'] = 'T_NOWDOC'; | |
566 } else { | |
567 $newToken['code'] = T_HEREDOC; | |
568 $newToken['type'] = 'T_HEREDOC'; | |
569 } | |
570 | |
571 $finalTokens[$newStackPtr] = $newToken; | |
572 $newStackPtr++; | |
573 }//end for | |
574 | |
575 // Add the end heredoc token to the final array. | |
576 $finalTokens[$newStackPtr] = self::standardiseToken($tokens[$stackPtr]); | |
577 | |
578 if ($nowdoc === true) { | |
579 $finalTokens[$newStackPtr]['code'] = T_END_NOWDOC; | |
580 $finalTokens[$newStackPtr]['type'] = 'T_END_NOWDOC'; | |
581 $nowdoc = true; | |
582 } | |
583 | |
584 $newStackPtr++; | |
585 | |
586 // Continue, as we're done with this token. | |
587 continue; | |
588 }//end if | |
589 | |
590 /* | |
591 Before PHP 5.6, the ... operator was tokenized as three | |
592 T_STRING_CONCAT tokens in a row. So look for and combine | |
593 these tokens in earlier versions. | |
594 */ | |
595 | |
596 if ($tokenIsArray === false | |
597 && $token[0] === '.' | |
598 && isset($tokens[($stackPtr + 1)]) === true | |
599 && isset($tokens[($stackPtr + 2)]) === true | |
600 && $tokens[($stackPtr + 1)] === '.' | |
601 && $tokens[($stackPtr + 2)] === '.' | |
602 ) { | |
603 $newToken = array(); | |
604 $newToken['code'] = T_ELLIPSIS; | |
605 $newToken['type'] = 'T_ELLIPSIS'; | |
606 $newToken['content'] = '...'; | |
607 $finalTokens[$newStackPtr] = $newToken; | |
608 | |
609 $newStackPtr++; | |
610 $stackPtr += 2; | |
611 continue; | |
612 } | |
613 | |
614 /* | |
615 Before PHP 5.6, the ** operator was tokenized as two | |
616 T_MULTIPLY tokens in a row. So look for and combine | |
617 these tokens in earlier versions. | |
618 */ | |
619 | |
620 if ($tokenIsArray === false | |
621 && $token[0] === '*' | |
622 && isset($tokens[($stackPtr + 1)]) === true | |
623 && $tokens[($stackPtr + 1)] === '*' | |
624 ) { | |
625 $newToken = array(); | |
626 $newToken['code'] = T_POW; | |
627 $newToken['type'] = 'T_POW'; | |
628 $newToken['content'] = '**'; | |
629 $finalTokens[$newStackPtr] = $newToken; | |
630 | |
631 $newStackPtr++; | |
632 $stackPtr++; | |
633 continue; | |
634 } | |
635 | |
636 /* | |
637 Before PHP 5.6, the **= operator was tokenized as | |
638 T_MULTIPLY followed by T_MUL_EQUAL. So look for and combine | |
639 these tokens in earlier versions. | |
640 */ | |
641 | |
642 if ($tokenIsArray === false | |
643 && $token[0] === '*' | |
644 && isset($tokens[($stackPtr + 1)]) === true | |
645 && is_array($tokens[($stackPtr + 1)]) === true | |
646 && $tokens[($stackPtr + 1)][1] === '*=' | |
647 ) { | |
648 $newToken = array(); | |
649 $newToken['code'] = T_POW_EQUAL; | |
650 $newToken['type'] = 'T_POW_EQUAL'; | |
651 $newToken['content'] = '**='; | |
652 $finalTokens[$newStackPtr] = $newToken; | |
653 | |
654 $newStackPtr++; | |
655 $stackPtr++; | |
656 continue; | |
657 } | |
658 | |
659 /* | |
660 Before PHP 7, the ??= operator was tokenized as | |
661 T_INLINE_THEN, T_INLINE_THEN, T_EQUAL. | |
662 Between PHP 7.0 and 7.2, the ??= operator was tokenized as | |
663 T_COALESCE, T_EQUAL. | |
664 So look for and combine these tokens in earlier versions. | |
665 */ | |
666 | |
667 if (($tokenIsArray === false | |
668 && $token[0] === '?' | |
669 && isset($tokens[($stackPtr + 1)]) === true | |
670 && $tokens[($stackPtr + 1)][0] === '?' | |
671 && isset($tokens[($stackPtr + 2)]) === true | |
672 && $tokens[($stackPtr + 2)][0] === '=') | |
673 || ($tokenIsArray === true | |
674 && $token[0] === T_COALESCE | |
675 && isset($tokens[($stackPtr + 1)]) === true | |
676 && $tokens[($stackPtr + 1)][0] === '=') | |
677 ) { | |
678 $newToken = array(); | |
679 $newToken['code'] = T_COALESCE_EQUAL; | |
680 $newToken['type'] = 'T_COALESCE_EQUAL'; | |
681 $newToken['content'] = '??='; | |
682 $finalTokens[$newStackPtr] = $newToken; | |
683 | |
684 $newStackPtr++; | |
685 $stackPtr++; | |
686 | |
687 if ($tokenIsArray === false) { | |
688 // Pre PHP 7. | |
689 $stackPtr++; | |
690 } | |
691 | |
692 continue; | |
693 } | |
694 | |
695 /* | |
696 Before PHP 7, the ?? operator was tokenized as | |
697 T_INLINE_THEN followed by T_INLINE_THEN. | |
698 So look for and combine these tokens in earlier versions. | |
699 */ | |
700 | |
701 if ($tokenIsArray === false | |
702 && $token[0] === '?' | |
703 && isset($tokens[($stackPtr + 1)]) === true | |
704 && $tokens[($stackPtr + 1)][0] === '?' | |
705 ) { | |
706 $newToken = array(); | |
707 $newToken['code'] = T_COALESCE; | |
708 $newToken['type'] = 'T_COALESCE'; | |
709 $newToken['content'] = '??'; | |
710 $finalTokens[$newStackPtr] = $newToken; | |
711 | |
712 $newStackPtr++; | |
713 $stackPtr++; | |
714 continue; | |
715 } | |
716 | |
717 /* | |
718 Convert ? to T_NULLABLE OR T_INLINE_THEN | |
719 */ | |
720 | |
721 if ($tokenIsArray === false && $token[0] === '?') { | |
722 $newToken = array(); | |
723 $newToken['content'] = '?'; | |
724 | |
725 for ($i = ($stackPtr - 1); $i >= 0; $i--) { | |
726 if (is_array($tokens[$i]) === true) { | |
727 $tokenType = $tokens[$i][0]; | |
728 } else { | |
729 $tokenType = $tokens[$i]; | |
730 } | |
731 | |
732 if ($tokenType === T_FUNCTION) { | |
733 $newToken['code'] = T_NULLABLE; | |
734 $newToken['type'] = 'T_NULLABLE'; | |
735 break; | |
736 } else if (in_array($tokenType, array(T_OPEN_TAG, T_OPEN_TAG_WITH_ECHO, '{', ';')) === true) { | |
737 $newToken['code'] = T_INLINE_THEN; | |
738 $newToken['type'] = 'T_INLINE_THEN'; | |
739 | |
740 $insideInlineIf[] = $stackPtr; | |
741 break; | |
742 } | |
743 } | |
744 | |
745 $finalTokens[$newStackPtr] = $newToken; | |
746 $newStackPtr++; | |
747 continue; | |
748 }//end if | |
749 | |
750 /* | |
751 Tokens after a double colon may look like scope openers, | |
752 such as when writing code like Foo::NAMESPACE, but they are | |
753 only ever variables or strings. | |
754 */ | |
755 | |
756 if ($stackPtr > 1 | |
757 && (is_array($tokens[($stackPtr - 1)]) === true | |
758 && $tokens[($stackPtr - 1)][0] === T_PAAMAYIM_NEKUDOTAYIM) | |
759 && $tokenIsArray === true | |
760 && $token[0] !== T_STRING | |
761 && $token[0] !== T_VARIABLE | |
762 && $token[0] !== T_DOLLAR | |
763 && isset(PHP_CodeSniffer_Tokens::$emptyTokens[$token[0]]) === false | |
764 ) { | |
765 $newToken = array(); | |
766 $newToken['code'] = T_STRING; | |
767 $newToken['type'] = 'T_STRING'; | |
768 $newToken['content'] = $token[1]; | |
769 $finalTokens[$newStackPtr] = $newToken; | |
770 | |
771 $newStackPtr++; | |
772 continue; | |
773 } | |
774 | |
775 /* | |
776 The string-like token after a function keyword should always be | |
777 tokenized as T_STRING even if it appears to be a different token, | |
778 such as when writing code like: function default(): foo | |
779 so go forward and change the token type before it is processed. | |
780 */ | |
781 | |
782 if ($tokenIsArray === true && $token[0] === T_FUNCTION) { | |
783 for ($x = ($stackPtr + 1); $x < $numTokens; $x++) { | |
784 if (is_array($tokens[$x]) === false | |
785 || isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x][0]]) === false | |
786 ) { | |
787 // Non-empty content. | |
788 break; | |
789 } | |
790 } | |
791 | |
792 if ($x < $numTokens && is_array($tokens[$x]) === true) { | |
793 $tokens[$x][0] = T_STRING; | |
794 } | |
795 } | |
796 | |
797 /* | |
798 Before PHP 7, the <=> operator was tokenized as | |
799 T_IS_SMALLER_OR_EQUAL followed by T_GREATER_THAN. | |
800 So look for and combine these tokens in earlier versions. | |
801 */ | |
802 | |
803 if ($tokenIsArray === true | |
804 && $token[0] === T_IS_SMALLER_OR_EQUAL | |
805 && isset($tokens[($stackPtr + 1)]) === true | |
806 && $tokens[($stackPtr + 1)][0] === '>' | |
807 ) { | |
808 $newToken = array(); | |
809 $newToken['code'] = T_SPACESHIP; | |
810 $newToken['type'] = 'T_SPACESHIP'; | |
811 $newToken['content'] = '<=>'; | |
812 $finalTokens[$newStackPtr] = $newToken; | |
813 | |
814 $newStackPtr++; | |
815 $stackPtr++; | |
816 continue; | |
817 } | |
818 | |
819 /* | |
820 Emulate traits in PHP versions less than 5.4. | |
821 */ | |
822 | |
823 if ($tokenIsArray === true | |
824 && $token[0] === T_STRING | |
825 && strtolower($token[1]) === 'trait' | |
826 && $tokens[($stackPtr - 1)][0] !== T_OBJECT_OPERATOR | |
827 && $tokens[($stackPtr - 1)][0] !== T_PAAMAYIM_NEKUDOTAYIM | |
828 ) { | |
829 $finalTokens[$newStackPtr] = array( | |
830 'content' => $token[1], | |
831 'code' => T_TRAIT, | |
832 'type' => 'T_TRAIT', | |
833 ); | |
834 | |
835 if (PHP_CODESNIFFER_VERBOSITY > 1) { | |
836 echo "\t\t* token $stackPtr changed from T_STRING to T_TRAIT".PHP_EOL; | |
837 } | |
838 | |
839 $newStackPtr++; | |
840 continue; | |
841 } | |
842 | |
843 /* | |
844 PHP doesn't assign a token to goto labels, so we have to. | |
845 These are just string tokens with a single colon after them. Double | |
846 colons are already tokenized and so don't interfere with this check. | |
847 But we do have to account for CASE statements, that look just like | |
848 goto labels. | |
849 */ | |
850 | |
851 if ($tokenIsArray === true | |
852 && $token[0] === T_STRING | |
853 && isset($tokens[($stackPtr + 1)]) === true | |
854 && $tokens[($stackPtr + 1)] === ':' | |
855 && $tokens[($stackPtr - 1)][0] !== T_PAAMAYIM_NEKUDOTAYIM | |
856 ) { | |
857 $stopTokens = array( | |
858 T_CASE => true, | |
859 T_SEMICOLON => true, | |
860 T_OPEN_CURLY_BRACKET => true, | |
861 T_INLINE_THEN => true, | |
862 ); | |
863 | |
864 for ($x = ($newStackPtr - 1); $x > 0; $x--) { | |
865 if (isset($stopTokens[$finalTokens[$x]['code']]) === true) { | |
866 break; | |
867 } | |
868 } | |
869 | |
870 if ($finalTokens[$x]['code'] !== T_CASE | |
871 && $finalTokens[$x]['code'] !== T_INLINE_THEN | |
872 ) { | |
873 $finalTokens[$newStackPtr] = array( | |
874 'content' => $token[1].':', | |
875 'code' => T_GOTO_LABEL, | |
876 'type' => 'T_GOTO_LABEL', | |
877 ); | |
878 | |
879 if (PHP_CODESNIFFER_VERBOSITY > 1) { | |
880 echo "\t\t* token $stackPtr changed from T_STRING to T_GOTO_LABEL".PHP_EOL; | |
881 echo "\t\t* skipping T_COLON token ".($stackPtr + 1).PHP_EOL; | |
882 } | |
883 | |
884 $newStackPtr++; | |
885 $stackPtr++; | |
886 continue; | |
887 } | |
888 }//end if | |
889 | |
890 /* | |
891 HHVM 3.5 tokenizes "else[\s]+if" as a T_ELSEIF token while PHP | |
892 proper only tokenizes "elseif" as a T_ELSEIF token. So split | |
893 up the HHVM token to make it look like proper PHP. | |
894 */ | |
895 | |
896 if ($tokenIsArray === true | |
897 && $token[0] === T_ELSEIF | |
898 && strtolower($token[1]) !== 'elseif' | |
899 ) { | |
900 $finalTokens[$newStackPtr] = array( | |
901 'content' => substr($token[1], 0, 4), | |
902 'code' => T_ELSE, | |
903 'type' => 'T_ELSE', | |
904 ); | |
905 | |
906 $newStackPtr++; | |
907 $finalTokens[$newStackPtr] = array( | |
908 'content' => substr($token[1], 4, -2), | |
909 'code' => T_WHITESPACE, | |
910 'type' => 'T_WHITESPACE', | |
911 ); | |
912 | |
913 $newStackPtr++; | |
914 $finalTokens[$newStackPtr] = array( | |
915 'content' => substr($token[1], -2), | |
916 'code' => T_IF, | |
917 'type' => 'T_IF', | |
918 ); | |
919 | |
920 if (PHP_CODESNIFFER_VERBOSITY > 1) { | |
921 echo "\t\t* token $stackPtr changed from T_ELSEIF to T_ELSE/T_WHITESPACE/T_IF".PHP_EOL; | |
922 } | |
923 | |
924 $newStackPtr++; | |
925 continue; | |
926 }//end if | |
927 | |
928 /* | |
929 HHVM 3.5 and 3.6 tokenize a hashbang line such as #!/usr/bin/php | |
930 as T_HASHBANG while PHP proper uses T_INLINE_HTML. | |
931 */ | |
932 | |
933 if ($tokenIsArray === true && token_name($token[0]) === 'T_HASHBANG') { | |
934 $finalTokens[$newStackPtr] = array( | |
935 'content' => $token[1], | |
936 'code' => T_INLINE_HTML, | |
937 'type' => 'T_INLINE_HTML', | |
938 ); | |
939 | |
940 if (PHP_CODESNIFFER_VERBOSITY > 1) { | |
941 echo "\t\t* token $stackPtr changed from T_HASHBANG to T_INLINE_HTML".PHP_EOL; | |
942 } | |
943 | |
944 $newStackPtr++; | |
945 continue; | |
946 }//end if | |
947 | |
948 /* | |
949 If this token has newlines in its content, split each line up | |
950 and create a new token for each line. We do this so it's easier | |
951 to ascertain where errors occur on a line. | |
952 Note that $token[1] is the token's content. | |
953 */ | |
954 | |
955 if ($tokenIsArray === true && strpos($token[1], $eolChar) !== false) { | |
956 $tokenLines = explode($eolChar, $token[1]); | |
957 $numLines = count($tokenLines); | |
958 $newToken = array( | |
959 'type' => token_name($token[0]), | |
960 'code' => $token[0], | |
961 'content' => '', | |
962 ); | |
963 | |
964 for ($i = 0; $i < $numLines; $i++) { | |
965 $newToken['content'] = $tokenLines[$i]; | |
966 if ($i === ($numLines - 1)) { | |
967 if ($tokenLines[$i] === '') { | |
968 break; | |
969 } | |
970 } else { | |
971 $newToken['content'] .= $eolChar; | |
972 } | |
973 | |
974 $finalTokens[$newStackPtr] = $newToken; | |
975 $newStackPtr++; | |
976 } | |
977 } else { | |
978 if ($tokenIsArray === true && $token[0] === T_STRING) { | |
979 // Some T_STRING tokens should remain that way | |
980 // due to their context. | |
981 $context = array( | |
982 T_OBJECT_OPERATOR => true, | |
983 T_FUNCTION => true, | |
984 T_CLASS => true, | |
985 T_EXTENDS => true, | |
986 T_IMPLEMENTS => true, | |
987 T_NEW => true, | |
988 T_CONST => true, | |
989 T_NS_SEPARATOR => true, | |
990 T_USE => true, | |
991 T_NAMESPACE => true, | |
992 T_PAAMAYIM_NEKUDOTAYIM => true, | |
993 ); | |
994 if (isset($context[$finalTokens[$lastNotEmptyToken]['code']]) === true) { | |
995 // Special case for syntax like: return new self | |
996 // where self should not be a string. | |
997 if ($finalTokens[$lastNotEmptyToken]['code'] === T_NEW | |
998 && strtolower($token[1]) === 'self' | |
999 ) { | |
1000 $finalTokens[$newStackPtr] = array( | |
1001 'content' => $token[1], | |
1002 'code' => T_SELF, | |
1003 'type' => 'T_SELF', | |
1004 ); | |
1005 } else { | |
1006 $finalTokens[$newStackPtr] = array( | |
1007 'content' => $token[1], | |
1008 'code' => T_STRING, | |
1009 'type' => 'T_STRING', | |
1010 ); | |
1011 } | |
1012 | |
1013 $newStackPtr++; | |
1014 continue; | |
1015 }//end if | |
1016 }//end if | |
1017 | |
1018 $newToken = null; | |
1019 if ($tokenIsArray === false) { | |
1020 if (isset(self::$_resolveTokenCache[$token[0]]) === true) { | |
1021 $newToken = self::$_resolveTokenCache[$token[0]]; | |
1022 } | |
1023 } else { | |
1024 $cacheKey = null; | |
1025 if ($token[0] === T_STRING) { | |
1026 $cacheKey = strtolower($token[1]); | |
1027 } else if ($token[0] !== T_CURLY_OPEN) { | |
1028 $cacheKey = $token[0]; | |
1029 } | |
1030 | |
1031 if ($cacheKey !== null && isset(self::$_resolveTokenCache[$cacheKey]) === true) { | |
1032 $newToken = self::$_resolveTokenCache[$cacheKey]; | |
1033 $newToken['content'] = $token[1]; | |
1034 } | |
1035 } | |
1036 | |
1037 if ($newToken === null) { | |
1038 $newToken = self::standardiseToken($token); | |
1039 } | |
1040 | |
1041 // Convert colons that are actually the ELSE component of an | |
1042 // inline IF statement. | |
1043 if (empty($insideInlineIf) === false && $newToken['code'] === T_COLON) { | |
1044 array_pop($insideInlineIf); | |
1045 $newToken['code'] = T_INLINE_ELSE; | |
1046 $newToken['type'] = 'T_INLINE_ELSE'; | |
1047 } | |
1048 | |
1049 // This is a special condition for T_ARRAY tokens used for | |
1050 // type hinting function arguments as being arrays. We want to keep | |
1051 // the parenthesis map clean, so let's tag these tokens as | |
1052 // T_ARRAY_HINT. | |
1053 if ($newToken['code'] === T_ARRAY) { | |
1054 for ($i = $stackPtr; $i < $numTokens; $i++) { | |
1055 if ($tokens[$i] === '(') { | |
1056 break; | |
1057 } else if ($tokens[$i][0] === T_VARIABLE) { | |
1058 $newToken['code'] = T_ARRAY_HINT; | |
1059 $newToken['type'] = 'T_ARRAY_HINT'; | |
1060 break; | |
1061 } | |
1062 } | |
1063 } | |
1064 | |
1065 // This is a special case when checking PHP 5.5+ code in PHP < 5.5 | |
1066 // where "finally" should be T_FINALLY instead of T_STRING. | |
1067 if ($newToken['code'] === T_STRING | |
1068 && strtolower($newToken['content']) === 'finally' | |
1069 ) { | |
1070 $newToken['code'] = T_FINALLY; | |
1071 $newToken['type'] = 'T_FINALLY'; | |
1072 } | |
1073 | |
1074 // This is a special case for the PHP 5.5 classname::class syntax | |
1075 // where "class" should be T_STRING instead of T_CLASS. | |
1076 if (($newToken['code'] === T_CLASS | |
1077 || $newToken['code'] === T_FUNCTION) | |
1078 && $finalTokens[($newStackPtr - 1)]['code'] === T_DOUBLE_COLON | |
1079 ) { | |
1080 $newToken['code'] = T_STRING; | |
1081 $newToken['type'] = 'T_STRING'; | |
1082 } | |
1083 | |
1084 // This is a special case for PHP 5.6 use function and use const | |
1085 // where "function" and "const" should be T_STRING instead of T_FUNCTION | |
1086 // and T_CONST. | |
1087 if (($newToken['code'] === T_FUNCTION | |
1088 || $newToken['code'] === T_CONST) | |
1089 && $finalTokens[$lastNotEmptyToken]['code'] === T_USE | |
1090 ) { | |
1091 $newToken['code'] = T_STRING; | |
1092 $newToken['type'] = 'T_STRING'; | |
1093 } | |
1094 | |
1095 // This is a special case for use groups in PHP 7+ where leaving | |
1096 // the curly braces as their normal tokens would confuse | |
1097 // the scope map and sniffs. | |
1098 if ($newToken['code'] === T_OPEN_CURLY_BRACKET | |
1099 && $finalTokens[$lastNotEmptyToken]['code'] === T_NS_SEPARATOR | |
1100 ) { | |
1101 $newToken['code'] = T_OPEN_USE_GROUP; | |
1102 $newToken['type'] = 'T_OPEN_USE_GROUP'; | |
1103 $insideUseGroup = true; | |
1104 } | |
1105 | |
1106 if ($insideUseGroup === true && $newToken['code'] === T_CLOSE_CURLY_BRACKET) { | |
1107 $newToken['code'] = T_CLOSE_USE_GROUP; | |
1108 $newToken['type'] = 'T_CLOSE_USE_GROUP'; | |
1109 $insideUseGroup = false; | |
1110 } | |
1111 | |
1112 $finalTokens[$newStackPtr] = $newToken; | |
1113 $newStackPtr++; | |
1114 }//end if | |
1115 }//end for | |
1116 | |
1117 if (PHP_CODESNIFFER_VERBOSITY > 1) { | |
1118 echo "\t*** END PHP TOKENIZING ***".PHP_EOL; | |
1119 } | |
1120 | |
1121 return $finalTokens; | |
1122 | |
1123 }//end tokenizeString() | |
1124 | |
1125 | |
/**
 * Performs additional processing after main tokenizing.
 *
 * The token stack is walked from the last token to the first and the
 * following corrections are applied in place:
 *
 * - Tokens that have a scope opener but no scope condition (alternate
 *   IF/ENDIF syntax) inherit the scope condition of their opener.
 * - T_FUNCTION tokens that are directly followed by an open parenthesis
 *   (optionally preceded by "&") become T_CLOSURE.
 * - The last non-empty token before a function's scope opener (or before
 *   the terminating semicolon when there is no scope) becomes
 *   T_RETURN_TYPE when it is a type-like token.
 * - T_CLASS tokens that are not followed by a name become T_ANON_CLASS.
 * - Square brackets that open a short array become
 *   T_OPEN_SHORT_ARRAY/T_CLOSE_SHORT_ARRAY.
 * - T_STATIC directly after T_INSTANCEOF becomes T_STRING.
 * - T_ECHO tokens whose content is the short echo tag become
 *   T_OPEN_TAG_WITH_ECHO.
 * - T_TRUE/T_FALSE/T_NULL followed by an object operator, namespace
 *   separator or double colon become T_STRING.
 * - The token following T_CONST is forced to T_STRING.
 * - CASE/DEFAULT statements that use curly braces get their scope
 *   opener/closer re-pointed at those braces.
 *
 * @param array  $tokens  The array of tokens to process. Modified in place.
 * @param string $eolChar The EOL character to use for splitting strings.
 *                        Not used by this method's body.
 *
 * @return void
 */
public function processAdditional(&$tokens, $eolChar)
{
    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t*** START ADDITIONAL PHP PROCESSING ***".PHP_EOL;
    }

    $numTokens = count($tokens);
    for ($i = ($numTokens - 1); $i >= 0; $i--) {
        // Check for any unset scope conditions due to alternate IF/ENDIF syntax.
        if (isset($tokens[$i]['scope_opener']) === true
            && isset($tokens[$i]['scope_condition']) === false
        ) {
            $tokens[$i]['scope_condition'] = $tokens[$tokens[$i]['scope_opener']]['scope_condition'];
        }

        if ($tokens[$i]['code'] === T_FUNCTION) {
            /*
                Detect functions that are actually closures and
                assign them a different token.
            */

            if (isset($tokens[$i]['scope_opener']) === true) {
                // Find the first token after the keyword that is neither
                // empty nor a by-reference "&".
                for ($x = ($i + 1); $x < $numTokens; $x++) {
                    if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false
                        && $tokens[$x]['code'] !== T_BITWISE_AND
                    ) {
                        break;
                    }
                }

                // No name between "function" and "(" means this is a closure.
                if ($tokens[$x]['code'] === T_OPEN_PARENTHESIS) {
                    $tokens[$i]['code'] = T_CLOSURE;
                    $tokens[$i]['type'] = 'T_CLOSURE';
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $line = $tokens[$i]['line'];
                        echo "\t* token $i on line $line changed from T_FUNCTION to T_CLOSURE".PHP_EOL;
                    }

                    // Tokens inside the closure recorded T_FUNCTION as their
                    // condition for this pointer; update them to match.
                    for ($x = ($tokens[$i]['scope_opener'] + 1); $x < $tokens[$i]['scope_closer']; $x++) {
                        if (isset($tokens[$x]['conditions'][$i]) === false) {
                            continue;
                        }

                        $tokens[$x]['conditions'][$i] = T_CLOSURE;
                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            $type = $tokens[$x]['type'];
                            echo "\t\t* cleaned $x ($type) *".PHP_EOL;
                        }
                    }
                }

                $tokenAfterReturnTypeHint = $tokens[$i]['scope_opener'];
            } else if (isset($tokens[$i]['parenthesis_closer']) === true) {
                // No scope (e.g. no body): the declaration ends at the
                // next semicolon after the parameter list.
                $tokenAfterReturnTypeHint = null;
                for ($x = ($tokens[$i]['parenthesis_closer'] + 1); $x < $numTokens; $x++) {
                    if ($tokens[$x]['code'] === T_SEMICOLON) {
                        $tokenAfterReturnTypeHint = $x;
                        break;
                    }
                }

                if ($tokenAfterReturnTypeHint === null) {
                    // Probably a syntax error.
                    continue;
                }
            } else {
                // Probably a syntax error.
                continue;
            }//end if

            /*
                Detect function return values and assign them
                a special token, because PHP doesn't.
            */

            // Only the last non-empty token before the opener/semicolon
            // is inspected; the loop stops there either way.
            for ($x = ($tokenAfterReturnTypeHint - 1); $x > $i; $x--) {
                if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false) {
                    if (in_array($tokens[$x]['code'], array(T_STRING, T_ARRAY, T_ARRAY_HINT, T_CALLABLE, T_SELF, T_PARENT), true) === true) {
                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            $line = $tokens[$x]['line'];
                            $type = $tokens[$x]['type'];
                            echo "\t* token $x on line $line changed from $type to T_RETURN_TYPE".PHP_EOL;
                        }

                        $tokens[$x]['code'] = T_RETURN_TYPE;
                        $tokens[$x]['type'] = 'T_RETURN_TYPE';
                    }

                    break;
                }
            }

            continue;
        } else if ($tokens[$i]['code'] === T_CLASS && isset($tokens[$i]['scope_opener']) === true) {
            /*
                Detect anonymous classes and assign them a different token.
            */

            for ($x = ($i + 1); $x < $numTokens; $x++) {
                if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false) {
                    break;
                }
            }

            // A class keyword followed by anything other than a name
            // is an anonymous class.
            if ($tokens[$x]['code'] === T_OPEN_PARENTHESIS
                || $tokens[$x]['code'] === T_OPEN_CURLY_BRACKET
                || $tokens[$x]['code'] === T_EXTENDS
                || $tokens[$x]['code'] === T_IMPLEMENTS
            ) {
                $tokens[$i]['code'] = T_ANON_CLASS;
                $tokens[$i]['type'] = 'T_ANON_CLASS';
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $line = $tokens[$i]['line'];
                    echo "\t* token $i on line $line changed from T_CLASS to T_ANON_CLASS".PHP_EOL;
                }

                // Keep the conditions of the tokens inside the class in sync.
                for ($x = ($tokens[$i]['scope_opener'] + 1); $x < $tokens[$i]['scope_closer']; $x++) {
                    if (isset($tokens[$x]['conditions'][$i]) === false) {
                        continue;
                    }

                    $tokens[$x]['conditions'][$i] = T_ANON_CLASS;
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $type = $tokens[$x]['type'];
                        echo "\t\t* cleaned $x ($type) *".PHP_EOL;
                    }
                }
            }

            continue;
        } else if ($tokens[$i]['code'] === T_OPEN_SQUARE_BRACKET) {
            if (isset($tokens[$i]['bracket_closer']) === false) {
                continue;
            }

            // Unless there is a variable or a bracket before this token,
            // it is the start of an array being defined using the short syntax.
            $isShortArray = false;
            $allowed      = array(
                             T_CLOSE_SQUARE_BRACKET => T_CLOSE_SQUARE_BRACKET,
                             T_CLOSE_PARENTHESIS    => T_CLOSE_PARENTHESIS,
                             T_VARIABLE             => T_VARIABLE,
                             T_OBJECT_OPERATOR      => T_OBJECT_OPERATOR,
                             T_STRING               => T_STRING,
                            );

            for ($x = ($i - 1); $x > 0; $x--) {
                // If we hit a scope opener, the statement has ended
                // without finding anything, so it's probably an array
                // using PHP 7.1 short list syntax.
                if (isset($tokens[$x]['scope_opener']) === true) {
                    $isShortArray = true;
                    break;
                }

                // Jump over a complete bracket pair in one step.
                if (isset($tokens[$x]['bracket_opener']) === true
                    && $x > $tokens[$x]['bracket_opener']
                ) {
                    $x = $tokens[$x]['bracket_opener'];
                    continue;
                }

                if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false) {
                    if (isset($allowed[$tokens[$x]['code']]) === false) {
                        $isShortArray = true;
                    }

                    break;
                }
            }//end for

            if ($isShortArray === true) {
                $tokens[$i]['code'] = T_OPEN_SHORT_ARRAY;
                $tokens[$i]['type'] = 'T_OPEN_SHORT_ARRAY';

                $closer = $tokens[$i]['bracket_closer'];
                $tokens[$closer]['code'] = T_CLOSE_SHORT_ARRAY;
                $tokens[$closer]['type'] = 'T_CLOSE_SHORT_ARRAY';
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $line = $tokens[$i]['line'];
                    echo "\t* token $i on line $line changed from T_OPEN_SQUARE_BRACKET to T_OPEN_SHORT_ARRAY".PHP_EOL;
                    $line = $tokens[$closer]['line'];
                    echo "\t* token $closer on line $line changed from T_CLOSE_SQUARE_BRACKET to T_CLOSE_SHORT_ARRAY".PHP_EOL;
                }
            }

            continue;
        } else if ($tokens[$i]['code'] === T_STATIC) {
            // Find the previous non-empty token.
            for ($x = ($i - 1); $x > 0; $x--) {
                if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false) {
                    break;
                }
            }

            // "instanceof static" uses static as a class reference.
            if ($tokens[$x]['code'] === T_INSTANCEOF) {
                $tokens[$i]['code'] = T_STRING;
                $tokens[$i]['type'] = 'T_STRING';

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $line = $tokens[$i]['line'];
                    echo "\t* token $i on line $line changed from T_STATIC to T_STRING".PHP_EOL;
                }
            }

            continue;
        } else if ($tokens[$i]['code'] === T_ECHO && $tokens[$i]['content'] === '<?=') {
            // HHVM tokenizes <?= as T_ECHO but it should be T_OPEN_TAG_WITH_ECHO.
            $tokens[$i]['code'] = T_OPEN_TAG_WITH_ECHO;
            $tokens[$i]['type'] = 'T_OPEN_TAG_WITH_ECHO';

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                $line = $tokens[$i]['line'];
                echo "\t* token $i on line $line changed from T_ECHO to T_OPEN_TAG_WITH_ECHO".PHP_EOL;
            }
        } else if ($tokens[$i]['code'] === T_TRUE
            || $tokens[$i]['code'] === T_FALSE
            || $tokens[$i]['code'] === T_NULL
        ) {
            // Find the next non-empty token. The bound is on $x (the
            // scanning pointer) so the search stops at the end of the stack.
            for ($x = ($i + 1); $x < $numTokens; $x++) {
                if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false) {
                    // Non-whitespace content.
                    break;
                }
            }

            $context = array(
                        T_OBJECT_OPERATOR      => true,
                        T_NS_SEPARATOR         => true,
                        T_PAAMAYIM_NEKUDOTAYIM => true,
                       );

            // $x equals $numTokens when nothing but empty tokens follow,
            // so make sure the token exists before inspecting it.
            if (isset($tokens[$x]) === true
                && isset($context[$tokens[$x]['code']]) === true
            ) {
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $line = $tokens[$i]['line'];
                    $type = $tokens[$i]['type'];
                    echo "\t* token $i on line $line changed from $type to T_STRING".PHP_EOL;
                }

                $tokens[$i]['code'] = T_STRING;
                $tokens[$i]['type'] = 'T_STRING';
            }
        } else if ($tokens[$i]['code'] === T_CONST) {
            // Context sensitive keywords support.
            // As above, the bound is on $x so the scan cannot run past
            // the end of the stack.
            for ($x = ($i + 1); $x < $numTokens; $x++) {
                if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false) {
                    // Non-whitespace content.
                    break;
                }
            }

            // Only retag a token that really exists; writing to
            // $tokens[$numTokens] would append a bogus token to the stack.
            if (isset($tokens[$x]) === true
                && $tokens[$x]['code'] !== T_STRING
            ) {
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $line = $tokens[$x]['line'];
                    $type = $tokens[$x]['type'];
                    echo "\t* token $x on line $line changed from $type to T_STRING".PHP_EOL;
                }

                $tokens[$x]['code'] = T_STRING;
                $tokens[$x]['type'] = 'T_STRING';
            }
        }//end if

        if (($tokens[$i]['code'] !== T_CASE
            && $tokens[$i]['code'] !== T_DEFAULT)
            || isset($tokens[$i]['scope_opener']) === false
        ) {
            // Only interested in CASE and DEFAULT statements from here on in.
            continue;
        }

        $scopeOpener = $tokens[$i]['scope_opener'];
        $scopeCloser = $tokens[$i]['scope_closer'];

        // If the first char after the opener is a curly brace
        // and that brace has been ignored, it is actually
        // opening this case statement and the opener and closer are
        // probably set incorrectly.
        for ($x = ($scopeOpener + 1); $x < $numTokens; $x++) {
            if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false) {
                // Non-whitespace content.
                break;
            }
        }

        if ($tokens[$x]['code'] === T_CASE || $tokens[$x]['code'] === T_DEFAULT) {
            // Special case for multiple CASE statements that share the same
            // closer. Because we are going backwards through the file, this next
            // CASE/DEFAULT statement is already fixed, so just use its closer
            // and don't worry about fixing anything.
            $newCloser = $tokens[$x]['scope_closer'];
            $tokens[$i]['scope_closer'] = $newCloser;
            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                $oldType = $tokens[$scopeCloser]['type'];
                $newType = $tokens[$newCloser]['type'];
                $line    = $tokens[$i]['line'];
                echo "\t* token $i (T_CASE) on line $line closer changed from $scopeCloser ($oldType) to $newCloser ($newType)".PHP_EOL;
            }

            continue;
        }

        if ($tokens[$x]['code'] !== T_OPEN_CURLY_BRACKET
            || isset($tokens[$x]['scope_condition']) === true
        ) {
            // Not a CASE/DEFAULT with a curly brace opener.
            continue;
        }

        // The closer for this CASE/DEFAULT should be the closing curly brace and
        // not whatever it already is. The opener needs to be the opening curly
        // brace so everything matches up.
        $newCloser = $tokens[$x]['bracket_closer'];
        foreach (array($i, $x, $newCloser) as $index) {
            $tokens[$index]['scope_condition'] = $i;
            $tokens[$index]['scope_opener']    = $x;
            $tokens[$index]['scope_closer']    = $newCloser;
        }

        // The old opener/closer no longer belong to this scope, and the
        // braces are now scope tokens rather than plain brackets.
        unset($tokens[$scopeOpener]['scope_condition']);
        unset($tokens[$scopeOpener]['scope_opener']);
        unset($tokens[$scopeOpener]['scope_closer']);
        unset($tokens[$scopeCloser]['scope_condition']);
        unset($tokens[$scopeCloser]['scope_opener']);
        unset($tokens[$scopeCloser]['scope_closer']);
        unset($tokens[$x]['bracket_opener']);
        unset($tokens[$x]['bracket_closer']);
        unset($tokens[$newCloser]['bracket_opener']);
        unset($tokens[$newCloser]['bracket_closer']);
        $tokens[$scopeCloser]['conditions'][] = $i;

        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            $line      = $tokens[$i]['line'];
            $tokenType = $tokens[$i]['type'];

            $oldType = $tokens[$scopeOpener]['type'];
            $newType = $tokens[$x]['type'];
            echo "\t* token $i ($tokenType) on line $line opener changed from $scopeOpener ($oldType) to $x ($newType)".PHP_EOL;

            $oldType = $tokens[$scopeCloser]['type'];
            $newType = $tokens[$newCloser]['type'];
            echo "\t* token $i ($tokenType) on line $line closer changed from $scopeCloser ($oldType) to $newCloser ($newType)".PHP_EOL;
        }

        // Now fix up all the tokens that think they are
        // inside the CASE/DEFAULT statement when they are really outside.
        for ($x = $newCloser; $x < $scopeCloser; $x++) {
            foreach ($tokens[$x]['conditions'] as $num => $oldCond) {
                if ($oldCond === $tokens[$i]['code']) {
                    $oldConditions = $tokens[$x]['conditions'];
                    unset($tokens[$x]['conditions'][$num]);

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $type     = $tokens[$x]['type'];
                        $oldConds = '';
                        foreach ($oldConditions as $condition) {
                            $oldConds .= token_name($condition).',';
                        }

                        $oldConds = rtrim($oldConds, ',');

                        $newConds = '';
                        foreach ($tokens[$x]['conditions'] as $condition) {
                            $newConds .= token_name($condition).',';
                        }

                        $newConds = rtrim($newConds, ',');

                        echo "\t\t* cleaned $x ($type) *".PHP_EOL;
                        echo "\t\t\t=> conditions changed from $oldConds to $newConds".PHP_EOL;
                    }

                    break;
                }//end if
            }//end foreach
        }//end for
    }//end for

    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t*** END ADDITIONAL PHP PROCESSING ***".PHP_EOL;
    }

}//end processAdditional()
1522 | |
1523 | |
/**
 * Takes a token produced from <code>token_get_all()</code> and produces a
 * more uniform token.
 *
 * The result always has the keys "code", "type" and "content". Resolved
 * code/type pairs are memoised in self::$_resolveTokenCache: T_STRING
 * tokens are keyed by their lower-cased content, other array tokens by
 * their token code. T_CURLY_OPEN is never cached.
 *
 * @param string|array $token The token to convert.
 *
 * @return array The new token.
 */
public static function standardiseToken($token)
{
    // Anything without a second element is handled as a simple token:
    // serve it from the cache or resolve it by its character.
    if (isset($token[1]) === false) {
        if (isset(self::$_resolveTokenCache[$token[0]]) === true) {
            return self::$_resolveTokenCache[$token[0]];
        }

        return self::resolveSimpleToken($token[0]);
    }

    $isString    = ($token[0] === T_STRING);
    $isCurlyOpen = ($token[0] === T_CURLY_OPEN);

    // Work out which cache slot (if any) this token maps to.
    if ($isString === true) {
        $cacheKey = strtolower($token[1]);
    } else if ($isCurlyOpen === false) {
        $cacheKey = $token[0];
    } else {
        $cacheKey = null;
    }

    if ($cacheKey !== null && isset(self::$_resolveTokenCache[$cacheKey]) === true) {
        $resolved            = self::$_resolveTokenCache[$cacheKey];
        $resolved['content'] = $token[1];
        return $resolved;
    }

    if ($isCurlyOpen === true) {
        // Reported as a normal opening curly bracket; not cached.
        return array(
                'code'    => T_OPEN_CURLY_BRACKET,
                'type'    => 'T_OPEN_CURLY_BRACKET',
                'content' => $token[1],
               );
    }

    if ($isString === true) {
        // Some T_STRING contents are given their own token types.
        $keywordTypes = array(
                         'false'  => 'T_FALSE',
                         'true'   => 'T_TRUE',
                         'null'   => 'T_NULL',
                         'self'   => 'T_SELF',
                         'parent' => 'T_PARENT',
                        );

        $typeName = 'T_STRING';
        if (isset($keywordTypes[$cacheKey]) === true) {
            $typeName = $keywordTypes[$cacheKey];
        }

        $resolved = array(
                     'type' => $typeName,
                     'code' => constant($typeName),
                    );

        self::$_resolveTokenCache[$cacheKey] = $resolved;
    } else {
        $resolved = array(
                     'code' => $token[0],
                     'type' => token_name($token[0]),
                    );

        self::$_resolveTokenCache[$token[0]] = $resolved;
    }//end if

    // The content is added after caching so cached entries stay content-free.
    $resolved['content'] = $token[1];
    return $resolved;

}//end standardiseToken()
1600 | |
1601 | |
/**
 * Converts simple tokens into a format that conforms to complex tokens
 * produced by token_get_all().
 *
 * Simple tokens are tokens that are not in array form when produced from
 * token_get_all(). Characters with no known mapping resolve to T_NONE.
 * The resolved token is stored in self::$_resolveTokenCache under the
 * token's own content before it is returned.
 *
 * @param string $token The simple token to convert.
 *
 * @return array The new token in array format.
 */
public static function resolveSimpleToken($token)
{
    // Maps each supported character to the name of its token constant.
    $typeNames = array(
                  '{' => 'T_OPEN_CURLY_BRACKET',
                  '}' => 'T_CLOSE_CURLY_BRACKET',
                  '[' => 'T_OPEN_SQUARE_BRACKET',
                  ']' => 'T_CLOSE_SQUARE_BRACKET',
                  '(' => 'T_OPEN_PARENTHESIS',
                  ')' => 'T_CLOSE_PARENTHESIS',
                  ':' => 'T_COLON',
                  '.' => 'T_STRING_CONCAT',
                  ';' => 'T_SEMICOLON',
                  '=' => 'T_EQUAL',
                  '*' => 'T_MULTIPLY',
                  '/' => 'T_DIVIDE',
                  '+' => 'T_PLUS',
                  '-' => 'T_MINUS',
                  '%' => 'T_MODULUS',
                  '^' => 'T_BITWISE_XOR',
                  '&' => 'T_BITWISE_AND',
                  '|' => 'T_BITWISE_OR',
                  '<' => 'T_LESS_THAN',
                  '>' => 'T_GREATER_THAN',
                  '!' => 'T_BOOLEAN_NOT',
                  ',' => 'T_COMMA',
                  '@' => 'T_ASPERAND',
                  '$' => 'T_DOLLAR',
                  '`' => 'T_BACKTICK',
                 );

    $typeName = 'T_NONE';
    if (isset($typeNames[$token]) === true) {
        $typeName = $typeNames[$token];
    }

    $resolved = array(
                 'type'    => $typeName,
                 'code'    => constant($typeName),
                 'content' => $token,
                );

    self::$_resolveTokenCache[$token] = $resolved;
    return $resolved;

}//end resolveSimpleToken()
1705 | |
1706 | |
1707 }//end class |