comparison vendor/squizlabs/php_codesniffer/CodeSniffer/Tokenizers/PHP.php @ 0:4c8ae668cc8c

Initial import (non-working)
author Chris Cannam
date Wed, 29 Nov 2017 16:09:58 +0000
parents
children 7a779792577d
comparison
equal deleted inserted replaced
-1:000000000000 0:4c8ae668cc8c
1 <?php
2 /**
3 * Tokenizes PHP code.
4 *
5 * PHP version 5
6 *
7 * @category PHP
8 * @package PHP_CodeSniffer
9 * @author Greg Sherwood <gsherwood@squiz.net>
10 * @copyright 2006-2014 Squiz Pty Ltd (ABN 77 084 670 600)
11 * @license https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
12 * @link http://pear.php.net/package/PHP_CodeSniffer
13 */
14
15 /**
16 * Tokenizes PHP code.
17 *
18 * @category PHP
19 * @package PHP_CodeSniffer
20 * @author Greg Sherwood <gsherwood@squiz.net>
21 * @copyright 2006-2014 Squiz Pty Ltd (ABN 77 084 670 600)
22 * @license https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
23 * @version Release: @package_version@
24 * @link http://pear.php.net/package/PHP_CodeSniffer
25 */
26 class PHP_CodeSniffer_Tokenizers_PHP
27 {
28
29 /**
30 * If TRUE, files that appear to be minified will not be processed. Defaults to FALSE.
31 *
32 * @var boolean
33 */
34 public $skipMinified = false;
35
36 /**
37 * A list of tokens that are allowed to open a scope.
38 *
39 * Each entry is keyed by the scope-owning token and holds: 'start' and 'end'
40 * (the tokens that open and close the scope), 'strict' (whether the token strictly
41 * requires an opener), 'shared' (whether it can share a scope closer) and 'with'
42 * (the tokens it can be shared with). A CASE scope is an example of a shared closer.
43 *
44 * @var array
45 */
46 public $scopeOpeners = array(
47 T_IF => array(
48 'start' => array(
49 T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET,
50 T_COLON => T_COLON,
51 ),
52 'end' => array(
53 T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET,
54 T_ENDIF => T_ENDIF,
55 T_ELSE => T_ELSE,
56 T_ELSEIF => T_ELSEIF,
57 ),
58 'strict' => false,
59 'shared' => false,
60 'with' => array(
61 T_ELSE => T_ELSE,
62 T_ELSEIF => T_ELSEIF,
63 ),
64 ),
65 T_TRY => array(
66 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
67 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
68 'strict' => true,
69 'shared' => false,
70 'with' => array(),
71 ),
72 T_CATCH => array(
73 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
74 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
75 'strict' => true,
76 'shared' => false,
77 'with' => array(),
78 ),
79 T_FINALLY => array(
80 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
81 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
82 'strict' => true,
83 'shared' => false,
84 'with' => array(),
85 ),
86 T_ELSE => array(
87 'start' => array(
88 T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET,
89 T_COLON => T_COLON,
90 ),
91 'end' => array(
92 T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET,
93 T_ENDIF => T_ENDIF,
94 ),
95 'strict' => false,
96 'shared' => false,
97 'with' => array(
98 T_IF => T_IF,
99 T_ELSEIF => T_ELSEIF,
100 ),
101 ),
102 T_ELSEIF => array(
103 'start' => array(
104 T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET,
105 T_COLON => T_COLON,
106 ),
107 'end' => array(
108 T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET,
109 T_ENDIF => T_ENDIF,
110 T_ELSE => T_ELSE,
111 T_ELSEIF => T_ELSEIF,
112 ),
113 'strict' => false,
114 'shared' => false,
115 'with' => array(
116 T_IF => T_IF,
117 T_ELSE => T_ELSE,
118 ),
119 ),
120 T_FOR => array(
121 'start' => array(
122 T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET,
123 T_COLON => T_COLON,
124 ),
125 'end' => array(
126 T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET,
127 T_ENDFOR => T_ENDFOR,
128 ),
129 'strict' => false,
130 'shared' => false,
131 'with' => array(),
132 ),
133 T_FOREACH => array(
134 'start' => array(
135 T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET,
136 T_COLON => T_COLON,
137 ),
138 'end' => array(
139 T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET,
140 T_ENDFOREACH => T_ENDFOREACH,
141 ),
142 'strict' => false,
143 'shared' => false,
144 'with' => array(),
145 ),
146 T_INTERFACE => array(
147 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
148 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
149 'strict' => true,
150 'shared' => false,
151 'with' => array(),
152 ),
153 T_FUNCTION => array(
154 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
155 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
156 'strict' => true,
157 'shared' => false,
158 'with' => array(),
159 ),
160 T_CLASS => array(
161 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
162 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
163 'strict' => true,
164 'shared' => false,
165 'with' => array(),
166 ),
167 T_TRAIT => array(
168 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
169 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
170 'strict' => true,
171 'shared' => false,
172 'with' => array(),
173 ),
174 T_USE => array(
175 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
176 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
177 'strict' => false,
178 'shared' => false,
179 'with' => array(),
180 ),
181 T_DECLARE => array(
182 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
183 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
184 'strict' => false,
185 'shared' => false,
186 'with' => array(),
187 ),
188 T_NAMESPACE => array(
189 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
190 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
191 'strict' => false,
192 'shared' => false,
193 'with' => array(),
194 ),
195 T_WHILE => array(
196 'start' => array(
197 T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET,
198 T_COLON => T_COLON,
199 ),
200 'end' => array(
201 T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET,
202 T_ENDWHILE => T_ENDWHILE,
203 ),
204 'strict' => false,
205 'shared' => false,
206 'with' => array(),
207 ),
208 T_DO => array(
209 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
210 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
211 'strict' => true,
212 'shared' => false,
213 'with' => array(),
214 ),
215 T_SWITCH => array(
216 'start' => array(
217 T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET,
218 T_COLON => T_COLON,
219 ),
220 'end' => array(
221 T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET,
222 T_ENDSWITCH => T_ENDSWITCH,
223 ),
224 'strict' => true,
225 'shared' => false,
226 'with' => array(),
227 ),
228 T_CASE => array(
229 'start' => array(
230 T_COLON => T_COLON,
231 T_SEMICOLON => T_SEMICOLON,
232 ),
233 'end' => array(
234 T_BREAK => T_BREAK,
235 T_RETURN => T_RETURN,
236 T_CONTINUE => T_CONTINUE,
237 T_THROW => T_THROW,
238 T_EXIT => T_EXIT,
239 ),
240 'strict' => true,
241 'shared' => true,
242 'with' => array(
243 T_DEFAULT => T_DEFAULT,
244 T_CASE => T_CASE,
245 T_SWITCH => T_SWITCH,
246 ),
247 ),
248 T_DEFAULT => array(
249 'start' => array(
250 T_COLON => T_COLON,
251 T_SEMICOLON => T_SEMICOLON,
252 ),
253 'end' => array(
254 T_BREAK => T_BREAK,
255 T_RETURN => T_RETURN,
256 T_CONTINUE => T_CONTINUE,
257 T_THROW => T_THROW,
258 T_EXIT => T_EXIT,
259 ),
260 'strict' => true,
261 'shared' => true,
262 'with' => array(
263 T_CASE => T_CASE,
264 T_SWITCH => T_SWITCH,
265 ),
266 ),
267 T_START_HEREDOC => array(
268 'start' => array(T_START_HEREDOC => T_START_HEREDOC),
269 'end' => array(T_END_HEREDOC => T_END_HEREDOC),
270 'strict' => true,
271 'shared' => false,
272 'with' => array(),
273 ),
274 );
275
276 /**
277 * A list of tokens that end the scope.
278 *
279 * This array is a collection of end tokens taken from the $scopeOpeners
280 * array. The data is duplicated here to save time during parsing of the file.
281 * NOTE(review): T_ELSE, T_ELSEIF, T_RETURN, T_CONTINUE, T_THROW and T_EXIT are 'end' tokens in $scopeOpeners but are not listed here - confirm this is intentional.
282 *
283 * @var array
284 */
285 public $endScopeTokens = array(
286 T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET,
287 T_ENDIF => T_ENDIF,
288 T_ENDFOR => T_ENDFOR,
289 T_ENDFOREACH => T_ENDFOREACH,
290 T_ENDWHILE => T_ENDWHILE,
291 T_ENDSWITCH => T_ENDSWITCH,
292 T_BREAK => T_BREAK,
293 T_END_HEREDOC => T_END_HEREDOC,
294 );
295
296 /**
297 * A cache of different token types, resolved into arrays. tokenizeString() looks entries up by simple token string, token code, or lower-cased T_STRING content.
298 *
299 * @var array
300 * @see standardiseToken()
301 */
302 private static $_resolveTokenCache = array();
303
304
/**
 * Creates an array of tokens when given some PHP code.
 *
 * Starts by using token_get_all() but does a lot of extra processing
 * to insert information about the context of the token: doc comments are
 * handed to the comment tokenizer, double quoted strings and here/nowdocs
 * are merged and re-split per line, operators that older PHP versions split
 * into several tokens (..., **, **=, ??, ??=, <=>) are recombined, and a
 * number of context-dependent tokens are re-typed.
 *
 * Each returned token is an array with the keys 'code', 'type' and 'content'.
 *
 * @param string $string  The string to tokenize.
 * @param string $eolChar The EOL character to use for splitting strings.
 *                        Defaults to a real newline character.
 *
 * @return array
 */
public function tokenizeString($string, $eolChar="\n")
{
    // Always defined so the verbose output below never reads an unset variable.
    $isWin = false;
    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t*** START PHP TOKENIZING ***".PHP_EOL;
        if (strtoupper(substr(PHP_OS, 0, 3)) === 'WIN') {
            $isWin = true;
        }
    }

    $tokens      = @token_get_all($string);
    $finalTokens = array();

    $newStackPtr       = 0;
    $numTokens         = count($tokens);
    $lastNotEmptyToken = 0;

    $insideInlineIf = array();
    $insideUseGroup = false;

    $commentTokenizer = new PHP_CodeSniffer_Tokenizers_Comment();

    // Tokens that stop the backwards search when deciding whether
    // "label:" is a goto label. Loop-invariant, so built once.
    $stopTokens = array(
        T_CASE               => true,
        T_SEMICOLON          => true,
        T_OPEN_CURLY_BRACKET => true,
        T_INLINE_THEN        => true,
    );

    // Preceding tokens after which a T_STRING must remain a T_STRING.
    // Loop-invariant, so built once.
    $context = array(
        T_OBJECT_OPERATOR      => true,
        T_FUNCTION             => true,
        T_CLASS                => true,
        T_EXTENDS              => true,
        T_IMPLEMENTS           => true,
        T_NEW                  => true,
        T_CONST                => true,
        T_NS_SEPARATOR         => true,
        T_USE                  => true,
        T_NAMESPACE            => true,
        T_PAAMAYIM_NEKUDOTAYIM => true,
    );

    for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
        // Simple tokens are plain strings; casting gives them the same
        // shape as array tokens, with the content at index 0 and no index 1.
        $token        = (array) $tokens[$stackPtr];
        $tokenIsArray = isset($token[1]);

        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            if ($tokenIsArray === true) {
                $type    = token_name($token[0]);
                $content = PHP_CodeSniffer::prepareForOutput($token[1]);
            } else {
                $newToken = self::resolveSimpleToken($token[0]);
                $type     = $newToken['type'];
                $content  = PHP_CodeSniffer::prepareForOutput($token[0]);
            }

            echo "\tProcess token ";
            if ($tokenIsArray === true) {
                echo "[$stackPtr]";
            } else {
                echo " $stackPtr ";
            }

            echo ": $type => $content";
        }//end if

        if ($newStackPtr > 0 && $finalTokens[($newStackPtr - 1)]['code'] !== T_WHITESPACE) {
            $lastNotEmptyToken = ($newStackPtr - 1);
        }

        /*
            If we are using \r\n newline characters, the \r and \n are sometimes
            split over two tokens. This normally occurs after comments. We need
            to merge these two characters together so that our line endings are
            consistent for all lines.
        */

        if ($tokenIsArray === true && substr($token[1], -1) === "\r") {
            if (isset($tokens[($stackPtr + 1)]) === true
                && is_array($tokens[($stackPtr + 1)]) === true
                && $tokens[($stackPtr + 1)][1][0] === "\n"
            ) {
                $token[1] .= "\n";
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    if ($isWin === true) {
                        echo '\n';
                    } else {
                        echo "\033[30;1m\\n\033[0m";
                    }
                }

                if ($tokens[($stackPtr + 1)][1] === "\n") {
                    // This token's content has been merged into the previous,
                    // so we can skip it.
                    $tokens[($stackPtr + 1)] = '';
                } else {
                    $tokens[($stackPtr + 1)][1] = substr($tokens[($stackPtr + 1)][1], 1);
                }
            }
        }//end if

        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo PHP_EOL;
        }

        /*
            Parse doc blocks into something that can be easily iterated over.
        */

        if ($tokenIsArray === true && $token[0] === T_DOC_COMMENT) {
            $commentTokens = $commentTokenizer->tokenizeString($token[1], $eolChar, $newStackPtr);
            foreach ($commentTokens as $commentToken) {
                $finalTokens[$newStackPtr] = $commentToken;
                $newStackPtr++;
            }

            continue;
        }

        /*
            If this is a double quoted string, PHP will tokenize the whole
            thing which causes problems with the scope map when braces are
            within the string. So we need to merge the tokens together to
            provide a single string.
        */

        if ($tokenIsArray === false && ($token[0] === '"' || $token[0] === 'b"')) {
            // Binary casts need a special token.
            if ($token[0] === 'b"') {
                $finalTokens[$newStackPtr] = array(
                    'code'    => T_BINARY_CAST,
                    'type'    => 'T_BINARY_CAST',
                    'content' => 'b',
                );
                $newStackPtr++;
            }

            $tokenContent = '"';
            $nestedVars   = array();
            for ($i = ($stackPtr + 1); $i < $numTokens; $i++) {
                $subToken        = (array) $tokens[$i];
                $subTokenIsArray = isset($subToken[1]);

                if ($subTokenIsArray === true) {
                    $tokenContent .= $subToken[1];
                    if ($subToken[1] === '{'
                        && $subToken[0] !== T_ENCAPSED_AND_WHITESPACE
                    ) {
                        $nestedVars[] = $i;
                    }
                } else {
                    $tokenContent .= $subToken[0];
                    if ($subToken[0] === '}') {
                        array_pop($nestedVars);
                    }
                }

                if ($subTokenIsArray === false
                    && $subToken[0] === '"'
                    && empty($nestedVars) === true
                ) {
                    // We found the other end of the double quoted string.
                    break;
                }
            }//end for

            $stackPtr = $i;

            // Convert each line within the double quoted string to a
            // new token, so it conforms with other multiple line tokens.
            $tokenLines = explode($eolChar, $tokenContent);
            $numLines   = count($tokenLines);
            $newToken   = array();

            for ($j = 0; $j < $numLines; $j++) {
                $newToken['content'] = $tokenLines[$j];
                if ($j === ($numLines - 1)) {
                    if ($tokenLines[$j] === '') {
                        break;
                    }
                } else {
                    $newToken['content'] .= $eolChar;
                }

                $newToken['code']          = T_DOUBLE_QUOTED_STRING;
                $newToken['type']          = 'T_DOUBLE_QUOTED_STRING';
                $finalTokens[$newStackPtr] = $newToken;
                $newStackPtr++;
            }

            // Continue, as we're done with this token.
            continue;
        }//end if

        /*
            If this is a heredoc, PHP will tokenize the whole
            thing which causes problems when heredocs don't
            contain real PHP code, which is almost never.
            We want to leave the start and end heredoc tokens
            alone though.
        */

        if ($tokenIsArray === true && $token[0] === T_START_HEREDOC) {
            // Add the start heredoc token to the final array.
            $finalTokens[$newStackPtr] = self::standardiseToken($token);

            // Check if this is actually a nowdoc and use a different token
            // to help the sniffs.
            $nowdoc = false;
            if ($token[1][3] === "'") {
                $finalTokens[$newStackPtr]['code'] = T_START_NOWDOC;
                $finalTokens[$newStackPtr]['type'] = 'T_START_NOWDOC';
                $nowdoc = true;
            }

            $tokenContent = '';
            for ($i = ($stackPtr + 1); $i < $numTokens; $i++) {
                $subTokenIsArray = is_array($tokens[$i]);
                if ($subTokenIsArray === true
                    && $tokens[$i][0] === T_END_HEREDOC
                ) {
                    // We found the other end of the heredoc.
                    break;
                }

                if ($subTokenIsArray === true) {
                    $tokenContent .= $tokens[$i][1];
                } else {
                    $tokenContent .= $tokens[$i];
                }
            }

            if ($i === $numTokens) {
                // We got to the end of the file and never
                // found the closing token, so this probably wasn't
                // a heredoc.
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $type = $finalTokens[$newStackPtr]['type'];
                    echo "\t\t* failed to find the end of the here/nowdoc".PHP_EOL;
                    echo "\t\t* token $stackPtr changed from $type to T_STRING".PHP_EOL;
                }

                $finalTokens[$newStackPtr]['code'] = T_STRING;
                $finalTokens[$newStackPtr]['type'] = 'T_STRING';
                $newStackPtr++;
                continue;
            }

            $stackPtr = $i;
            $newStackPtr++;

            // Convert each line within the heredoc to a
            // new token, so it conforms with other multiple line tokens.
            $tokenLines = explode($eolChar, $tokenContent);
            $numLines   = count($tokenLines);
            $newToken   = array();

            for ($j = 0; $j < $numLines; $j++) {
                $newToken['content'] = $tokenLines[$j];
                if ($j === ($numLines - 1)) {
                    if ($tokenLines[$j] === '') {
                        break;
                    }
                } else {
                    $newToken['content'] .= $eolChar;
                }

                if ($nowdoc === true) {
                    $newToken['code'] = T_NOWDOC;
                    $newToken['type'] = 'T_NOWDOC';
                } else {
                    $newToken['code'] = T_HEREDOC;
                    $newToken['type'] = 'T_HEREDOC';
                }

                $finalTokens[$newStackPtr] = $newToken;
                $newStackPtr++;
            }//end for

            // Add the end heredoc token to the final array.
            $finalTokens[$newStackPtr] = self::standardiseToken($tokens[$stackPtr]);

            if ($nowdoc === true) {
                $finalTokens[$newStackPtr]['code'] = T_END_NOWDOC;
                $finalTokens[$newStackPtr]['type'] = 'T_END_NOWDOC';
            }

            $newStackPtr++;

            // Continue, as we're done with this token.
            continue;
        }//end if

        /*
            Before PHP 5.6, the ... operator was tokenized as three
            T_STRING_CONCAT tokens in a row. So look for and combine
            these tokens in earlier versions.
        */

        if ($tokenIsArray === false
            && $token[0] === '.'
            && isset($tokens[($stackPtr + 1)]) === true
            && isset($tokens[($stackPtr + 2)]) === true
            && $tokens[($stackPtr + 1)] === '.'
            && $tokens[($stackPtr + 2)] === '.'
        ) {
            $newToken            = array();
            $newToken['code']    = T_ELLIPSIS;
            $newToken['type']    = 'T_ELLIPSIS';
            $newToken['content'] = '...';
            $finalTokens[$newStackPtr] = $newToken;

            $newStackPtr++;
            $stackPtr += 2;
            continue;
        }

        /*
            Before PHP 5.6, the ** operator was tokenized as two
            T_MULTIPLY tokens in a row. So look for and combine
            these tokens in earlier versions.
        */

        if ($tokenIsArray === false
            && $token[0] === '*'
            && isset($tokens[($stackPtr + 1)]) === true
            && $tokens[($stackPtr + 1)] === '*'
        ) {
            $newToken            = array();
            $newToken['code']    = T_POW;
            $newToken['type']    = 'T_POW';
            $newToken['content'] = '**';
            $finalTokens[$newStackPtr] = $newToken;

            $newStackPtr++;
            $stackPtr++;
            continue;
        }

        /*
            Before PHP 5.6, the **= operator was tokenized as
            T_MULTIPLY followed by T_MUL_EQUAL. So look for and combine
            these tokens in earlier versions.
        */

        if ($tokenIsArray === false
            && $token[0] === '*'
            && isset($tokens[($stackPtr + 1)]) === true
            && is_array($tokens[($stackPtr + 1)]) === true
            && $tokens[($stackPtr + 1)][1] === '*='
        ) {
            $newToken            = array();
            $newToken['code']    = T_POW_EQUAL;
            $newToken['type']    = 'T_POW_EQUAL';
            $newToken['content'] = '**=';
            $finalTokens[$newStackPtr] = $newToken;

            $newStackPtr++;
            $stackPtr++;
            continue;
        }

        /*
            Before PHP 7, the ??= operator was tokenized as
            T_INLINE_THEN, T_INLINE_THEN, T_EQUAL.
            Between PHP 7.0 and 7.2, the ??= operator was tokenized as
            T_COALESCE, T_EQUAL.
            So look for and combine these tokens in earlier versions.
        */

        if (($tokenIsArray === false
            && $token[0] === '?'
            && isset($tokens[($stackPtr + 1)]) === true
            && $tokens[($stackPtr + 1)][0] === '?'
            && isset($tokens[($stackPtr + 2)]) === true
            && $tokens[($stackPtr + 2)][0] === '=')
            || ($tokenIsArray === true
            && $token[0] === T_COALESCE
            && isset($tokens[($stackPtr + 1)]) === true
            && $tokens[($stackPtr + 1)][0] === '=')
        ) {
            $newToken            = array();
            $newToken['code']    = T_COALESCE_EQUAL;
            $newToken['type']    = 'T_COALESCE_EQUAL';
            $newToken['content'] = '??=';
            $finalTokens[$newStackPtr] = $newToken;

            $newStackPtr++;
            $stackPtr++;

            if ($tokenIsArray === false) {
                // Pre PHP 7.
                $stackPtr++;
            }

            continue;
        }

        /*
            Before PHP 7, the ?? operator was tokenized as
            T_INLINE_THEN followed by T_INLINE_THEN.
            So look for and combine these tokens in earlier versions.
        */

        if ($tokenIsArray === false
            && $token[0] === '?'
            && isset($tokens[($stackPtr + 1)]) === true
            && $tokens[($stackPtr + 1)][0] === '?'
        ) {
            $newToken            = array();
            $newToken['code']    = T_COALESCE;
            $newToken['type']    = 'T_COALESCE';
            $newToken['content'] = '??';
            $finalTokens[$newStackPtr] = $newToken;

            $newStackPtr++;
            $stackPtr++;
            continue;
        }

        /*
            Convert ? to T_NULLABLE OR T_INLINE_THEN
        */

        if ($tokenIsArray === false && $token[0] === '?') {
            $newToken            = array();
            $newToken['content'] = '?';

            // Walk backwards: a function keyword found first means this is
            // a nullable type marker; an open tag, "{" or ";" found first
            // means it is the THEN part of an inline IF.
            $inlineThenMarkers = array(T_OPEN_TAG, T_OPEN_TAG_WITH_ECHO, '{', ';');
            for ($i = ($stackPtr - 1); $i >= 0; $i--) {
                if (is_array($tokens[$i]) === true) {
                    $tokenType = $tokens[$i][0];
                } else {
                    $tokenType = $tokens[$i];
                }

                if ($tokenType === T_FUNCTION) {
                    $newToken['code'] = T_NULLABLE;
                    $newToken['type'] = 'T_NULLABLE';
                    break;
                } else if (in_array($tokenType, $inlineThenMarkers, true) === true) {
                    // Strict comparison: token ids are ints and simple
                    // tokens are strings, so they must never be juggled.
                    $newToken['code'] = T_INLINE_THEN;
                    $newToken['type'] = 'T_INLINE_THEN';

                    $insideInlineIf[] = $stackPtr;
                    break;
                }
            }

            if (isset($newToken['code']) === false) {
                // Nothing decisive was found before the start of the token
                // stream. Fall back to an inline THEN so every token that is
                // emitted carries a code and a type.
                $newToken['code'] = T_INLINE_THEN;
                $newToken['type'] = 'T_INLINE_THEN';

                $insideInlineIf[] = $stackPtr;
            }

            $finalTokens[$newStackPtr] = $newToken;
            $newStackPtr++;
            continue;
        }//end if

        /*
            Tokens after a double colon may be look like scope openers,
            such as when writing code like Foo::NAMESPACE, but they are
            only ever variables or strings.
        */

        if ($stackPtr > 1
            && (is_array($tokens[($stackPtr - 1)]) === true
            && $tokens[($stackPtr - 1)][0] === T_PAAMAYIM_NEKUDOTAYIM)
            && $tokenIsArray === true
            && $token[0] !== T_STRING
            && $token[0] !== T_VARIABLE
            && $token[0] !== T_DOLLAR
            && isset(PHP_CodeSniffer_Tokens::$emptyTokens[$token[0]]) === false
        ) {
            $newToken            = array();
            $newToken['code']    = T_STRING;
            $newToken['type']    = 'T_STRING';
            $newToken['content'] = $token[1];
            $finalTokens[$newStackPtr] = $newToken;

            $newStackPtr++;
            continue;
        }

        /*
            The string-like token after a function keyword should always be
            tokenized as T_STRING even if it appears to be a different token,
            such as when writing code like: function default(): foo
            so go forward and change the token type before it is processed.
        */

        if ($tokenIsArray === true && $token[0] === T_FUNCTION) {
            for ($x = ($stackPtr + 1); $x < $numTokens; $x++) {
                if (is_array($tokens[$x]) === false
                    || isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x][0]]) === false
                ) {
                    // Non-empty content.
                    break;
                }
            }

            if ($x < $numTokens && is_array($tokens[$x]) === true) {
                $tokens[$x][0] = T_STRING;
            }
        }

        /*
            Before PHP 7, the <=> operator was tokenized as
            T_IS_SMALLER_OR_EQUAL followed by T_GREATER_THAN.
            So look for and combine these tokens in earlier versions.
        */

        if ($tokenIsArray === true
            && $token[0] === T_IS_SMALLER_OR_EQUAL
            && isset($tokens[($stackPtr + 1)]) === true
            && $tokens[($stackPtr + 1)][0] === '>'
        ) {
            $newToken            = array();
            $newToken['code']    = T_SPACESHIP;
            $newToken['type']    = 'T_SPACESHIP';
            $newToken['content'] = '<=>';
            $finalTokens[$newStackPtr] = $newToken;

            $newStackPtr++;
            $stackPtr++;
            continue;
        }

        /*
            Emulate traits in PHP versions less than 5.4.
        */

        if ($tokenIsArray === true
            && $token[0] === T_STRING
            && strtolower($token[1]) === 'trait'
            && $tokens[($stackPtr - 1)][0] !== T_OBJECT_OPERATOR
            && $tokens[($stackPtr - 1)][0] !== T_PAAMAYIM_NEKUDOTAYIM
        ) {
            $finalTokens[$newStackPtr] = array(
                'content' => $token[1],
                'code'    => T_TRAIT,
                'type'    => 'T_TRAIT',
            );

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                echo "\t\t* token $stackPtr changed from T_STRING to T_TRAIT".PHP_EOL;
            }

            $newStackPtr++;
            continue;
        }

        /*
            PHP doesn't assign a token to goto labels, so we have to.
            These are just string tokens with a single colon after them. Double
            colons are already tokenized and so don't interfere with this check.
            But we do have to account for CASE statements, that look just like
            goto labels.
        */

        if ($tokenIsArray === true
            && $token[0] === T_STRING
            && isset($tokens[($stackPtr + 1)]) === true
            && $tokens[($stackPtr + 1)] === ':'
            && $tokens[($stackPtr - 1)][0] !== T_PAAMAYIM_NEKUDOTAYIM
        ) {
            for ($x = ($newStackPtr - 1); $x > 0; $x--) {
                if (isset($stopTokens[$finalTokens[$x]['code']]) === true) {
                    break;
                }
            }

            if ($finalTokens[$x]['code'] !== T_CASE
                && $finalTokens[$x]['code'] !== T_INLINE_THEN
            ) {
                $finalTokens[$newStackPtr] = array(
                    'content' => $token[1].':',
                    'code'    => T_GOTO_LABEL,
                    'type'    => 'T_GOTO_LABEL',
                );

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\t* token $stackPtr changed from T_STRING to T_GOTO_LABEL".PHP_EOL;
                    echo "\t\t* skipping T_COLON token ".($stackPtr + 1).PHP_EOL;
                }

                $newStackPtr++;
                $stackPtr++;
                continue;
            }
        }//end if

        /*
            HHVM 3.5 tokenizes "else[\s]+if" as a T_ELSEIF token while PHP
            proper only tokenizes "elseif" as a T_ELSEIF token. So split
            up the HHVM token to make it looks like proper PHP.
        */

        if ($tokenIsArray === true
            && $token[0] === T_ELSEIF
            && strtolower($token[1]) !== 'elseif'
        ) {
            $finalTokens[$newStackPtr] = array(
                'content' => substr($token[1], 0, 4),
                'code'    => T_ELSE,
                'type'    => 'T_ELSE',
            );

            $newStackPtr++;
            $finalTokens[$newStackPtr] = array(
                'content' => substr($token[1], 4, -2),
                'code'    => T_WHITESPACE,
                'type'    => 'T_WHITESPACE',
            );

            $newStackPtr++;
            $finalTokens[$newStackPtr] = array(
                'content' => substr($token[1], -2),
                'code'    => T_IF,
                'type'    => 'T_IF',
            );

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                echo "\t\t* token $stackPtr changed from T_ELSEIF to T_ELSE/T_WHITESPACE/T_IF".PHP_EOL;
            }

            $newStackPtr++;
            continue;
        }//end if

        /*
            HHVM 3.5 and 3.6 tokenizes a hashbang line such as #!/usr/bin/php
            as T_HASHBANG while PHP proper uses T_INLINE_HTML.
        */

        if ($tokenIsArray === true && token_name($token[0]) === 'T_HASHBANG') {
            $finalTokens[$newStackPtr] = array(
                'content' => $token[1],
                'code'    => T_INLINE_HTML,
                'type'    => 'T_INLINE_HTML',
            );

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                echo "\t\t* token $stackPtr changed from T_HASHBANG to T_INLINE_HTML".PHP_EOL;
            }

            $newStackPtr++;
            continue;
        }//end if

        /*
            If this token has newlines in its content, split each line up
            and create a new token for each line. We do this so it's easier
            to ascertain where errors occur on a line.
            Note that $token[1] is the token's content.
        */

        if ($tokenIsArray === true && strpos($token[1], $eolChar) !== false) {
            $tokenLines = explode($eolChar, $token[1]);
            $numLines   = count($tokenLines);
            $newToken   = array(
                'type'    => token_name($token[0]),
                'code'    => $token[0],
                'content' => '',
            );

            for ($i = 0; $i < $numLines; $i++) {
                $newToken['content'] = $tokenLines[$i];
                if ($i === ($numLines - 1)) {
                    if ($tokenLines[$i] === '') {
                        break;
                    }
                } else {
                    $newToken['content'] .= $eolChar;
                }

                $finalTokens[$newStackPtr] = $newToken;
                $newStackPtr++;
            }
        } else {
            if ($tokenIsArray === true && $token[0] === T_STRING) {
                // Some T_STRING tokens should remain that way
                // due to their context.
                if (isset($context[$finalTokens[$lastNotEmptyToken]['code']]) === true) {
                    // Special case for syntax like: return new self
                    // where self should not be a string.
                    if ($finalTokens[$lastNotEmptyToken]['code'] === T_NEW
                        && strtolower($token[1]) === 'self'
                    ) {
                        $finalTokens[$newStackPtr] = array(
                            'content' => $token[1],
                            'code'    => T_SELF,
                            'type'    => 'T_SELF',
                        );
                    } else {
                        $finalTokens[$newStackPtr] = array(
                            'content' => $token[1],
                            'code'    => T_STRING,
                            'type'    => 'T_STRING',
                        );
                    }

                    $newStackPtr++;
                    continue;
                }//end if
            }//end if

            // Try the resolved-token cache before standardising from scratch.
            $newToken = null;
            if ($tokenIsArray === false) {
                if (isset(self::$_resolveTokenCache[$token[0]]) === true) {
                    $newToken = self::$_resolveTokenCache[$token[0]];
                }
            } else {
                $cacheKey = null;
                if ($token[0] === T_STRING) {
                    $cacheKey = strtolower($token[1]);
                } else if ($token[0] !== T_CURLY_OPEN) {
                    $cacheKey = $token[0];
                }

                if ($cacheKey !== null && isset(self::$_resolveTokenCache[$cacheKey]) === true) {
                    $newToken            = self::$_resolveTokenCache[$cacheKey];
                    $newToken['content'] = $token[1];
                }
            }

            if ($newToken === null) {
                $newToken = self::standardiseToken($token);
            }

            // Convert colons that are actually the ELSE component of an
            // inline IF statement.
            if (empty($insideInlineIf) === false && $newToken['code'] === T_COLON) {
                array_pop($insideInlineIf);
                $newToken['code'] = T_INLINE_ELSE;
                $newToken['type'] = 'T_INLINE_ELSE';
            }

            // This is a special condition for T_ARRAY tokens used for
            // type hinting function arguments as being arrays. We want to keep
            // the parenthesis map clean, so let's tag these tokens as
            // T_ARRAY_HINT.
            if ($newToken['code'] === T_ARRAY) {
                for ($i = $stackPtr; $i < $numTokens; $i++) {
                    if ($tokens[$i] === '(') {
                        break;
                    } else if (is_array($tokens[$i]) === true
                        && $tokens[$i][0] === T_VARIABLE
                    ) {
                        // Only array tokens can be variables. Checking first
                        // avoids indexing into the empty strings left behind
                        // by the \r\n merge above.
                        $newToken['code'] = T_ARRAY_HINT;
                        $newToken['type'] = 'T_ARRAY_HINT';
                        break;
                    }
                }
            }

            // This is a special case when checking PHP 5.5+ code in PHP < 5.5
            // where "finally" should be T_FINALLY instead of T_STRING.
            if ($newToken['code'] === T_STRING
                && strtolower($newToken['content']) === 'finally'
            ) {
                $newToken['code'] = T_FINALLY;
                $newToken['type'] = 'T_FINALLY';
            }

            // This is a special case for the PHP 5.5 classname::class syntax
            // where "class" should be T_STRING instead of T_CLASS.
            if (($newToken['code'] === T_CLASS
                || $newToken['code'] === T_FUNCTION)
                && $finalTokens[($newStackPtr - 1)]['code'] === T_DOUBLE_COLON
            ) {
                $newToken['code'] = T_STRING;
                $newToken['type'] = 'T_STRING';
            }

            // This is a special case for PHP 5.6 use function and use const
            // where "function" and "const" should be T_STRING instead of T_FUNCTION
            // and T_CONST.
            if (($newToken['code'] === T_FUNCTION
                || $newToken['code'] === T_CONST)
                && $finalTokens[$lastNotEmptyToken]['code'] === T_USE
            ) {
                $newToken['code'] = T_STRING;
                $newToken['type'] = 'T_STRING';
            }

            // This is a special case for use groups in PHP 7+ where leaving
            // the curly braces as their normal tokens would confuse
            // the scope map and sniffs.
            if ($newToken['code'] === T_OPEN_CURLY_BRACKET
                && $finalTokens[$lastNotEmptyToken]['code'] === T_NS_SEPARATOR
            ) {
                $newToken['code'] = T_OPEN_USE_GROUP;
                $newToken['type'] = 'T_OPEN_USE_GROUP';
                $insideUseGroup   = true;
            }

            if ($insideUseGroup === true && $newToken['code'] === T_CLOSE_CURLY_BRACKET) {
                $newToken['code'] = T_CLOSE_USE_GROUP;
                $newToken['type'] = 'T_CLOSE_USE_GROUP';
                $insideUseGroup   = false;
            }

            $finalTokens[$newStackPtr] = $newToken;
            $newStackPtr++;
        }//end if
    }//end for

    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t*** END PHP TOKENIZING ***".PHP_EOL;
    }

    return $finalTokens;

}//end tokenizeString()
1124
1125
/**
 * Performs additional processing after main tokenizing.
 *
 * The token stack is walked once, from the last token to the first, and the
 * following corrections are applied in place:
 *
 * - tokens that have a scope opener but no scope condition copy the
 *   condition from their opener (alternate IF/ENDIF syntax);
 * - T_FUNCTION tokens directly followed by an opening parenthesis become
 *   T_CLOSURE, and the type hint found just before the function's scope
 *   opener (or terminating semicolon) becomes T_RETURN_TYPE;
 * - T_CLASS tokens without a name become T_ANON_CLASS;
 * - square brackets that start a short array become
 *   T_OPEN_SHORT_ARRAY/T_CLOSE_SHORT_ARRAY;
 * - "static" after instanceof becomes T_STRING;
 * - T_ECHO tokens whose content is "<?=" become T_OPEN_TAG_WITH_ECHO;
 * - true/false/null used as part of a name, and the token following
 *   "const", become T_STRING;
 * - CASE/DEFAULT statements that use curly braces get their scope opener
 *   and closer moved to those braces.
 *
 * @param array  $tokens  The array of tokens to process. Modified in place.
 * @param string $eolChar The EOL character to use for splitting strings.
 *                        Not referenced by this method.
 *
 * @return void
 */
public function processAdditional(&$tokens, $eolChar)
{
    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t*** START ADDITIONAL PHP PROCESSING ***".PHP_EOL;
    }

    $numTokens = count($tokens);
    for ($i = ($numTokens - 1); $i >= 0; $i--) {
        // Check for any unset scope conditions due to alternate IF/ENDIF syntax.
        if (isset($tokens[$i]['scope_opener']) === true
            && isset($tokens[$i]['scope_condition']) === false
        ) {
            $tokens[$i]['scope_condition'] = $tokens[$tokens[$i]['scope_opener']]['scope_condition'];
        }

        if ($tokens[$i]['code'] === T_FUNCTION) {
            /*
                Detect functions that are actually closures and
                assign them a different token.
            */

            if (isset($tokens[$i]['scope_opener']) === true) {
                // Find the first token after "function" that is neither
                // empty nor a by-reference ampersand.
                for ($x = ($i + 1); $x < $numTokens; $x++) {
                    if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false
                        && $tokens[$x]['code'] !== T_BITWISE_AND
                    ) {
                        break;
                    }
                }

                // No name before the parenthesis means this is a closure.
                if ($tokens[$x]['code'] === T_OPEN_PARENTHESIS) {
                    $tokens[$i]['code'] = T_CLOSURE;
                    $tokens[$i]['type'] = 'T_CLOSURE';
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $line = $tokens[$i]['line'];
                        echo "\t* token $i on line $line changed from T_FUNCTION to T_CLOSURE".PHP_EOL;
                    }

                    // Tokens inside the closure recorded this token as a
                    // T_FUNCTION condition; update them to match.
                    for ($x = ($tokens[$i]['scope_opener'] + 1); $x < $tokens[$i]['scope_closer']; $x++) {
                        if (isset($tokens[$x]['conditions'][$i]) === false) {
                            continue;
                        }

                        $tokens[$x]['conditions'][$i] = T_CLOSURE;
                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            $type = $tokens[$x]['type'];
                            echo "\t\t* cleaned $x ($type) *".PHP_EOL;
                        }
                    }
                }

                $tokenAfterReturnTypeHint = $tokens[$i]['scope_opener'];
            } else if (isset($tokens[$i]['parenthesis_closer']) === true) {
                // A function without a body: the declaration ends at the
                // first semicolon after the parameter list.
                $tokenAfterReturnTypeHint = null;
                for ($x = ($tokens[$i]['parenthesis_closer'] + 1); $x < $numTokens; $x++) {
                    if ($tokens[$x]['code'] === T_SEMICOLON) {
                        $tokenAfterReturnTypeHint = $x;
                        break;
                    }
                }

                if ($tokenAfterReturnTypeHint === null) {
                    // Probably a syntax error.
                    continue;
                }
            } else {
                // Probably a syntax error.
                continue;
            }//end if

            /*
                Detect function return values and assign them
                a special token, because PHP doesn't.
            */

            // Only the last non-empty token before the opener/semicolon is
            // inspected; anything other than a type name leaves it untouched.
            for ($x = ($tokenAfterReturnTypeHint - 1); $x > $i; $x--) {
                if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false) {
                    if (in_array($tokens[$x]['code'], array(T_STRING, T_ARRAY, T_ARRAY_HINT, T_CALLABLE, T_SELF, T_PARENT), true) === true) {
                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            $line = $tokens[$x]['line'];
                            $type = $tokens[$x]['type'];
                            echo "\t* token $x on line $line changed from $type to T_RETURN_TYPE".PHP_EOL;
                        }

                        $tokens[$x]['code'] = T_RETURN_TYPE;
                        $tokens[$x]['type'] = 'T_RETURN_TYPE';
                    }

                    break;
                }
            }

            continue;
        } else if ($tokens[$i]['code'] === T_CLASS && isset($tokens[$i]['scope_opener']) === true) {
            /*
                Detect anonymous classes and assign them a different token.
            */

            for ($x = ($i + 1); $x < $numTokens; $x++) {
                if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false) {
                    break;
                }
            }

            // A class keyword not followed by a name is anonymous.
            if ($tokens[$x]['code'] === T_OPEN_PARENTHESIS
                || $tokens[$x]['code'] === T_OPEN_CURLY_BRACKET
                || $tokens[$x]['code'] === T_EXTENDS
                || $tokens[$x]['code'] === T_IMPLEMENTS
            ) {
                $tokens[$i]['code'] = T_ANON_CLASS;
                $tokens[$i]['type'] = 'T_ANON_CLASS';
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $line = $tokens[$i]['line'];
                    echo "\t* token $i on line $line changed from T_CLASS to T_ANON_CLASS".PHP_EOL;
                }

                for ($x = ($tokens[$i]['scope_opener'] + 1); $x < $tokens[$i]['scope_closer']; $x++) {
                    if (isset($tokens[$x]['conditions'][$i]) === false) {
                        continue;
                    }

                    $tokens[$x]['conditions'][$i] = T_ANON_CLASS;
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $type = $tokens[$x]['type'];
                        echo "\t\t* cleaned $x ($type) *".PHP_EOL;
                    }
                }
            }

            continue;
        } else if ($tokens[$i]['code'] === T_OPEN_SQUARE_BRACKET) {
            if (isset($tokens[$i]['bracket_closer']) === false) {
                continue;
            }

            // Unless there is a variable or a bracket before this token,
            // it is the start of an array being defined using the short syntax.
            $isShortArray = false;
            $allowed      = array(
                             T_CLOSE_SQUARE_BRACKET => T_CLOSE_SQUARE_BRACKET,
                             T_CLOSE_PARENTHESIS    => T_CLOSE_PARENTHESIS,
                             T_VARIABLE             => T_VARIABLE,
                             T_OBJECT_OPERATOR      => T_OBJECT_OPERATOR,
                             T_STRING               => T_STRING,
                            );

            for ($x = ($i - 1); $x > 0; $x--) {
                // If we hit a scope opener, the statement has ended
                // without finding anything, so it's probably an array
                // using PHP 7.1 short list syntax.
                if (isset($tokens[$x]['scope_opener']) === true) {
                    $isShortArray = true;
                    break;
                }

                // Jump over a complete bracket pair that precedes this one.
                if (isset($tokens[$x]['bracket_opener']) === true
                    && $x > $tokens[$x]['bracket_opener']
                ) {
                    $x = $tokens[$x]['bracket_opener'];
                    continue;
                }

                if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false) {
                    if (isset($allowed[$tokens[$x]['code']]) === false) {
                        $isShortArray = true;
                    }

                    break;
                }
            }//end for

            if ($isShortArray === true) {
                $tokens[$i]['code'] = T_OPEN_SHORT_ARRAY;
                $tokens[$i]['type'] = 'T_OPEN_SHORT_ARRAY';

                $closer = $tokens[$i]['bracket_closer'];
                $tokens[$closer]['code'] = T_CLOSE_SHORT_ARRAY;
                $tokens[$closer]['type'] = 'T_CLOSE_SHORT_ARRAY';
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $line = $tokens[$i]['line'];
                    echo "\t* token $i on line $line changed from T_OPEN_SQUARE_BRACKET to T_OPEN_SHORT_ARRAY".PHP_EOL;
                    $line = $tokens[$closer]['line'];
                    echo "\t* token $closer on line $line changed from T_CLOSE_SQUARE_BRACKET to T_CLOSE_SHORT_ARRAY".PHP_EOL;
                }
            }

            continue;
        } else if ($tokens[$i]['code'] === T_STATIC) {
            // Find the previous non-empty token.
            for ($x = ($i - 1); $x > 0; $x--) {
                if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false) {
                    break;
                }
            }

            // "instanceof static" uses static as a class reference.
            if ($tokens[$x]['code'] === T_INSTANCEOF) {
                $tokens[$i]['code'] = T_STRING;
                $tokens[$i]['type'] = 'T_STRING';

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $line = $tokens[$i]['line'];
                    echo "\t* token $i on line $line changed from T_STATIC to T_STRING".PHP_EOL;
                }
            }

            continue;
        } else if ($tokens[$i]['code'] === T_ECHO && $tokens[$i]['content'] === '<?=') {
            // HHVM tokenizes <?= as T_ECHO but it should be T_OPEN_TAG_WITH_ECHO.
            $tokens[$i]['code'] = T_OPEN_TAG_WITH_ECHO;
            $tokens[$i]['type'] = 'T_OPEN_TAG_WITH_ECHO';

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                $line = $tokens[$i]['line'];
                echo "\t* token $i on line $line changed from T_ECHO to T_OPEN_TAG_WITH_ECHO".PHP_EOL;
            }
        } else if ($tokens[$i]['code'] === T_TRUE
            || $tokens[$i]['code'] === T_FALSE
            || $tokens[$i]['code'] === T_NULL
        ) {
            // Find the next non-empty token. The loop must be bounded by $x;
            // otherwise a keyword at the end of the file reads past the stack.
            for ($x = ($i + 1); $x < $numTokens; $x++) {
                if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false) {
                    // Non-whitespace content.
                    break;
                }
            }

            // When followed by one of these operators the keyword is being
            // used as part of a name rather than as a literal.
            $context = array(
                        T_OBJECT_OPERATOR      => true,
                        T_NS_SEPARATOR         => true,
                        T_PAAMAYIM_NEKUDOTAYIM => true,
                       );
            if ($x < $numTokens && isset($context[$tokens[$x]['code']]) === true) {
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $line = $tokens[$i]['line'];
                    $type = $tokens[$i]['type'];
                    echo "\t* token $i on line $line changed from $type to T_STRING".PHP_EOL;
                }

                $tokens[$i]['code'] = T_STRING;
                $tokens[$i]['type'] = 'T_STRING';
            }
        } else if ($tokens[$i]['code'] === T_CONST) {
            // Context sensitive keywords support.
            for ($x = ($i + 1); $x < $numTokens; $x++) {
                if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false) {
                    // Non-whitespace content.
                    break;
                }
            }

            // Whatever follows "const" is the constant's name. The bounds
            // check stops a trailing "const" from appending a bogus token.
            if ($x < $numTokens && $tokens[$x]['code'] !== T_STRING) {
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $line = $tokens[$x]['line'];
                    $type = $tokens[$x]['type'];
                    echo "\t* token $x on line $line changed from $type to T_STRING".PHP_EOL;
                }

                $tokens[$x]['code'] = T_STRING;
                $tokens[$x]['type'] = 'T_STRING';
            }
        }//end if

        if (($tokens[$i]['code'] !== T_CASE
            && $tokens[$i]['code'] !== T_DEFAULT)
            || isset($tokens[$i]['scope_opener']) === false
        ) {
            // Only interested in CASE and DEFAULT statements from here on in.
            continue;
        }

        $scopeOpener = $tokens[$i]['scope_opener'];
        $scopeCloser = $tokens[$i]['scope_closer'];

        // If the first char after the opener is a curly brace
        // and that brace has been ignored, it is actually
        // opening this case statement and the opener and closer are
        // probably set incorrectly.
        for ($x = ($scopeOpener + 1); $x < $numTokens; $x++) {
            if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false) {
                // Non-whitespace content.
                break;
            }
        }

        if ($tokens[$x]['code'] === T_CASE || $tokens[$x]['code'] === T_DEFAULT) {
            // Special case for multiple CASE statements that share the same
            // closer. Because we are going backwards through the file, this next
            // CASE/DEFAULT statement is already fixed, so just use its closer
            // and don't worry about fixing anything.
            $newCloser = $tokens[$x]['scope_closer'];
            $tokens[$i]['scope_closer'] = $newCloser;
            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                $oldType = $tokens[$scopeCloser]['type'];
                $newType = $tokens[$newCloser]['type'];
                $line    = $tokens[$i]['line'];
                echo "\t* token $i (T_CASE) on line $line closer changed from $scopeCloser ($oldType) to $newCloser ($newType)".PHP_EOL;
            }

            continue;
        }

        if ($tokens[$x]['code'] !== T_OPEN_CURLY_BRACKET
            || isset($tokens[$x]['scope_condition']) === true
        ) {
            // Not a CASE/DEFAULT with a curly brace opener.
            continue;
        }

        // The closer for this CASE/DEFAULT should be the closing curly brace and
        // not whatever it already is. The opener needs to be the opening curly
        // brace so everything matches up.
        $newCloser = $tokens[$x]['bracket_closer'];
        foreach (array($i, $x, $newCloser) as $index) {
            $tokens[$index]['scope_condition'] = $i;
            $tokens[$index]['scope_opener']    = $x;
            $tokens[$index]['scope_closer']    = $newCloser;
        }

        // The old opener and closer no longer belong to a scope, and the
        // braces are now scope tokens rather than a plain bracket pair.
        unset($tokens[$scopeOpener]['scope_condition']);
        unset($tokens[$scopeOpener]['scope_opener']);
        unset($tokens[$scopeOpener]['scope_closer']);
        unset($tokens[$scopeCloser]['scope_condition']);
        unset($tokens[$scopeCloser]['scope_opener']);
        unset($tokens[$scopeCloser]['scope_closer']);
        unset($tokens[$x]['bracket_opener']);
        unset($tokens[$x]['bracket_closer']);
        unset($tokens[$newCloser]['bracket_opener']);
        unset($tokens[$newCloser]['bracket_closer']);

        // NOTE(review): elsewhere in this method 'conditions' is treated as
        // a map of token pointer => token code, but this appends the bare
        // pointer as a value. Kept as-is; confirm the intent.
        $tokens[$scopeCloser]['conditions'][] = $i;

        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            $line      = $tokens[$i]['line'];
            $tokenType = $tokens[$i]['type'];

            $oldType = $tokens[$scopeOpener]['type'];
            $newType = $tokens[$x]['type'];
            echo "\t* token $i ($tokenType) on line $line opener changed from $scopeOpener ($oldType) to $x ($newType)".PHP_EOL;

            $oldType = $tokens[$scopeCloser]['type'];
            $newType = $tokens[$newCloser]['type'];
            echo "\t* token $i ($tokenType) on line $line closer changed from $scopeCloser ($oldType) to $newCloser ($newType)".PHP_EOL;
        }

        // Now fix up all the tokens that think they are
        // inside the CASE/DEFAULT statement when they are really outside.
        for ($x = $newCloser; $x < $scopeCloser; $x++) {
            foreach ($tokens[$x]['conditions'] as $num => $oldCond) {
                if ($oldCond === $tokens[$i]['code']) {
                    $oldConditions = $tokens[$x]['conditions'];
                    unset($tokens[$x]['conditions'][$num]);

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $type     = $tokens[$x]['type'];
                        $oldConds = '';
                        foreach ($oldConditions as $condition) {
                            $oldConds .= token_name($condition).',';
                        }

                        $oldConds = rtrim($oldConds, ',');

                        $newConds = '';
                        foreach ($tokens[$x]['conditions'] as $condition) {
                            $newConds .= token_name($condition).',';
                        }

                        $newConds = rtrim($newConds, ',');

                        echo "\t\t* cleaned $x ($type) *".PHP_EOL;
                        echo "\t\t\t=> conditions changed from $oldConds to $newConds".PHP_EOL;
                    }

                    // Only the first matching condition is removed.
                    break;
                }//end if
            }//end foreach
        }//end for
    }//end for

    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t*** END ADDITIONAL PHP PROCESSING ***".PHP_EOL;
    }

}//end processAdditional()
1522
1523
/**
 * Takes a token produced from <code>token_get_all()</code> and produces a
 * more uniform token.
 *
 * Simple tokens (those with no second element) are delegated to
 * resolveSimpleToken(). Complex tokens are converted to an array holding
 * 'code', 'type' and 'content'. Resolved code/type pairs are memoised in
 * self::$_resolveTokenCache; T_STRING tokens are keyed by their lowercased
 * content, other tokens by their token id, and T_CURLY_OPEN is never cached.
 *
 * @param string|array $token The token to convert.
 *
 * @return array The new token.
 */
public static function standardiseToken($token)
{
    // Simple tokens: answer from the cache or resolve the single symbol.
    if (isset($token[1]) === false) {
        $symbol = $token[0];
        if (isset(self::$_resolveTokenCache[$symbol]) === true) {
            return self::$_resolveTokenCache[$symbol];
        }

        return self::resolveSimpleToken($symbol);
    }

    $tokenId = $token[0];
    $content = $token[1];

    // Work out which cache slot, if any, this token maps to.
    if ($tokenId === T_STRING) {
        $cacheKey = strtolower($content);
    } else if ($tokenId === T_CURLY_OPEN) {
        $cacheKey = null;
    } else {
        $cacheKey = $tokenId;
    }

    if ($cacheKey !== null && isset(self::$_resolveTokenCache[$cacheKey]) === true) {
        $cached            = self::$_resolveTokenCache[$cacheKey];
        $cached['content'] = $content;
        return $cached;
    }

    if ($tokenId === T_STRING) {
        // Some identifiers get a dedicated token type; the rest stay T_STRING.
        $specialTypes = array(
                         'false'  => 'T_FALSE',
                         'true'   => 'T_TRUE',
                         'null'   => 'T_NULL',
                         'self'   => 'T_SELF',
                         'parent' => 'T_PARENT',
                        );

        $typeName = 'T_STRING';
        if (isset($specialTypes[$cacheKey]) === true) {
            $typeName = $specialTypes[$cacheKey];
        }

        $resolved = array(
                     'type' => $typeName,
                     'code' => constant($typeName),
                    );

        self::$_resolveTokenCache[$cacheKey] = $resolved;
    } else if ($tokenId === T_CURLY_OPEN) {
        $resolved = array(
                     'code' => T_OPEN_CURLY_BRACKET,
                     'type' => 'T_OPEN_CURLY_BRACKET',
                    );
    } else {
        $resolved = array(
                     'code' => $tokenId,
                     'type' => token_name($tokenId),
                    );

        self::$_resolveTokenCache[$tokenId] = $resolved;
    }//end if

    // Content is added after caching so cached entries stay content-free.
    $resolved['content'] = $content;
    return $resolved;

}//end standardiseToken()
1600
1601
/**
 * Converts simple tokens into a format that conforms to complex tokens
 * produced by token_get_all().
 *
 * Simple tokens are tokens that are not in array form when produced from
 * token_get_all(). The symbol is mapped to a token type name, the matching
 * constant is looked up with constant(), and the resulting token is stored
 * in self::$_resolveTokenCache under the symbol before being returned.
 * Symbols that are not recognised resolve to T_NONE.
 *
 * The lookup is strict: only string symbols can match. A loosely compared
 * switch would treat values such as 0 or TRUE as equal to '{' on PHP
 * versions before 8.
 *
 * @param string $token The simple token to convert.
 *
 * @return array The new token in array format, with 'type', 'code' and
 *               'content' elements.
 */
public static function resolveSimpleToken($token)
{
    static $typesBySymbol = array(
                             '{' => 'T_OPEN_CURLY_BRACKET',
                             '}' => 'T_CLOSE_CURLY_BRACKET',
                             '[' => 'T_OPEN_SQUARE_BRACKET',
                             ']' => 'T_CLOSE_SQUARE_BRACKET',
                             '(' => 'T_OPEN_PARENTHESIS',
                             ')' => 'T_CLOSE_PARENTHESIS',
                             ':' => 'T_COLON',
                             '.' => 'T_STRING_CONCAT',
                             ';' => 'T_SEMICOLON',
                             '=' => 'T_EQUAL',
                             '*' => 'T_MULTIPLY',
                             '/' => 'T_DIVIDE',
                             '+' => 'T_PLUS',
                             '-' => 'T_MINUS',
                             '%' => 'T_MODULUS',
                             '^' => 'T_BITWISE_XOR',
                             '&' => 'T_BITWISE_AND',
                             '|' => 'T_BITWISE_OR',
                             '<' => 'T_LESS_THAN',
                             '>' => 'T_GREATER_THAN',
                             '!' => 'T_BOOLEAN_NOT',
                             ',' => 'T_COMMA',
                             '@' => 'T_ASPERAND',
                             '$' => 'T_DOLLAR',
                             '`' => 'T_BACKTICK',
                            );

    $type = 'T_NONE';
    if (is_string($token) === true && isset($typesBySymbol[$token]) === true) {
        $type = $typesBySymbol[$token];
    }

    // Key order (type, code, content) matches the previous implementation.
    $newToken = array(
                 'type'    => $type,
                 'code'    => constant($type),
                 'content' => $token,
                );

    self::$_resolveTokenCache[$token] = $newToken;
    return $newToken;

}//end resolveSimpleToken()
1705
1706
1707 }//end class