Chris@17: Chris@17: * @copyright 2006-2015 Squiz Pty Ltd (ABN 77 084 670 600) Chris@17: * @license https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence Chris@17: */ Chris@17: Chris@17: namespace PHP_CodeSniffer\Tokenizers; Chris@17: Chris@17: use PHP_CodeSniffer\Util; Chris@17: Chris@17: class PHP extends Tokenizer Chris@17: { Chris@17: Chris@17: Chris@17: /** Chris@17: * A list of tokens that are allowed to open a scope. Chris@17: * Chris@17: * This array also contains information about what kind of token the scope Chris@17: * opener uses to open and close the scope, if the token strictly requires Chris@17: * an opener, if the token can share a scope closer, and who it can be shared Chris@17: * with. An example of a token that shares a scope closer is a CASE scope. * * Each entry is keyed by the opening token and carries five keys: start and end map the tokens that may open and close the scope, strict and shared are boolean flags (per the description above: whether an opener is strictly required, and whether the closer can be shared), and with maps the tokens named as sharing partners. Every nested map uses the token as both key and value. Chris@17: * Chris@17: * @var array Chris@17: */ Chris@17: public $scopeOpeners = [ Chris@17: T_IF => [ Chris@17: 'start' => [ Chris@17: T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET, Chris@17: T_COLON => T_COLON, Chris@17: ], Chris@17: 'end' => [ Chris@17: T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET, Chris@17: T_ENDIF => T_ENDIF, Chris@17: T_ELSE => T_ELSE, Chris@17: T_ELSEIF => T_ELSEIF, Chris@17: ], Chris@17: 'strict' => false, Chris@17: 'shared' => false, Chris@17: 'with' => [ Chris@17: T_ELSE => T_ELSE, Chris@17: T_ELSEIF => T_ELSEIF, Chris@17: ], Chris@17: ], Chris@17: T_TRY => [ Chris@17: 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET], Chris@17: 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET], Chris@17: 'strict' => true, Chris@17: 'shared' => false, Chris@17: 'with' => [], Chris@17: ], Chris@17: T_CATCH => [ Chris@17: 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET], Chris@17: 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET], Chris@17: 'strict' => true, Chris@17: 'shared' => false, Chris@17: 'with' => [], Chris@17: ], Chris@17: T_FINALLY => [ Chris@17: 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET], Chris@17: 'end' => 
[T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET], Chris@17: 'strict' => true, Chris@17: 'shared' => false, Chris@17: 'with' => [], Chris@17: ], Chris@17: T_ELSE => [ Chris@17: 'start' => [ Chris@17: T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET, Chris@17: T_COLON => T_COLON, Chris@17: ], Chris@17: 'end' => [ Chris@17: T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET, Chris@17: T_ENDIF => T_ENDIF, Chris@17: ], Chris@17: 'strict' => false, Chris@17: 'shared' => false, Chris@17: 'with' => [ Chris@17: T_IF => T_IF, Chris@17: T_ELSEIF => T_ELSEIF, Chris@17: ], Chris@17: ], Chris@17: T_ELSEIF => [ Chris@17: 'start' => [ Chris@17: T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET, Chris@17: T_COLON => T_COLON, Chris@17: ], Chris@17: 'end' => [ Chris@17: T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET, Chris@17: T_ENDIF => T_ENDIF, Chris@17: T_ELSE => T_ELSE, Chris@17: T_ELSEIF => T_ELSEIF, Chris@17: ], Chris@17: 'strict' => false, Chris@17: 'shared' => false, Chris@17: 'with' => [ Chris@17: T_IF => T_IF, Chris@17: T_ELSE => T_ELSE, Chris@17: ], Chris@17: ], Chris@17: T_FOR => [ Chris@17: 'start' => [ Chris@17: T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET, Chris@17: T_COLON => T_COLON, Chris@17: ], Chris@17: 'end' => [ Chris@17: T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET, Chris@17: T_ENDFOR => T_ENDFOR, Chris@17: ], Chris@17: 'strict' => false, Chris@17: 'shared' => false, Chris@17: 'with' => [], Chris@17: ], Chris@17: T_FOREACH => [ Chris@17: 'start' => [ Chris@17: T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET, Chris@17: T_COLON => T_COLON, Chris@17: ], Chris@17: 'end' => [ Chris@17: T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET, Chris@17: T_ENDFOREACH => T_ENDFOREACH, Chris@17: ], Chris@17: 'strict' => false, Chris@17: 'shared' => false, Chris@17: 'with' => [], Chris@17: ], Chris@17: T_INTERFACE => [ Chris@17: 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET], Chris@17: 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET], Chris@17: 'strict' => true, Chris@17: 
'shared' => false, Chris@17: 'with' => [], Chris@17: ], Chris@17: T_FUNCTION => [ Chris@17: 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET], Chris@17: 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET], Chris@17: 'strict' => true, Chris@17: 'shared' => false, Chris@17: 'with' => [], Chris@17: ], Chris@17: T_CLASS => [ Chris@17: 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET], Chris@17: 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET], Chris@17: 'strict' => true, Chris@17: 'shared' => false, Chris@17: 'with' => [], Chris@17: ], Chris@17: T_TRAIT => [ Chris@17: 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET], Chris@17: 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET], Chris@17: 'strict' => true, Chris@17: 'shared' => false, Chris@17: 'with' => [], Chris@17: ], Chris@17: T_USE => [ Chris@17: 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET], Chris@17: 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET], Chris@17: 'strict' => false, Chris@17: 'shared' => false, Chris@17: 'with' => [], Chris@17: ], Chris@17: T_DECLARE => [ Chris@17: 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET], Chris@17: 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET], Chris@17: 'strict' => false, Chris@17: 'shared' => false, Chris@17: 'with' => [], Chris@17: ], Chris@17: T_NAMESPACE => [ Chris@17: 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET], Chris@17: 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET], Chris@17: 'strict' => false, Chris@17: 'shared' => false, Chris@17: 'with' => [], Chris@17: ], Chris@17: T_WHILE => [ Chris@17: 'start' => [ Chris@17: T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET, Chris@17: T_COLON => T_COLON, Chris@17: ], Chris@17: 'end' => [ Chris@17: T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET, Chris@17: T_ENDWHILE => T_ENDWHILE, Chris@17: ], Chris@17: 'strict' => false, Chris@17: 'shared' => false, Chris@17: 'with' => [], Chris@17: ], Chris@17: T_DO => [ Chris@17: 
'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET], Chris@17: 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET], Chris@17: 'strict' => true, Chris@17: 'shared' => false, Chris@17: 'with' => [], Chris@17: ], Chris@17: T_SWITCH => [ Chris@17: 'start' => [ Chris@17: T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET, Chris@17: T_COLON => T_COLON, Chris@17: ], Chris@17: 'end' => [ Chris@17: T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET, Chris@17: T_ENDSWITCH => T_ENDSWITCH, Chris@17: ], Chris@17: 'strict' => true, Chris@17: 'shared' => false, Chris@17: 'with' => [], Chris@17: ], Chris@17: T_CASE => [ Chris@17: 'start' => [ Chris@17: T_COLON => T_COLON, Chris@17: T_SEMICOLON => T_SEMICOLON, Chris@17: ], Chris@17: 'end' => [ Chris@17: T_BREAK => T_BREAK, Chris@17: T_RETURN => T_RETURN, Chris@17: T_CONTINUE => T_CONTINUE, Chris@17: T_THROW => T_THROW, Chris@17: T_EXIT => T_EXIT, Chris@17: ], Chris@17: 'strict' => true, Chris@17: 'shared' => true, Chris@17: 'with' => [ Chris@17: T_DEFAULT => T_DEFAULT, Chris@17: T_CASE => T_CASE, Chris@17: T_SWITCH => T_SWITCH, Chris@17: ], Chris@17: ], Chris@17: T_DEFAULT => [ Chris@17: 'start' => [ Chris@17: T_COLON => T_COLON, Chris@17: T_SEMICOLON => T_SEMICOLON, Chris@17: ], Chris@17: 'end' => [ Chris@17: T_BREAK => T_BREAK, Chris@17: T_RETURN => T_RETURN, Chris@17: T_CONTINUE => T_CONTINUE, Chris@17: T_THROW => T_THROW, Chris@17: T_EXIT => T_EXIT, Chris@17: ], Chris@17: 'strict' => true, Chris@17: 'shared' => true, Chris@17: 'with' => [ Chris@17: T_CASE => T_CASE, Chris@17: T_SWITCH => T_SWITCH, Chris@17: ], Chris@17: ], Chris@17: T_START_HEREDOC => [ Chris@17: 'start' => [T_START_HEREDOC => T_START_HEREDOC], Chris@17: 'end' => [T_END_HEREDOC => T_END_HEREDOC], Chris@17: 'strict' => true, Chris@17: 'shared' => false, Chris@17: 'with' => [], Chris@17: ], Chris@17: T_START_NOWDOC => [ Chris@17: 'start' => [T_START_NOWDOC => T_START_NOWDOC], Chris@17: 'end' => [T_END_NOWDOC => T_END_NOWDOC], Chris@17: 'strict' => true, 
Chris@17: 'shared' => false, Chris@17: 'with' => [], Chris@17: ], Chris@17: ]; Chris@17: Chris@17: /** Chris@17: * A list of tokens that end the scope. Chris@17: * Chris@17: * This array is just a unique collection of the end tokens Chris@18: * from the scopeOpeners array. The data is duplicated here to Chris@17: * save time during parsing of the file. * * NOTE(review): this list is narrower than the end maps in scopeOpeners. T_ELSE, T_ELSEIF, T_RETURN, T_CONTINUE, T_THROW, T_EXIT and T_END_NOWDOC are listed there but not here; confirm against the code that reads this property whether the omissions are intentional. Chris@17: * Chris@17: * @var array Chris@17: */ Chris@17: public $endScopeTokens = [ Chris@17: T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET, Chris@17: T_ENDIF => T_ENDIF, Chris@17: T_ENDFOR => T_ENDFOR, Chris@17: T_ENDFOREACH => T_ENDFOREACH, Chris@17: T_ENDWHILE => T_ENDWHILE, Chris@17: T_ENDSWITCH => T_ENDSWITCH, Chris@17: T_BREAK => T_BREAK, Chris@17: T_END_HEREDOC => T_END_HEREDOC, Chris@17: ]; Chris@17: Chris@17: /** Chris@17: * Known lengths of tokens. * * Maps a token to an integer length, for example T_IF to 2 and T_FUNCTION to 8. The values line up with the character count of the keyword or operator the token stands for. NOTE(review): the consumer of this table is not visible here, so how the lengths are used is unconfirmed. Chris@17: * Chris@17: * @var array Chris@17: */ Chris@17: public $knownLengths = [ Chris@17: T_ABSTRACT => 8, Chris@17: T_AND_EQUAL => 2, Chris@17: T_ARRAY => 5, Chris@17: T_AS => 2, Chris@17: T_BOOLEAN_AND => 2, Chris@17: T_BOOLEAN_OR => 2, Chris@17: T_BREAK => 5, Chris@17: T_CALLABLE => 8, Chris@17: T_CASE => 4, Chris@17: T_CATCH => 5, Chris@17: T_CLASS => 5, Chris@17: T_CLASS_C => 9, Chris@17: T_CLONE => 5, Chris@17: T_CONCAT_EQUAL => 2, Chris@17: T_CONST => 5, Chris@17: T_CONTINUE => 8, Chris@17: T_CURLY_OPEN => 2, Chris@17: T_DEC => 2, Chris@17: T_DECLARE => 7, Chris@17: T_DEFAULT => 7, Chris@17: T_DIR => 7, Chris@17: T_DIV_EQUAL => 2, Chris@17: T_DO => 2, Chris@17: T_DOLLAR_OPEN_CURLY_BRACES => 2, Chris@17: T_DOUBLE_ARROW => 2, Chris@17: T_DOUBLE_COLON => 2, Chris@17: T_ECHO => 4, Chris@17: T_ELSE => 4, Chris@17: T_ELSEIF => 6, Chris@17: T_EMPTY => 5, Chris@17: T_ENDDECLARE => 10, Chris@17: T_ENDFOR => 6, Chris@17: T_ENDFOREACH => 10, Chris@17: T_ENDIF => 5, Chris@17: T_ENDSWITCH => 9, Chris@17: T_ENDWHILE => 8, Chris@17: T_EVAL => 4, Chris@17: T_EXTENDS => 7, Chris@17: T_FILE => 8, Chris@17: T_FINAL => 5, Chris@17: T_FINALLY => 7, Chris@17: T_FOR => 3, 
Chris@17: T_FOREACH => 7, Chris@17: T_FUNCTION => 8, Chris@17: T_FUNC_C => 12, Chris@17: T_GLOBAL => 6, Chris@17: T_GOTO => 4, Chris@17: T_HALT_COMPILER => 15, Chris@17: T_IF => 2, Chris@17: T_IMPLEMENTS => 10, Chris@17: T_INC => 2, Chris@17: T_INCLUDE => 7, Chris@17: T_INCLUDE_ONCE => 12, Chris@17: T_INSTANCEOF => 10, Chris@17: T_INSTEADOF => 9, Chris@17: T_INTERFACE => 9, Chris@17: T_ISSET => 5, Chris@17: T_IS_EQUAL => 2, Chris@17: T_IS_GREATER_OR_EQUAL => 2, Chris@17: T_IS_IDENTICAL => 3, Chris@17: T_IS_NOT_EQUAL => 2, Chris@17: T_IS_NOT_IDENTICAL => 3, Chris@17: T_IS_SMALLER_OR_EQUAL => 2, Chris@17: T_LINE => 8, Chris@17: T_LIST => 4, Chris@17: T_LOGICAL_AND => 3, Chris@17: T_LOGICAL_OR => 2, Chris@17: T_LOGICAL_XOR => 3, Chris@17: T_METHOD_C => 10, Chris@17: T_MINUS_EQUAL => 2, Chris@17: T_POW_EQUAL => 3, Chris@17: T_MOD_EQUAL => 2, Chris@17: T_MUL_EQUAL => 2, Chris@17: T_NAMESPACE => 9, Chris@17: T_NS_C => 13, Chris@17: T_NS_SEPARATOR => 1, Chris@17: T_NEW => 3, Chris@17: T_OBJECT_OPERATOR => 2, Chris@17: T_OPEN_TAG_WITH_ECHO => 3, Chris@17: T_OR_EQUAL => 2, Chris@17: T_PLUS_EQUAL => 2, Chris@17: T_PRINT => 5, Chris@17: T_PRIVATE => 7, Chris@17: T_PUBLIC => 6, Chris@17: T_PROTECTED => 9, Chris@17: T_REQUIRE => 7, Chris@17: T_REQUIRE_ONCE => 12, Chris@17: T_RETURN => 6, Chris@17: T_STATIC => 6, Chris@17: T_SWITCH => 6, Chris@17: T_THROW => 5, Chris@17: T_TRAIT => 5, Chris@17: T_TRAIT_C => 9, Chris@17: T_TRY => 3, Chris@17: T_UNSET => 5, Chris@17: T_USE => 3, Chris@17: T_VAR => 3, Chris@17: T_WHILE => 5, Chris@17: T_XOR_EQUAL => 2, Chris@17: T_YIELD => 5, Chris@17: T_OPEN_CURLY_BRACKET => 1, Chris@17: T_CLOSE_CURLY_BRACKET => 1, Chris@17: T_OPEN_SQUARE_BRACKET => 1, Chris@17: T_CLOSE_SQUARE_BRACKET => 1, Chris@17: T_OPEN_PARENTHESIS => 1, Chris@17: T_CLOSE_PARENTHESIS => 1, Chris@17: T_COLON => 1, Chris@17: T_STRING_CONCAT => 1, Chris@17: T_INLINE_THEN => 1, Chris@17: T_INLINE_ELSE => 1, Chris@17: T_NULLABLE => 1, Chris@17: T_NULL => 4, Chris@17: T_FALSE => 5, 
Chris@17: T_TRUE => 4, Chris@17: T_SEMICOLON => 1, Chris@17: T_EQUAL => 1, Chris@17: T_MULTIPLY => 1, Chris@17: T_DIVIDE => 1, Chris@17: T_PLUS => 1, Chris@17: T_MINUS => 1, Chris@17: T_MODULUS => 1, Chris@17: T_POW => 2, Chris@17: T_SPACESHIP => 3, Chris@17: T_COALESCE => 2, Chris@17: T_COALESCE_EQUAL => 3, Chris@17: T_BITWISE_AND => 1, Chris@17: T_BITWISE_OR => 1, Chris@17: T_BITWISE_XOR => 1, Chris@17: T_SL => 2, Chris@17: T_SR => 2, Chris@17: T_SL_EQUAL => 3, Chris@17: T_SR_EQUAL => 3, Chris@17: T_GREATER_THAN => 1, Chris@17: T_LESS_THAN => 1, Chris@17: T_BOOLEAN_NOT => 1, Chris@17: T_SELF => 4, Chris@17: T_PARENT => 6, Chris@17: T_COMMA => 1, Chris@17: T_THIS => 4, Chris@17: T_CLOSURE => 8, Chris@17: T_BACKTICK => 1, Chris@17: T_OPEN_SHORT_ARRAY => 1, Chris@17: T_CLOSE_SHORT_ARRAY => 1, Chris@17: ]; Chris@17: Chris@17: Chris@17: /** Chris@17: * A cache of different token types, resolved into arrays. Chris@17: * Chris@17: * @var array Chris@17: * @see standardiseToken() Chris@17: */ Chris@17: private static $resolveTokenCache = []; Chris@17: Chris@17: Chris@17: /** Chris@17: * Creates an array of tokens when given some PHP code. Chris@17: * Chris@17: * Starts by using token_get_all() but does a lot of extra processing Chris@17: * to insert information about the context of the token. Chris@17: * Chris@17: * @param string $string The string to tokenize. 
Chris@17: * Chris@17: * @return array Chris@17: */ Chris@17: protected function tokenize($string) Chris@17: { Chris@17: if (PHP_CODESNIFFER_VERBOSITY > 1) { Chris@17: echo "\t*** START PHP TOKENIZING ***".PHP_EOL; Chris@17: $isWin = false; Chris@17: if (strtoupper(substr(PHP_OS, 0, 3)) === 'WIN') { Chris@17: $isWin = true; Chris@17: } Chris@17: } Chris@17: Chris@17: $tokens = @token_get_all($string); Chris@17: $finalTokens = []; Chris@17: Chris@17: $newStackPtr = 0; Chris@17: $numTokens = count($tokens); Chris@17: $lastNotEmptyToken = 0; Chris@17: Chris@17: $insideInlineIf = []; Chris@17: $insideUseGroup = false; Chris@17: Chris@17: $commentTokenizer = new Comment(); Chris@17: Chris@17: for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) { Chris@17: // Special case for tokens we have needed to blank out. Chris@17: if ($tokens[$stackPtr] === null) { Chris@17: continue; Chris@17: } Chris@17: Chris@17: $token = (array) $tokens[$stackPtr]; Chris@17: $tokenIsArray = isset($token[1]); Chris@17: Chris@17: if (PHP_CODESNIFFER_VERBOSITY > 1) { Chris@17: if ($tokenIsArray === true) { Chris@17: $type = Util\Tokens::tokenName($token[0]); Chris@17: $content = Util\Common::prepareForOutput($token[1]); Chris@17: } else { Chris@17: $newToken = self::resolveSimpleToken($token[0]); Chris@17: $type = $newToken['type']; Chris@17: $content = Util\Common::prepareForOutput($token[0]); Chris@17: } Chris@17: Chris@17: echo "\tProcess token "; Chris@17: if ($tokenIsArray === true) { Chris@17: echo "[$stackPtr]"; Chris@17: } else { Chris@17: echo " $stackPtr "; Chris@17: } Chris@17: Chris@17: echo ": $type => $content"; Chris@17: }//end if Chris@17: Chris@18: if ($newStackPtr > 0 Chris@18: && isset(Util\Tokens::$emptyTokens[$finalTokens[($newStackPtr - 1)]['code']]) === false Chris@18: ) { Chris@17: $lastNotEmptyToken = ($newStackPtr - 1); Chris@17: } Chris@17: Chris@17: /* Chris@17: If we are using \r\n newline characters, the \r and \n are sometimes Chris@17: split over two tokens. 
This normally occurs after comments. We need Chris@17: to merge these two characters together so that our line endings are Chris@17: consistent for all lines. Chris@17: */ Chris@17: Chris@17: if ($tokenIsArray === true && substr($token[1], -1) === "\r") { Chris@17: if (isset($tokens[($stackPtr + 1)]) === true Chris@17: && is_array($tokens[($stackPtr + 1)]) === true Chris@17: && $tokens[($stackPtr + 1)][1][0] === "\n" Chris@17: ) { Chris@17: $token[1] .= "\n"; Chris@17: if (PHP_CODESNIFFER_VERBOSITY > 1) { Chris@17: if ($isWin === true) { Chris@17: echo '\n'; Chris@17: } else { Chris@17: echo "\033[30;1m\\n\033[0m"; Chris@17: } Chris@17: } Chris@17: Chris@17: if ($tokens[($stackPtr + 1)][1] === "\n") { Chris@17: // This token's content has been merged into the previous, Chris@17: // so we can skip it. Chris@17: $tokens[($stackPtr + 1)] = ''; Chris@17: } else { Chris@17: $tokens[($stackPtr + 1)][1] = substr($tokens[($stackPtr + 1)][1], 1); Chris@17: } Chris@17: } Chris@17: }//end if Chris@17: Chris@17: if (PHP_CODESNIFFER_VERBOSITY > 1) { Chris@17: echo PHP_EOL; Chris@17: } Chris@17: Chris@17: /* Chris@17: Parse doc blocks into something that can be easily iterated over. Chris@17: */ Chris@17: Chris@17: if ($tokenIsArray === true Chris@17: && ($token[0] === T_DOC_COMMENT Chris@17: || ($token[0] === T_COMMENT && strpos($token[1], '/**') === 0)) Chris@17: ) { Chris@17: $commentTokens = $commentTokenizer->tokenizeString($token[1], $this->eolChar, $newStackPtr); Chris@17: foreach ($commentTokens as $commentToken) { Chris@17: $finalTokens[$newStackPtr] = $commentToken; Chris@17: $newStackPtr++; Chris@17: } Chris@17: Chris@17: continue; Chris@17: } Chris@17: Chris@17: /* Chris@17: If this is a double quoted string, PHP will tokenize the whole Chris@17: thing which causes problems with the scope map when braces are Chris@17: within the string. So we need to merge the tokens together to Chris@17: provide a single string. 
Chris@17: */ Chris@17: Chris@17: if ($tokenIsArray === false && ($token[0] === '"' || $token[0] === 'b"')) { Chris@17: // Binary casts need a special token. Chris@17: if ($token[0] === 'b"') { Chris@17: $finalTokens[$newStackPtr] = [ Chris@17: 'code' => T_BINARY_CAST, Chris@17: 'type' => 'T_BINARY_CAST', Chris@17: 'content' => 'b', Chris@17: ]; Chris@17: $newStackPtr++; Chris@17: } Chris@17: Chris@17: $tokenContent = '"'; Chris@17: $nestedVars = []; Chris@17: for ($i = ($stackPtr + 1); $i < $numTokens; $i++) { Chris@17: $subToken = (array) $tokens[$i]; Chris@17: $subTokenIsArray = isset($subToken[1]); Chris@17: Chris@17: if ($subTokenIsArray === true) { Chris@17: $tokenContent .= $subToken[1]; Chris@17: if ($subToken[1] === '{' Chris@17: && $subToken[0] !== T_ENCAPSED_AND_WHITESPACE Chris@17: ) { Chris@17: $nestedVars[] = $i; Chris@17: } Chris@17: } else { Chris@17: $tokenContent .= $subToken[0]; Chris@17: if ($subToken[0] === '}') { Chris@17: array_pop($nestedVars); Chris@17: } Chris@17: } Chris@17: Chris@17: if ($subTokenIsArray === false Chris@17: && $subToken[0] === '"' Chris@17: && empty($nestedVars) === true Chris@17: ) { Chris@17: // We found the other end of the double quoted string. Chris@17: break; Chris@17: } Chris@17: }//end for Chris@17: Chris@17: $stackPtr = $i; Chris@17: Chris@17: // Convert each line within the double quoted string to a Chris@17: // new token, so it conforms with other multiple line tokens. 
Chris@17: $tokenLines = explode($this->eolChar, $tokenContent); Chris@17: $numLines = count($tokenLines); Chris@17: $newToken = []; Chris@17: Chris@17: for ($j = 0; $j < $numLines; $j++) { Chris@17: $newToken['content'] = $tokenLines[$j]; Chris@17: if ($j === ($numLines - 1)) { Chris@17: if ($tokenLines[$j] === '') { Chris@17: break; Chris@17: } Chris@17: } else { Chris@17: $newToken['content'] .= $this->eolChar; Chris@17: } Chris@17: Chris@17: $newToken['code'] = T_DOUBLE_QUOTED_STRING; Chris@17: $newToken['type'] = 'T_DOUBLE_QUOTED_STRING'; Chris@17: $finalTokens[$newStackPtr] = $newToken; Chris@17: $newStackPtr++; Chris@17: } Chris@17: Chris@17: // Continue, as we're done with this token. Chris@17: continue; Chris@17: }//end if Chris@17: Chris@17: /* Chris@17: Detect binary casting and assign the casts their own token. Chris@17: */ Chris@17: Chris@17: if ($tokenIsArray === true Chris@17: && $token[0] === T_CONSTANT_ENCAPSED_STRING Chris@17: && (substr($token[1], 0, 2) === 'b"' Chris@17: || substr($token[1], 0, 2) === "b'") Chris@17: ) { Chris@17: $finalTokens[$newStackPtr] = [ Chris@17: 'code' => T_BINARY_CAST, Chris@17: 'type' => 'T_BINARY_CAST', Chris@17: 'content' => 'b', Chris@17: ]; Chris@17: $newStackPtr++; Chris@17: $token[1] = substr($token[1], 1); Chris@17: } Chris@17: Chris@17: if ($tokenIsArray === true Chris@17: && $token[0] === T_STRING_CAST Chris@17: && preg_match('`^\(\s*binary\s*\)$`i', $token[1]) === 1 Chris@17: ) { Chris@17: $finalTokens[$newStackPtr] = [ Chris@17: 'code' => T_BINARY_CAST, Chris@17: 'type' => 'T_BINARY_CAST', Chris@17: 'content' => $token[1], Chris@17: ]; Chris@17: $newStackPtr++; Chris@17: continue; Chris@17: } Chris@17: Chris@17: /* Chris@17: If this is a heredoc, PHP will tokenize the whole Chris@17: thing which causes problems when heredocs don't Chris@17: contain real PHP code, which is almost never. Chris@17: We want to leave the start and end heredoc tokens Chris@17: alone though. 
Chris@17: */ Chris@17: Chris@17: if ($tokenIsArray === true && $token[0] === T_START_HEREDOC) { Chris@17: // Add the start heredoc token to the final array. Chris@17: $finalTokens[$newStackPtr] = self::standardiseToken($token); Chris@17: Chris@17: // Check if this is actually a nowdoc and use a different token Chris@17: // to help the sniffs. Chris@17: $nowdoc = false; Chris@17: if (strpos($token[1], "'") !== false) { Chris@17: $finalTokens[$newStackPtr]['code'] = T_START_NOWDOC; Chris@17: $finalTokens[$newStackPtr]['type'] = 'T_START_NOWDOC'; Chris@17: $nowdoc = true; Chris@17: } Chris@17: Chris@17: $tokenContent = ''; Chris@17: for ($i = ($stackPtr + 1); $i < $numTokens; $i++) { Chris@17: $subTokenIsArray = is_array($tokens[$i]); Chris@17: if ($subTokenIsArray === true Chris@17: && $tokens[$i][0] === T_END_HEREDOC Chris@17: ) { Chris@17: // We found the other end of the heredoc. Chris@17: break; Chris@17: } Chris@17: Chris@17: if ($subTokenIsArray === true) { Chris@17: $tokenContent .= $tokens[$i][1]; Chris@17: } else { Chris@17: $tokenContent .= $tokens[$i]; Chris@17: } Chris@17: } Chris@17: Chris@17: if ($i === $numTokens) { Chris@17: // We got to the end of the file and never Chris@17: // found the closing token, so this probably wasn't Chris@17: // a heredoc. Chris@17: if (PHP_CODESNIFFER_VERBOSITY > 1) { Chris@17: $type = $finalTokens[$newStackPtr]['type']; Chris@17: echo "\t\t* failed to find the end of the here/nowdoc".PHP_EOL; Chris@17: echo "\t\t* token $stackPtr changed from $type to T_STRING".PHP_EOL; Chris@17: } Chris@17: Chris@17: $finalTokens[$newStackPtr]['code'] = T_STRING; Chris@17: $finalTokens[$newStackPtr]['type'] = 'T_STRING'; Chris@17: $newStackPtr++; Chris@17: continue; Chris@17: } Chris@17: Chris@17: $stackPtr = $i; Chris@17: $newStackPtr++; Chris@17: Chris@17: // Convert each line within the heredoc to a Chris@17: // new token, so it conforms with other multiple line tokens. 
Chris@17: $tokenLines = explode($this->eolChar, $tokenContent); Chris@17: $numLines = count($tokenLines); Chris@17: $newToken = []; Chris@17: Chris@17: for ($j = 0; $j < $numLines; $j++) { Chris@17: $newToken['content'] = $tokenLines[$j]; Chris@17: if ($j === ($numLines - 1)) { Chris@17: if ($tokenLines[$j] === '') { Chris@17: break; Chris@17: } Chris@17: } else { Chris@17: $newToken['content'] .= $this->eolChar; Chris@17: } Chris@17: Chris@17: if ($nowdoc === true) { Chris@17: $newToken['code'] = T_NOWDOC; Chris@17: $newToken['type'] = 'T_NOWDOC'; Chris@17: } else { Chris@17: $newToken['code'] = T_HEREDOC; Chris@17: $newToken['type'] = 'T_HEREDOC'; Chris@17: } Chris@17: Chris@17: $finalTokens[$newStackPtr] = $newToken; Chris@17: $newStackPtr++; Chris@17: }//end for Chris@17: Chris@17: // Add the end heredoc token to the final array. Chris@17: $finalTokens[$newStackPtr] = self::standardiseToken($tokens[$stackPtr]); Chris@17: Chris@17: if ($nowdoc === true) { Chris@17: $finalTokens[$newStackPtr]['code'] = T_END_NOWDOC; Chris@17: $finalTokens[$newStackPtr]['type'] = 'T_END_NOWDOC'; Chris@17: } Chris@17: Chris@17: $newStackPtr++; Chris@17: Chris@17: // Continue, as we're done with this token. Chris@17: continue; Chris@17: }//end if Chris@17: Chris@17: /* Chris@17: Before PHP 7.0, the "yield from" was tokenized as Chris@17: T_YIELD, T_WHITESPACE and T_STRING. So look for Chris@17: and change this token in earlier versions. Chris@17: */ Chris@17: Chris@17: if (PHP_VERSION_ID < 70000 Chris@17: && PHP_VERSION_ID >= 50500 Chris@17: && $tokenIsArray === true Chris@17: && $token[0] === T_YIELD Chris@17: && isset($tokens[($stackPtr + 1)]) === true Chris@17: && isset($tokens[($stackPtr + 2)]) === true Chris@17: && $tokens[($stackPtr + 1)][0] === T_WHITESPACE Chris@17: && $tokens[($stackPtr + 2)][0] === T_STRING Chris@17: && strtolower($tokens[($stackPtr + 2)][1]) === 'from' Chris@17: ) { Chris@17: // Could be multi-line, so just the token stack. 
Chris@18: $token[0] = T_YIELD_FROM; Chris@18: $token[1] .= $tokens[($stackPtr + 1)][1].$tokens[($stackPtr + 2)][1]; Chris@17: Chris@17: if (PHP_CODESNIFFER_VERBOSITY > 1) { Chris@17: for ($i = ($stackPtr + 1); $i <= ($stackPtr + 2); $i++) { Chris@17: $type = Util\Tokens::tokenName($tokens[$i][0]); Chris@17: $content = Util\Common::prepareForOutput($tokens[$i][1]); Chris@17: echo "\t\t* token $i merged into T_YIELD_FROM; was: $type => $content".PHP_EOL; Chris@17: } Chris@17: } Chris@17: Chris@17: $tokens[($stackPtr + 1)] = null; Chris@17: $tokens[($stackPtr + 2)] = null; Chris@17: } Chris@17: Chris@17: /* Chris@17: Before PHP 5.5, the yield keyword was tokenized as Chris@17: T_STRING. So look for and change this token in Chris@17: earlier versions. Chris@17: Checks also if it is just "yield" or "yield from". Chris@17: */ Chris@17: Chris@17: if (PHP_VERSION_ID < 50500 Chris@17: && $tokenIsArray === true Chris@17: && $token[0] === T_STRING Chris@17: && strtolower($token[1]) === 'yield' Chris@17: ) { Chris@17: if (isset($tokens[($stackPtr + 1)]) === true Chris@17: && isset($tokens[($stackPtr + 2)]) === true Chris@17: && $tokens[($stackPtr + 1)][0] === T_WHITESPACE Chris@17: && $tokens[($stackPtr + 2)][0] === T_STRING Chris@17: && strtolower($tokens[($stackPtr + 2)][1]) === 'from' Chris@17: ) { Chris@17: // Could be multi-line, so just just the token stack. 
Chris@18: $token[0] = T_YIELD_FROM; Chris@18: $token[1] .= $tokens[($stackPtr + 1)][1].$tokens[($stackPtr + 2)][1]; Chris@17: Chris@17: if (PHP_CODESNIFFER_VERBOSITY > 1) { Chris@17: for ($i = ($stackPtr + 1); $i <= ($stackPtr + 2); $i++) { Chris@17: $type = Util\Tokens::tokenName($tokens[$i][0]); Chris@17: $content = Util\Common::prepareForOutput($tokens[$i][1]); Chris@17: echo "\t\t* token $i merged into T_YIELD_FROM; was: $type => $content".PHP_EOL; Chris@17: } Chris@17: } Chris@17: Chris@17: $tokens[($stackPtr + 1)] = null; Chris@17: $tokens[($stackPtr + 2)] = null; Chris@17: } else { Chris@17: $newToken = []; Chris@17: $newToken['code'] = T_YIELD; Chris@17: $newToken['type'] = 'T_YIELD'; Chris@17: $newToken['content'] = $token[1]; Chris@17: $finalTokens[$newStackPtr] = $newToken; Chris@17: Chris@17: $newStackPtr++; Chris@17: continue; Chris@17: }//end if Chris@17: }//end if Chris@17: Chris@17: /* Chris@17: Before PHP 5.6, the ... operator was tokenized as three Chris@17: T_STRING_CONCAT tokens in a row. So look for and combine Chris@17: these tokens in earlier versions. Chris@17: */ Chris@17: Chris@17: if ($tokenIsArray === false Chris@17: && $token[0] === '.' Chris@17: && isset($tokens[($stackPtr + 1)]) === true Chris@17: && isset($tokens[($stackPtr + 2)]) === true Chris@17: && $tokens[($stackPtr + 1)] === '.' Chris@17: && $tokens[($stackPtr + 2)] === '.' Chris@17: ) { Chris@17: $newToken = []; Chris@17: $newToken['code'] = T_ELLIPSIS; Chris@17: $newToken['type'] = 'T_ELLIPSIS'; Chris@17: $newToken['content'] = '...'; Chris@17: $finalTokens[$newStackPtr] = $newToken; Chris@17: Chris@17: $newStackPtr++; Chris@17: $stackPtr += 2; Chris@17: continue; Chris@17: } Chris@17: Chris@17: /* Chris@17: Before PHP 5.6, the ** operator was tokenized as two Chris@17: T_MULTIPLY tokens in a row. So look for and combine Chris@17: these tokens in earlier versions. 
Chris@17: */ Chris@17: Chris@17: if ($tokenIsArray === false Chris@17: && $token[0] === '*' Chris@17: && isset($tokens[($stackPtr + 1)]) === true Chris@17: && $tokens[($stackPtr + 1)] === '*' Chris@17: ) { Chris@17: $newToken = []; Chris@17: $newToken['code'] = T_POW; Chris@17: $newToken['type'] = 'T_POW'; Chris@17: $newToken['content'] = '**'; Chris@17: $finalTokens[$newStackPtr] = $newToken; Chris@17: Chris@17: $newStackPtr++; Chris@17: $stackPtr++; Chris@17: continue; Chris@17: } Chris@17: Chris@17: /* Chris@17: Before PHP 5.6, the **= operator was tokenized as Chris@17: T_MULTIPLY followed by T_MUL_EQUAL. So look for and combine Chris@17: these tokens in earlier versions. Chris@17: */ Chris@17: Chris@17: if ($tokenIsArray === false Chris@17: && $token[0] === '*' Chris@17: && isset($tokens[($stackPtr + 1)]) === true Chris@17: && is_array($tokens[($stackPtr + 1)]) === true Chris@17: && $tokens[($stackPtr + 1)][1] === '*=' Chris@17: ) { Chris@17: $newToken = []; Chris@17: $newToken['code'] = T_POW_EQUAL; Chris@17: $newToken['type'] = 'T_POW_EQUAL'; Chris@17: $newToken['content'] = '**='; Chris@17: $finalTokens[$newStackPtr] = $newToken; Chris@17: Chris@17: $newStackPtr++; Chris@17: $stackPtr++; Chris@17: continue; Chris@17: } Chris@17: Chris@17: /* Chris@17: Before PHP 7, the ??= operator was tokenized as Chris@17: T_INLINE_THEN, T_INLINE_THEN, T_EQUAL. Chris@17: Between PHP 7.0 and 7.2, the ??= operator was tokenized as Chris@17: T_COALESCE, T_EQUAL. Chris@17: So look for and combine these tokens in earlier versions. Chris@17: */ Chris@17: Chris@17: if (($tokenIsArray === false Chris@17: && $token[0] === '?' Chris@17: && isset($tokens[($stackPtr + 1)]) === true Chris@17: && $tokens[($stackPtr + 1)][0] === '?' 
Chris@17: && isset($tokens[($stackPtr + 2)]) === true Chris@17: && $tokens[($stackPtr + 2)][0] === '=') Chris@17: || ($tokenIsArray === true Chris@17: && $token[0] === T_COALESCE Chris@17: && isset($tokens[($stackPtr + 1)]) === true Chris@17: && $tokens[($stackPtr + 1)][0] === '=') Chris@17: ) { Chris@17: $newToken = []; Chris@17: $newToken['code'] = T_COALESCE_EQUAL; Chris@17: $newToken['type'] = 'T_COALESCE_EQUAL'; Chris@17: $newToken['content'] = '??='; Chris@17: $finalTokens[$newStackPtr] = $newToken; Chris@17: Chris@17: $newStackPtr++; Chris@17: $stackPtr++; Chris@17: Chris@17: if ($tokenIsArray === false) { Chris@17: // Pre PHP 7. Chris@17: $stackPtr++; Chris@17: } Chris@17: Chris@17: continue; Chris@17: } Chris@17: Chris@17: /* Chris@17: Before PHP 7, the ?? operator was tokenized as Chris@17: T_INLINE_THEN followed by T_INLINE_THEN. Chris@17: So look for and combine these tokens in earlier versions. Chris@17: */ Chris@17: Chris@17: if ($tokenIsArray === false Chris@17: && $token[0] === '?' Chris@17: && isset($tokens[($stackPtr + 1)]) === true Chris@17: && $tokens[($stackPtr + 1)][0] === '?' Chris@17: ) { Chris@17: $newToken = []; Chris@17: $newToken['code'] = T_COALESCE; Chris@17: $newToken['type'] = 'T_COALESCE'; Chris@17: $newToken['content'] = '??'; Chris@17: $finalTokens[$newStackPtr] = $newToken; Chris@17: Chris@17: $newStackPtr++; Chris@17: $stackPtr++; Chris@17: continue; Chris@17: } Chris@17: Chris@17: /* Chris@17: Convert ? 
to T_NULLABLE OR T_INLINE_THEN Chris@17: */ Chris@17: Chris@17: if ($tokenIsArray === false && $token[0] === '?') { Chris@17: $newToken = []; Chris@17: $newToken['content'] = '?'; Chris@17: Chris@17: $prevNonEmpty = null; Chris@17: for ($i = ($stackPtr - 1); $i >= 0; $i--) { Chris@17: if (is_array($tokens[$i]) === true) { Chris@17: $tokenType = $tokens[$i][0]; Chris@17: } else { Chris@17: $tokenType = $tokens[$i]; Chris@17: } Chris@17: Chris@17: if ($prevNonEmpty === null Chris@17: && isset(Util\Tokens::$emptyTokens[$tokenType]) === false Chris@17: ) { Chris@17: // Found the previous non-empty token. Chris@17: if ($tokenType === ':' || $tokenType === ',') { Chris@17: $newToken['code'] = T_NULLABLE; Chris@17: $newToken['type'] = 'T_NULLABLE'; Chris@17: break; Chris@17: } Chris@17: Chris@17: $prevNonEmpty = $tokenType; Chris@17: } Chris@17: Chris@17: if ($tokenType === T_FUNCTION) { Chris@17: $newToken['code'] = T_NULLABLE; Chris@17: $newToken['type'] = 'T_NULLABLE'; Chris@17: break; Chris@18: } else if (in_array($tokenType, [T_OPEN_TAG, T_OPEN_TAG_WITH_ECHO, '=', '{', ';'], true) === true) { Chris@17: $newToken['code'] = T_INLINE_THEN; Chris@17: $newToken['type'] = 'T_INLINE_THEN'; Chris@17: Chris@17: $insideInlineIf[] = $stackPtr; Chris@17: break; Chris@17: } Chris@17: }//end for Chris@17: Chris@17: $finalTokens[$newStackPtr] = $newToken; Chris@17: $newStackPtr++; Chris@17: continue; Chris@17: }//end if Chris@17: Chris@17: /* Chris@17: Tokens after a double colon may be look like scope openers, Chris@17: such as when writing code like Foo::NAMESPACE, but they are Chris@17: only ever variables or strings. 
Chris@17: */ Chris@17: Chris@17: if ($stackPtr > 1 Chris@17: && (is_array($tokens[($stackPtr - 1)]) === true Chris@17: && $tokens[($stackPtr - 1)][0] === T_PAAMAYIM_NEKUDOTAYIM) Chris@17: && $tokenIsArray === true Chris@17: && $token[0] !== T_STRING Chris@17: && $token[0] !== T_VARIABLE Chris@17: && $token[0] !== T_DOLLAR Chris@17: && isset(Util\Tokens::$emptyTokens[$token[0]]) === false Chris@17: ) { Chris@17: $newToken = []; Chris@17: $newToken['code'] = T_STRING; Chris@17: $newToken['type'] = 'T_STRING'; Chris@17: $newToken['content'] = $token[1]; Chris@17: $finalTokens[$newStackPtr] = $newToken; Chris@17: Chris@17: $newStackPtr++; Chris@17: continue; Chris@17: } Chris@17: Chris@17: /* Chris@17: The string-like token after a function keyword should always be Chris@17: tokenized as T_STRING even if it appears to be a different token, Chris@17: such as when writing code like: function default(): foo Chris@17: so go forward and change the token type before it is processed. Chris@17: */ Chris@17: Chris@17: if ($tokenIsArray === true Chris@17: && $token[0] === T_FUNCTION Chris@17: && $finalTokens[$lastNotEmptyToken]['code'] !== T_USE Chris@17: ) { Chris@17: for ($x = ($stackPtr + 1); $x < $numTokens; $x++) { Chris@17: if (is_array($tokens[$x]) === false Chris@17: || isset(Util\Tokens::$emptyTokens[$tokens[$x][0]]) === false Chris@17: ) { Chris@17: // Non-empty content. Chris@17: break; Chris@17: } Chris@17: } Chris@17: Chris@17: if ($x < $numTokens && is_array($tokens[$x]) === true) { Chris@17: $tokens[$x][0] = T_STRING; Chris@17: } Chris@17: Chris@17: /* Chris@17: This is a special condition for T_ARRAY tokens used for Chris@17: function return types. We want to keep the parenthesis map clean, Chris@17: so let's tag these tokens as T_STRING. Chris@17: */ Chris@17: Chris@17: // Go looking for the colon to start the return type hint. Chris@17: // Start by finding the closing parenthesis of the function. 
Chris@17: $parenthesisStack = []; Chris@17: $parenthesisCloser = false; Chris@17: for ($x = ($stackPtr + 1); $x < $numTokens; $x++) { Chris@17: if (is_array($tokens[$x]) === false && $tokens[$x] === '(') { Chris@17: $parenthesisStack[] = $x; Chris@17: } else if (is_array($tokens[$x]) === false && $tokens[$x] === ')') { Chris@17: array_pop($parenthesisStack); Chris@17: if (empty($parenthesisStack) === true) { Chris@17: $parenthesisCloser = $x; Chris@17: break; Chris@17: } Chris@17: } Chris@17: } Chris@17: Chris@17: if ($parenthesisCloser !== false) { Chris@17: for ($x = ($parenthesisCloser + 1); $x < $numTokens; $x++) { Chris@17: if (is_array($tokens[$x]) === false Chris@17: || isset(Util\Tokens::$emptyTokens[$tokens[$x][0]]) === false Chris@17: ) { Chris@17: // Non-empty content. Chris@17: if (is_array($tokens[$x]) === true && $tokens[$x][0] === T_USE) { Chris@17: // Found a use statements, so search ahead for the closing parenthesis. Chris@17: for ($x += 1; $x < $numTokens; $x++) { Chris@17: if (is_array($tokens[$x]) === false && $tokens[$x] === ')') { Chris@17: continue(2); Chris@17: } Chris@17: } Chris@17: } Chris@17: Chris@17: break; Chris@17: } Chris@17: } Chris@17: Chris@17: if (isset($tokens[$x]) === true Chris@17: && is_array($tokens[$x]) === false Chris@17: && $tokens[$x] === ':' Chris@17: ) { Chris@17: $allowed = [ Chris@17: T_STRING => T_STRING, Chris@17: T_ARRAY => T_ARRAY, Chris@17: T_CALLABLE => T_CALLABLE, Chris@17: T_SELF => T_SELF, Chris@17: T_PARENT => T_PARENT, Chris@17: T_NS_SEPARATOR => T_NS_SEPARATOR, Chris@17: ]; Chris@17: Chris@17: $allowed += Util\Tokens::$emptyTokens; Chris@17: Chris@17: // Find the start of the return type. Chris@17: for ($x += 1; $x < $numTokens; $x++) { Chris@17: if (is_array($tokens[$x]) === true Chris@17: && isset(Util\Tokens::$emptyTokens[$tokens[$x][0]]) === true Chris@17: ) { Chris@18: // Whitespace or comments before the return type. 
Chris@17: continue; Chris@17: } Chris@17: Chris@17: if (is_array($tokens[$x]) === false && $tokens[$x] === '?') { Chris@17: // Found a nullable operator, so skip it. Chris@17: // But also covert the token to save the tokenizer Chris@17: // a bit of time later on. Chris@17: $tokens[$x] = [ Chris@17: T_NULLABLE, Chris@17: '?', Chris@17: ]; Chris@17: Chris@17: if (PHP_CODESNIFFER_VERBOSITY > 1) { Chris@17: echo "\t\t* token $x changed from ? to T_NULLABLE".PHP_EOL; Chris@17: } Chris@17: Chris@17: continue; Chris@17: } Chris@17: Chris@17: break; Chris@17: }//end for Chris@17: Chris@17: // Any T_ARRAY tokens we find between here and the next Chris@17: // token that can't be part of the return type need to be Chris@17: // converted to T_STRING tokens. Chris@17: for ($x; $x < $numTokens; $x++) { Chris@17: if (is_array($tokens[$x]) === false || isset($allowed[$tokens[$x][0]]) === false) { Chris@17: break; Chris@17: } else if ($tokens[$x][0] === T_ARRAY) { Chris@17: $tokens[$x][0] = T_STRING; Chris@17: Chris@17: if (PHP_CODESNIFFER_VERBOSITY > 1) { Chris@17: echo "\t\t* token $x changed from T_ARRAY to T_STRING".PHP_EOL; Chris@17: } Chris@17: } Chris@17: } Chris@17: }//end if Chris@17: }//end if Chris@17: }//end if Chris@17: Chris@17: /* Chris@17: Before PHP 7, the <=> operator was tokenized as Chris@17: T_IS_SMALLER_OR_EQUAL followed by T_GREATER_THAN. Chris@17: So look for and combine these tokens in earlier versions. 
Chris@17: */ Chris@17: Chris@17: if ($tokenIsArray === true Chris@17: && $token[0] === T_IS_SMALLER_OR_EQUAL Chris@17: && isset($tokens[($stackPtr + 1)]) === true Chris@17: && $tokens[($stackPtr + 1)][0] === '>' Chris@17: ) { Chris@17: $newToken = []; Chris@17: $newToken['code'] = T_SPACESHIP; Chris@17: $newToken['type'] = 'T_SPACESHIP'; Chris@17: $newToken['content'] = '<=>'; Chris@17: $finalTokens[$newStackPtr] = $newToken; Chris@17: Chris@17: $newStackPtr++; Chris@17: $stackPtr++; Chris@17: continue; Chris@17: } Chris@17: Chris@17: /* Chris@17: PHP doesn't assign a token to goto labels, so we have to. Chris@17: These are just string tokens with a single colon after them. Double Chris@17: colons are already tokenized and so don't interfere with this check. Chris@17: But we do have to account for CASE statements, that look just like Chris@17: goto labels. Chris@17: */ Chris@17: Chris@17: if ($tokenIsArray === true Chris@17: && $token[0] === T_STRING Chris@17: && isset($tokens[($stackPtr + 1)]) === true Chris@17: && $tokens[($stackPtr + 1)] === ':' Chris@17: && $tokens[($stackPtr - 1)][0] !== T_PAAMAYIM_NEKUDOTAYIM Chris@17: ) { Chris@17: $stopTokens = [ Chris@17: T_CASE => true, Chris@17: T_SEMICOLON => true, Chris@17: T_OPEN_CURLY_BRACKET => true, Chris@17: T_INLINE_THEN => true, Chris@17: ]; Chris@17: Chris@17: for ($x = ($newStackPtr - 1); $x > 0; $x--) { Chris@17: if (isset($stopTokens[$finalTokens[$x]['code']]) === true) { Chris@17: break; Chris@17: } Chris@17: } Chris@17: Chris@17: if ($finalTokens[$x]['code'] !== T_CASE Chris@17: && $finalTokens[$x]['code'] !== T_INLINE_THEN Chris@17: ) { Chris@17: $finalTokens[$newStackPtr] = [ Chris@17: 'content' => $token[1].':', Chris@17: 'code' => T_GOTO_LABEL, Chris@17: 'type' => 'T_GOTO_LABEL', Chris@17: ]; Chris@17: Chris@17: if (PHP_CODESNIFFER_VERBOSITY > 1) { Chris@17: echo "\t\t* token $stackPtr changed from T_STRING to T_GOTO_LABEL".PHP_EOL; Chris@17: echo "\t\t* skipping T_COLON token ".($stackPtr + 
1).PHP_EOL; Chris@17: } Chris@17: Chris@17: $newStackPtr++; Chris@17: $stackPtr++; Chris@17: continue; Chris@17: } Chris@17: }//end if Chris@17: Chris@17: /* Chris@17: If this token has newlines in its content, split each line up Chris@17: and create a new token for each line. We do this so it's easier Chris@17: to ascertain where errors occur on a line. Chris@17: Note that $token[1] is the token's content. Chris@17: */ Chris@17: Chris@17: if ($tokenIsArray === true && strpos($token[1], $this->eolChar) !== false) { Chris@17: $tokenLines = explode($this->eolChar, $token[1]); Chris@17: $numLines = count($tokenLines); Chris@17: $newToken = [ Chris@17: 'type' => Util\Tokens::tokenName($token[0]), Chris@17: 'code' => $token[0], Chris@17: 'content' => '', Chris@17: ]; Chris@17: Chris@17: for ($i = 0; $i < $numLines; $i++) { Chris@17: $newToken['content'] = $tokenLines[$i]; Chris@17: if ($i === ($numLines - 1)) { Chris@17: if ($tokenLines[$i] === '') { Chris@17: break; Chris@17: } Chris@17: } else { Chris@17: $newToken['content'] .= $this->eolChar; Chris@17: } Chris@17: Chris@17: $finalTokens[$newStackPtr] = $newToken; Chris@17: $newStackPtr++; Chris@17: } Chris@17: } else { Chris@17: if ($tokenIsArray === true && $token[0] === T_STRING) { Chris@17: // Some T_STRING tokens should remain that way Chris@17: // due to their context. Chris@17: $context = [ Chris@17: T_OBJECT_OPERATOR => true, Chris@17: T_FUNCTION => true, Chris@17: T_CLASS => true, Chris@17: T_EXTENDS => true, Chris@17: T_IMPLEMENTS => true, Chris@17: T_NEW => true, Chris@17: T_CONST => true, Chris@17: T_NS_SEPARATOR => true, Chris@17: T_USE => true, Chris@17: T_NAMESPACE => true, Chris@17: T_PAAMAYIM_NEKUDOTAYIM => true, Chris@17: ]; Chris@18: Chris@17: if (isset($context[$finalTokens[$lastNotEmptyToken]['code']]) === true) { Chris@17: // Special case for syntax like: return new self Chris@17: // where self should not be a string. 
Chris@17: if ($finalTokens[$lastNotEmptyToken]['code'] === T_NEW Chris@17: && strtolower($token[1]) === 'self' Chris@17: ) { Chris@17: $finalTokens[$newStackPtr] = [ Chris@17: 'content' => $token[1], Chris@17: 'code' => T_SELF, Chris@17: 'type' => 'T_SELF', Chris@17: ]; Chris@17: } else { Chris@17: $finalTokens[$newStackPtr] = [ Chris@17: 'content' => $token[1], Chris@17: 'code' => T_STRING, Chris@17: 'type' => 'T_STRING', Chris@17: ]; Chris@17: } Chris@17: Chris@17: $newStackPtr++; Chris@17: continue; Chris@17: }//end if Chris@17: }//end if Chris@17: Chris@17: $newToken = null; Chris@17: if ($tokenIsArray === false) { Chris@17: if (isset(self::$resolveTokenCache[$token[0]]) === true) { Chris@17: $newToken = self::$resolveTokenCache[$token[0]]; Chris@17: } Chris@17: } else { Chris@17: $cacheKey = null; Chris@17: if ($token[0] === T_STRING) { Chris@17: $cacheKey = strtolower($token[1]); Chris@17: } else if ($token[0] !== T_CURLY_OPEN) { Chris@17: $cacheKey = $token[0]; Chris@17: } Chris@17: Chris@17: if ($cacheKey !== null && isset(self::$resolveTokenCache[$cacheKey]) === true) { Chris@17: $newToken = self::$resolveTokenCache[$cacheKey]; Chris@17: $newToken['content'] = $token[1]; Chris@17: } Chris@17: } Chris@17: Chris@17: if ($newToken === null) { Chris@17: $newToken = self::standardiseToken($token); Chris@17: } Chris@17: Chris@17: // Convert colons that are actually the ELSE component of an Chris@17: // inline IF statement. Chris@17: if (empty($insideInlineIf) === false && $newToken['code'] === T_COLON) { Chris@17: // Make sure this isn't the return type separator of a closure. 
Chris@17: $isReturnType = false; Chris@17: for ($i = ($stackPtr - 1); $i > 0; $i--) { Chris@17: if (is_array($tokens[$i]) === false Chris@17: || ($tokens[$i][0] !== T_DOC_COMMENT Chris@17: && $tokens[$i][0] !== T_COMMENT Chris@17: && $tokens[$i][0] !== T_WHITESPACE) Chris@17: ) { Chris@17: break; Chris@17: } Chris@17: } Chris@17: Chris@17: if ($tokens[$i] === ')') { Chris@17: $parenCount = 1; Chris@17: for ($i--; $i > 0; $i--) { Chris@17: if ($tokens[$i] === '(') { Chris@17: $parenCount--; Chris@17: if ($parenCount === 0) { Chris@17: break; Chris@17: } Chris@17: } else if ($tokens[$i] === ')') { Chris@17: $parenCount++; Chris@17: } Chris@17: } Chris@17: Chris@17: // We've found the open parenthesis, so if the previous Chris@17: // non-empty token is FUNCTION or USE, this is a closure. Chris@17: for ($i--; $i > 0; $i--) { Chris@17: if (is_array($tokens[$i]) === false Chris@17: || ($tokens[$i][0] !== T_DOC_COMMENT Chris@17: && $tokens[$i][0] !== T_COMMENT Chris@17: && $tokens[$i][0] !== T_WHITESPACE) Chris@17: ) { Chris@17: break; Chris@17: } Chris@17: } Chris@17: Chris@17: if ($tokens[$i][0] === T_FUNCTION || $tokens[$i][0] === T_USE) { Chris@17: $isReturnType = true; Chris@17: } Chris@17: }//end if Chris@17: Chris@17: if ($isReturnType === false) { Chris@17: array_pop($insideInlineIf); Chris@17: $newToken['code'] = T_INLINE_ELSE; Chris@17: $newToken['type'] = 'T_INLINE_ELSE'; Chris@17: } Chris@17: }//end if Chris@17: Chris@17: // This is a special condition for T_ARRAY tokens used for Chris@17: // type hinting function arguments as being arrays. We want to keep Chris@17: // the parenthesis map clean, so let's tag these tokens as Chris@17: // T_STRING. 
Chris@17: if ($newToken['code'] === T_ARRAY) { Chris@17: for ($i = $stackPtr; $i < $numTokens; $i++) { Chris@17: if ($tokens[$i] === '(') { Chris@17: break; Chris@17: } else if ($tokens[$i][0] === T_VARIABLE) { Chris@17: $newToken['code'] = T_STRING; Chris@17: $newToken['type'] = 'T_STRING'; Chris@17: break; Chris@17: } Chris@17: } Chris@17: } Chris@17: Chris@17: // This is a special case when checking PHP 5.5+ code in PHP < 5.5 Chris@17: // where "finally" should be T_FINALLY instead of T_STRING. Chris@17: if ($newToken['code'] === T_STRING Chris@17: && strtolower($newToken['content']) === 'finally' Chris@17: ) { Chris@17: $newToken['code'] = T_FINALLY; Chris@17: $newToken['type'] = 'T_FINALLY'; Chris@17: } Chris@17: Chris@17: // This is a special case for the PHP 5.5 classname::class syntax Chris@17: // where "class" should be T_STRING instead of T_CLASS. Chris@17: if (($newToken['code'] === T_CLASS Chris@17: || $newToken['code'] === T_FUNCTION) Chris@17: && $finalTokens[$lastNotEmptyToken]['code'] === T_DOUBLE_COLON Chris@17: ) { Chris@17: $newToken['code'] = T_STRING; Chris@17: $newToken['type'] = 'T_STRING'; Chris@17: } Chris@17: Chris@17: // This is a special case for PHP 5.6 use function and use const Chris@17: // where "function" and "const" should be T_STRING instead of T_FUNCTION Chris@17: // and T_CONST. Chris@17: if (($newToken['code'] === T_FUNCTION Chris@17: || $newToken['code'] === T_CONST) Chris@17: && ($finalTokens[$lastNotEmptyToken]['code'] === T_USE || $insideUseGroup === true) Chris@17: ) { Chris@17: $newToken['code'] = T_STRING; Chris@17: $newToken['type'] = 'T_STRING'; Chris@17: } Chris@17: Chris@17: // This is a special case for use groups in PHP 7+ where leaving Chris@17: // the curly braces as their normal tokens would confuse Chris@17: // the scope map and sniffs. 
Chris@17: if ($newToken['code'] === T_OPEN_CURLY_BRACKET Chris@17: && $finalTokens[$lastNotEmptyToken]['code'] === T_NS_SEPARATOR Chris@17: ) { Chris@17: $newToken['code'] = T_OPEN_USE_GROUP; Chris@17: $newToken['type'] = 'T_OPEN_USE_GROUP'; Chris@17: $insideUseGroup = true; Chris@17: } Chris@17: Chris@17: if ($insideUseGroup === true && $newToken['code'] === T_CLOSE_CURLY_BRACKET) { Chris@17: $newToken['code'] = T_CLOSE_USE_GROUP; Chris@17: $newToken['type'] = 'T_CLOSE_USE_GROUP'; Chris@17: $insideUseGroup = false; Chris@17: } Chris@17: Chris@17: $finalTokens[$newStackPtr] = $newToken; Chris@17: $newStackPtr++; Chris@17: }//end if Chris@17: }//end for Chris@17: Chris@17: if (PHP_CODESNIFFER_VERBOSITY > 1) { Chris@17: echo "\t*** END PHP TOKENIZING ***".PHP_EOL; Chris@17: } Chris@17: Chris@17: return $finalTokens; Chris@17: Chris@17: }//end tokenize()


    /**
     * Performs additional processing after main tokenizing.
     *
     * Walks the token stack backwards (last token first) and, per token:
     *  - copies a missing scope_condition from the token's scope opener;
     *  - retags T_FUNCTION as T_CLOSURE and T_CLASS as T_ANON_CLASS when the
     *    keyword is not followed by a name, updating nested 'conditions';
     *  - retags square brackets as short array open/close tokens;
     *  - retags T_STATIC after instanceof, and true/false/null used as a
     *    name (followed by ->, \ or ::), as T_STRING;
     *  - retags the first non-empty token after T_CONST as T_STRING;
     *  - repairs the scope opener/closer of CASE/DEFAULT statements that
     *    use curly braces or share a closer.
     *
     * The look-ahead scans never read or write past the end of the token
     * stack, so a file that ends directly after one of these keywords is
     * left untouched rather than gaining a bogus trailing token.
     *
     * @return void
     */
    protected function processAdditional()
    {
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t*** START ADDITIONAL PHP PROCESSING ***".PHP_EOL;
        }

        $numTokens = count($this->tokens);
        for ($i = ($numTokens - 1); $i >= 0; $i--) {
            // Check for any unset scope conditions due to alternate IF/ENDIF syntax.
            if (isset($this->tokens[$i]['scope_opener']) === true
                && isset($this->tokens[$i]['scope_condition']) === false
            ) {
                $this->tokens[$i]['scope_condition'] = $this->tokens[$this->tokens[$i]['scope_opener']]['scope_condition'];
            }

            if ($this->tokens[$i]['code'] === T_FUNCTION) {
                /*
                    Detect functions that are actually closures and
                    assign them a different token.
                */

                if (isset($this->tokens[$i]['scope_opener']) === true) {
                    // Skip empty tokens and a by-reference "&" to find what
                    // directly follows the function keyword.
                    for ($x = ($i + 1); $x < $numTokens; $x++) {
                        if (isset(Util\Tokens::$emptyTokens[$this->tokens[$x]['code']]) === false
                            && $this->tokens[$x]['code'] !== T_BITWISE_AND
                        ) {
                            break;
                        }
                    }

                    // No name before the parenthesis means this is a closure.
                    if ($this->tokens[$x]['code'] === T_OPEN_PARENTHESIS) {
                        $this->tokens[$i]['code'] = T_CLOSURE;
                        $this->tokens[$i]['type'] = 'T_CLOSURE';
                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            $line = $this->tokens[$i]['line'];
                            echo "\t* token $i on line $line changed from T_FUNCTION to T_CLOSURE".PHP_EOL;
                        }

                        // Tokens inside the scope recorded this token as a
                        // T_FUNCTION condition; update them to match.
                        for ($x = ($this->tokens[$i]['scope_opener'] + 1); $x < $this->tokens[$i]['scope_closer']; $x++) {
                            if (isset($this->tokens[$x]['conditions'][$i]) === false) {
                                continue;
                            }

                            $this->tokens[$x]['conditions'][$i] = T_CLOSURE;
                            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                                $type = $this->tokens[$x]['type'];
                                echo "\t\t* cleaned $x ($type) *".PHP_EOL;
                            }
                        }
                    }
                }//end if

                continue;
            } else if ($this->tokens[$i]['code'] === T_CLASS && isset($this->tokens[$i]['scope_opener']) === true) {
                /*
                    Detect anonymous classes and assign them a different token.
                */

                for ($x = ($i + 1); $x < $numTokens; $x++) {
                    if (isset(Util\Tokens::$emptyTokens[$this->tokens[$x]['code']]) === false) {
                        break;
                    }
                }

                // A class keyword that is not followed by a name is anonymous.
                if ($this->tokens[$x]['code'] === T_OPEN_PARENTHESIS
                    || $this->tokens[$x]['code'] === T_OPEN_CURLY_BRACKET
                    || $this->tokens[$x]['code'] === T_EXTENDS
                    || $this->tokens[$x]['code'] === T_IMPLEMENTS
                ) {
                    $this->tokens[$i]['code'] = T_ANON_CLASS;
                    $this->tokens[$i]['type'] = 'T_ANON_CLASS';
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $line = $this->tokens[$i]['line'];
                        echo "\t* token $i on line $line changed from T_CLASS to T_ANON_CLASS".PHP_EOL;
                    }

                    for ($x = ($this->tokens[$i]['scope_opener'] + 1); $x < $this->tokens[$i]['scope_closer']; $x++) {
                        if (isset($this->tokens[$x]['conditions'][$i]) === false) {
                            continue;
                        }

                        $this->tokens[$x]['conditions'][$i] = T_ANON_CLASS;
                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            $type = $this->tokens[$x]['type'];
                            echo "\t\t* cleaned $x ($type) *".PHP_EOL;
                        }
                    }
                }

                continue;
            } else if ($this->tokens[$i]['code'] === T_OPEN_SQUARE_BRACKET) {
                if (isset($this->tokens[$i]['bracket_closer']) === false) {
                    continue;
                }

                // Unless there is a variable or a bracket before this token,
                // it is the start of an array being defined using the short syntax.
                $isShortArray = false;
                $allowed      = [
                    T_CLOSE_SQUARE_BRACKET     => T_CLOSE_SQUARE_BRACKET,
                    T_CLOSE_CURLY_BRACKET      => T_CLOSE_CURLY_BRACKET,
                    T_CLOSE_PARENTHESIS        => T_CLOSE_PARENTHESIS,
                    T_VARIABLE                 => T_VARIABLE,
                    T_OBJECT_OPERATOR          => T_OBJECT_OPERATOR,
                    T_STRING                   => T_STRING,
                    T_CONSTANT_ENCAPSED_STRING => T_CONSTANT_ENCAPSED_STRING,
                ];

                for ($x = ($i - 1); $x >= 0; $x--) {
                    // If we hit a scope opener, the statement has ended
                    // without finding anything, so it's probably an array
                    // using PHP 7.1 short list syntax.
                    if (isset($this->tokens[$x]['scope_opener']) === true) {
                        $isShortArray = true;
                        break;
                    }

                    if (isset(Util\Tokens::$emptyTokens[$this->tokens[$x]['code']]) === false) {
                        if (isset($allowed[$this->tokens[$x]['code']]) === false) {
                            $isShortArray = true;
                        }

                        break;
                    }
                }

                if ($isShortArray === true) {
                    $this->tokens[$i]['code'] = T_OPEN_SHORT_ARRAY;
                    $this->tokens[$i]['type'] = 'T_OPEN_SHORT_ARRAY';

                    $closer = $this->tokens[$i]['bracket_closer'];
                    $this->tokens[$closer]['code'] = T_CLOSE_SHORT_ARRAY;
                    $this->tokens[$closer]['type'] = 'T_CLOSE_SHORT_ARRAY';
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $line = $this->tokens[$i]['line'];
                        echo "\t* token $i on line $line changed from T_OPEN_SQUARE_BRACKET to T_OPEN_SHORT_ARRAY".PHP_EOL;
                        $line = $this->tokens[$closer]['line'];
                        echo "\t* token $closer on line $line changed from T_CLOSE_SQUARE_BRACKET to T_CLOSE_SHORT_ARRAY".PHP_EOL;
                    }
                }

                continue;
            } else if ($this->tokens[$i]['code'] === T_STATIC) {
                // Find the previous non-empty token.
                for ($x = ($i - 1); $x > 0; $x--) {
                    if (isset(Util\Tokens::$emptyTokens[$this->tokens[$x]['code']]) === false) {
                        break;
                    }
                }

                // "instanceof static" uses static as a class reference.
                if ($this->tokens[$x]['code'] === T_INSTANCEOF) {
                    $this->tokens[$i]['code'] = T_STRING;
                    $this->tokens[$i]['type'] = 'T_STRING';

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $line = $this->tokens[$i]['line'];
                        echo "\t* token $i on line $line changed from T_STATIC to T_STRING".PHP_EOL;
                    }
                }

                continue;
            } else if ($this->tokens[$i]['code'] === T_TRUE
                || $this->tokens[$i]['code'] === T_FALSE
                || $this->tokens[$i]['code'] === T_NULL
            ) {
                // Find the next non-empty token. The bound must be on $x:
                // $i never changes inside this loop.
                for ($x = ($i + 1); $x < $numTokens; $x++) {
                    if (isset(Util\Tokens::$emptyTokens[$this->tokens[$x]['code']]) === false) {
                        // Non-whitespace content.
                        break;
                    }
                }

                $context = [
                    T_OBJECT_OPERATOR      => true,
                    T_NS_SEPARATOR         => true,
                    T_PAAMAYIM_NEKUDOTAYIM => true,
                ];

                // $x === $numTokens means nothing but empty tokens follow,
                // so there is no context token to inspect.
                if ($x < $numTokens
                    && isset($context[$this->tokens[$x]['code']]) === true
                ) {
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $line = $this->tokens[$i]['line'];
                        $type = $this->tokens[$i]['type'];
                        echo "\t* token $i on line $line changed from $type to T_STRING".PHP_EOL;
                    }

                    $this->tokens[$i]['code'] = T_STRING;
                    $this->tokens[$i]['type'] = 'T_STRING';
                }
            } else if ($this->tokens[$i]['code'] === T_CONST) {
                // Context sensitive keywords support.
                for ($x = ($i + 1); $x < $numTokens; $x++) {
                    if (isset(Util\Tokens::$emptyTokens[$this->tokens[$x]['code']]) === false) {
                        // Non-whitespace content.
                        break;
                    }
                }

                // Only retag a token that really exists; writing to offset
                // $numTokens would append a partial token to the stack.
                if ($x < $numTokens
                    && $this->tokens[$x]['code'] !== T_STRING
                ) {
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $line = $this->tokens[$x]['line'];
                        $type = $this->tokens[$x]['type'];
                        echo "\t* token $x on line $line changed from $type to T_STRING".PHP_EOL;
                    }

                    $this->tokens[$x]['code'] = T_STRING;
                    $this->tokens[$x]['type'] = 'T_STRING';
                }
            }//end if

            if (($this->tokens[$i]['code'] !== T_CASE
                && $this->tokens[$i]['code'] !== T_DEFAULT)
                || isset($this->tokens[$i]['scope_opener']) === false
            ) {
                // Only interested in CASE and DEFAULT statements from here on in.
                continue;
            }

            $scopeOpener = $this->tokens[$i]['scope_opener'];
            $scopeCloser = $this->tokens[$i]['scope_closer'];

            // If the first char after the opener is a curly brace
            // and that brace has been ignored, it is actually
            // opening this case statement and the opener and closer are
            // probably set incorrectly.
            for ($x = ($scopeOpener + 1); $x < $numTokens; $x++) {
                if (isset(Util\Tokens::$emptyTokens[$this->tokens[$x]['code']]) === false) {
                    // Non-whitespace content.
                    break;
                }
            }

            if ($this->tokens[$x]['code'] === T_CASE || $this->tokens[$x]['code'] === T_DEFAULT) {
                // Special case for multiple CASE statements that share the same
                // closer. Because we are going backwards through the file, this next
                // CASE statement is already fixed, so just use its closer and don't
                // worry about fixing anything.
                $newCloser = $this->tokens[$x]['scope_closer'];
                $this->tokens[$i]['scope_closer'] = $newCloser;
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $oldType = $this->tokens[$scopeCloser]['type'];
                    $newType = $this->tokens[$newCloser]['type'];
                    $line    = $this->tokens[$i]['line'];
                    echo "\t* token $i (T_CASE) on line $line closer changed from $scopeCloser ($oldType) to $newCloser ($newType)".PHP_EOL;
                }

                continue;
            }

            if ($this->tokens[$x]['code'] !== T_OPEN_CURLY_BRACKET
                || isset($this->tokens[$x]['scope_condition']) === true
            ) {
                // Not a CASE/DEFAULT with a curly brace opener.
                continue;
            }

            // The closer for this CASE/DEFAULT should be the closing curly brace and
            // not whatever it already is. The opener needs to be the opening curly
            // brace so everything matches up.
            $newCloser = $this->tokens[$x]['bracket_closer'];
            foreach ([$i, $x, $newCloser] as $index) {
                $this->tokens[$index]['scope_condition'] = $i;
                $this->tokens[$index]['scope_opener']    = $x;
                $this->tokens[$index]['scope_closer']    = $newCloser;
            }

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                $line      = $this->tokens[$i]['line'];
                $tokenType = $this->tokens[$i]['type'];

                $oldType = $this->tokens[$scopeOpener]['type'];
                $newType = $this->tokens[$x]['type'];
                echo "\t* token $i ($tokenType) on line $line opener changed from $scopeOpener ($oldType) to $x ($newType)".PHP_EOL;

                $oldType = $this->tokens[$scopeCloser]['type'];
                $newType = $this->tokens[$newCloser]['type'];
                echo "\t* token $i ($tokenType) on line $line closer changed from $scopeCloser ($oldType) to $newCloser ($newType)".PHP_EOL;
            }

            // The old opener no longer belongs to this statement.
            if ($this->tokens[$scopeOpener]['scope_condition'] === $i) {
                unset($this->tokens[$scopeOpener]['scope_condition']);
                unset($this->tokens[$scopeOpener]['scope_opener']);
                unset($this->tokens[$scopeOpener]['scope_closer']);
            }

            if ($this->tokens[$scopeCloser]['scope_condition'] === $i) {
                unset($this->tokens[$scopeCloser]['scope_condition']);
                unset($this->tokens[$scopeCloser]['scope_opener']);
                unset($this->tokens[$scopeCloser]['scope_closer']);
            } else {
                // We were using a shared closer. All tokens that were
                // sharing this closer with us, except for the scope condition
                // and its opener, need to now point to the new closer.
                $condition = $this->tokens[$scopeCloser]['scope_condition'];
                $start     = ($this->tokens[$condition]['scope_opener'] + 1);
                for ($y = $start; $y < $scopeCloser; $y++) {
                    if (isset($this->tokens[$y]['scope_closer']) === true
                        && $this->tokens[$y]['scope_closer'] === $scopeCloser
                    ) {
                        $this->tokens[$y]['scope_closer'] = $newCloser;

                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            $line      = $this->tokens[$y]['line'];
                            $tokenType = $this->tokens[$y]['type'];
                            $oldType   = $this->tokens[$scopeCloser]['type'];
                            $newType   = $this->tokens[$newCloser]['type'];
                            echo "\t\t* token $y ($tokenType) on line $line closer changed from $scopeCloser ($oldType) to $newCloser ($newType)".PHP_EOL;
                        }
                    }
                }
            }//end if

            // The braces are scope tokens now, not a plain bracket pair.
            unset($this->tokens[$x]['bracket_opener']);
            unset($this->tokens[$x]['bracket_closer']);
            unset($this->tokens[$newCloser]['bracket_opener']);
            unset($this->tokens[$newCloser]['bracket_closer']);
            $this->tokens[$scopeCloser]['conditions'][] = $i;

            // Now fix up all the tokens that think they are
            // inside the CASE/DEFAULT statement when they are really outside.
            for ($x = $newCloser; $x < $scopeCloser; $x++) {
                foreach ($this->tokens[$x]['conditions'] as $num => $oldCond) {
                    if ($oldCond === $this->tokens[$i]['code']) {
                        $oldConditions = $this->tokens[$x]['conditions'];
                        unset($this->tokens[$x]['conditions'][$num]);

                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            $type     = $this->tokens[$x]['type'];
                            $oldConds = '';
                            foreach ($oldConditions as $condition) {
                                $oldConds .= Util\Tokens::tokenName($condition).',';
                            }

                            $oldConds = rtrim($oldConds, ',');

                            $newConds = '';
                            foreach ($this->tokens[$x]['conditions'] as $condition) {
                                $newConds .= Util\Tokens::tokenName($condition).',';
                            }

                            $newConds = rtrim($newConds, ',');

                            echo "\t\t* cleaned $x ($type) *".PHP_EOL;
                            echo "\t\t\t=> conditions changed from $oldConds to $newConds".PHP_EOL;
                        }

                        break;
                    }//end if
                }//end foreach
            }//end for
        }//end for

        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t*** END ADDITIONAL PHP PROCESSING ***".PHP_EOL;
        }

    }//end processAdditional()


Chris@17: /** Chris@17: * Takes a token produced from token_get_all() and produces a Chris@17: * more uniform token. Chris@17: * Chris@17: * @param string|array $token The token to convert. Chris@17: * Chris@17: * @return array The new token.
Chris@17: */
    public static function standardiseToken($token)
    {
        // Simple tokens have no offset 1: they carry no separate content.
        if (isset($token[1]) === false) {
            $symbol = $token[0];
            if (isset(self::$resolveTokenCache[$symbol]) === true) {
                return self::$resolveTokenCache[$symbol];
            }

            return self::resolveSimpleToken($symbol);
        }

        $tokenCode = $token[0];
        $content   = $token[1];

        // T_STRING tokens are cached by lowercased content, T_CURLY_OPEN is
        // never cached, and every other token is cached by its code.
        $cacheKey = null;
        if ($tokenCode === T_STRING) {
            $cacheKey = strtolower($content);
        } else if ($tokenCode !== T_CURLY_OPEN) {
            $cacheKey = $tokenCode;
        }

        if ($cacheKey !== null && isset(self::$resolveTokenCache[$cacheKey]) === true) {
            // Cached entries hold no content, so add this token's own.
            $resolved            = self::$resolveTokenCache[$cacheKey];
            $resolved['content'] = $content;
            return $resolved;
        }

        if ($tokenCode === T_CURLY_OPEN) {
            return [
                'code'    => T_OPEN_CURLY_BRACKET,
                'type'    => 'T_OPEN_CURLY_BRACKET',
                'content' => $content,
            ];
        }

        if ($tokenCode !== T_STRING) {
            $resolved = [
                'code' => $tokenCode,
                'type' => Util\Tokens::tokenName($tokenCode),
            ];

            self::$resolveTokenCache[$tokenCode] = $resolved;

            $resolved['content'] = $content;
            return $resolved;
        }

        // A T_STRING whose lowercased content is one of these keywords gets
        // the keyword's own token type; everything else stays T_STRING.
        $keywordTypes = [
            'false'  => 'T_FALSE',
            'true'   => 'T_TRUE',
            'null'   => 'T_NULL',
            'self'   => 'T_SELF',
            'parent' => 'T_PARENT',
        ];

        if (isset($keywordTypes[$cacheKey]) === true) {
            $type = $keywordTypes[$cacheKey];
        } else {
            $type = 'T_STRING';
        }

        $resolved = [
            'type' => $type,
            'code' => constant($type),
        ];

        self::$resolveTokenCache[$cacheKey] = $resolved;

        $resolved['content'] = $content;
        return $resolved;

    }//end standardiseToken()


    /**
     * Builds an array-form token for a simple (string) token.
     *
     * The token is matched against a fixed list of characters to pick its
     * type name; anything not in the list becomes T_NONE. The token code is
     * read from the constant of the same name, and the result is stored in
     * the resolve cache under the token before being returned.
     *
     * @param string $token The simple token to convert.
     *
     * @return array The new token, with 'type', 'code' and 'content' keys.
     */
    public static function resolveSimpleToken($token)
    {
        switch ($token) {
        case '{':
            $type = 'T_OPEN_CURLY_BRACKET';
            break;
        case '}':
            $type = 'T_CLOSE_CURLY_BRACKET';
            break;
        case '[':
            $type = 'T_OPEN_SQUARE_BRACKET';
            break;
        case ']':
            $type = 'T_CLOSE_SQUARE_BRACKET';
            break;
        case '(':
            $type = 'T_OPEN_PARENTHESIS';
            break;
        case ')':
            $type = 'T_CLOSE_PARENTHESIS';
            break;
        case ':':
            $type = 'T_COLON';
            break;
        case '.':
            $type = 'T_STRING_CONCAT';
            break;
        case ';':
            $type = 'T_SEMICOLON';
            break;
        case '=':
            $type = 'T_EQUAL';
            break;
        case '*':
            $type = 'T_MULTIPLY';
            break;
        case '/':
            $type = 'T_DIVIDE';
            break;
        case '+':
            $type = 'T_PLUS';
            break;
        case '-':
            $type = 'T_MINUS';
            break;
        case '%':
            $type = 'T_MODULUS';
            break;
        case '^':
            $type = 'T_BITWISE_XOR';
            break;
        case '&':
            $type = 'T_BITWISE_AND';
            break;
        case '|':
            $type = 'T_BITWISE_OR';
            break;
        case '~':
            $type = 'T_BITWISE_NOT';
            break;
        case '<':
            $type = 'T_LESS_THAN';
            break;
        case '>':
            $type = 'T_GREATER_THAN';
            break;
        case '!':
            $type = 'T_BOOLEAN_NOT';
            break;
        case ',':
            $type = 'T_COMMA';
            break;
        case '@':
            $type = 'T_ASPERAND';
            break;
        case '$':
            $type = 'T_DOLLAR';
            break;
        case '`':
            $type = 'T_BACKTICK';
            break;
        default:
            $type = 'T_NONE';
            break;
        }//end switch

        $resolved = [
            'type'    => $type,
            'code'    => constant($type),
            'content' => $token,
        ];

        self::$resolveTokenCache[$token] = $resolved;
        return $resolved;

    }//end resolveSimpleToken()


Chris@17: Chris@17: Chris@17: }//end class