Chris@17: <?php
Chris@17: /**
Chris@17:  * Tokenizes JS code.
Chris@17:  *
Chris@17:  * @author    Greg Sherwood <gsherwood@squiz.net>
Chris@17:  * @copyright 2006-2015 Squiz Pty Ltd (ABN 77 084 670 600)
Chris@17:  * @license   https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
Chris@17:  */
Chris@17: 
Chris@17: namespace PHP_CodeSniffer\Tokenizers;
Chris@17: 
Chris@17: use PHP_CodeSniffer\Util;
Chris@17: use PHP_CodeSniffer\Exceptions\TokenizerException;
Chris@17: use PHP_CodeSniffer\Config;
Chris@17: 
Chris@17: class JS extends Tokenizer
Chris@17: {
Chris@17: 
Chris@17: 
Chris@17:     /**
Chris@17:      * A list of tokens that are allowed to open a scope.
Chris@17:      *
Chris@17:      * This array also contains information about what kind of token the scope
Chris@17:      * opener uses to open and close the scope, if the token strictly requires
Chris@17:      * an opener, if the token can share a scope closer, and who it can be shared
Chris@17:      * with. An example of a token that shares a scope closer is a CASE scope.
Chris@17:      *
Chris@17:      * Each entry is keyed by the scope-opening token and holds:
Chris@17:      *  - 'start':  the tokens that may open the scope (token => token).
Chris@17:      *  - 'end':    the tokens that may close the scope (token => token).
Chris@17:      *  - 'strict': whether the token strictly requires an opener.
Chris@17:      *  - 'shared': whether the token can share its scope closer.
Chris@17:      *  - 'with':   the tokens the scope closer can be shared with.
Chris@17:      *
Chris@17:      * In this table every brace-delimited construct opens on
Chris@17:      * T_OPEN_CURLY_BRACKET and closes on T_CLOSE_CURLY_BRACKET, while T_CASE
Chris@17:      * and T_DEFAULT open on T_COLON and close on T_BREAK, T_RETURN,
Chris@17:      * T_CONTINUE or T_THROW.
Chris@17:      *
Chris@17:      * @var array
Chris@17:      */
Chris@17:     public $scopeOpeners = [
Chris@17:         T_IF       => [
Chris@17:             'start'  => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
Chris@17:             'end'    => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
Chris@17:             'strict' => false,
Chris@17:             'shared' => false,
Chris@17:             'with'   => [],
Chris@17:         ],
Chris@17:         T_TRY      => [
Chris@17:             'start'  => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
Chris@17:             'end'    => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
Chris@17:             'strict' => true,
Chris@17:             'shared' => false,
Chris@17:             'with'   => [],
Chris@17:         ],
Chris@17:         T_CATCH    => [
Chris@17:             'start'  => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
Chris@17:             'end'    => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
Chris@17:             'strict' => true,
Chris@17:             'shared' => false,
Chris@17:             'with'   => [],
Chris@17:         ],
Chris@17:         T_ELSE     => [
Chris@17:             'start'  => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
Chris@17:             'end'    => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
Chris@17:             'strict' => false,
Chris@17:             'shared' => false,
Chris@17:             'with'   => [],
Chris@17:         ],
Chris@17:         T_FOR      => [
Chris@17:             'start'  => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
Chris@17:             'end'    => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
Chris@17:             'strict' => false,
Chris@17:             'shared' => false,
Chris@17:             'with'   => [],
Chris@17:         ],
Chris@17:         T_CLASS    => [
Chris@17:             'start'  => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
Chris@17:             'end'    => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
Chris@17:             'strict' => true,
Chris@17:             'shared' => false,
Chris@17:             'with'   => [],
Chris@17:         ],
Chris@17:         T_FUNCTION => [
Chris@17:             'start'  => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
Chris@17:             'end'    => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
Chris@17:             'strict' => false,
Chris@17:             'shared' => false,
Chris@17:             'with'   => [],
Chris@17:         ],
Chris@17:         T_WHILE    => [
Chris@17:             'start'  => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
Chris@17:             'end'    => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
Chris@17:             'strict' => false,
Chris@17:             'shared' => false,
Chris@17:             'with'   => [],
Chris@17:         ],
Chris@17:         T_DO       => [
Chris@17:             'start'  => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
Chris@17:             'end'    => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
Chris@17:             'strict' => true,
Chris@17:             'shared' => false,
Chris@17:             'with'   => [],
Chris@17:         ],
Chris@17:         T_SWITCH   => [
Chris@17:             'start'  => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
Chris@17:             'end'    => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
Chris@17:             'strict' => true,
Chris@17:             'shared' => false,
Chris@17:             'with'   => [],
Chris@17:         ],
Chris@17:         T_CASE     => [
Chris@17:             'start'  => [T_COLON => T_COLON],
Chris@17:             'end'    => [
Chris@17:                 T_BREAK    => T_BREAK,
Chris@17:                 T_RETURN   => T_RETURN,
Chris@17:                 T_CONTINUE => T_CONTINUE,
Chris@17:                 T_THROW    => T_THROW,
Chris@17:             ],
Chris@17:             'strict' => true,
Chris@17:             'shared' => true,
Chris@17:             'with'   => [
Chris@17:                 T_DEFAULT => T_DEFAULT,
Chris@17:                 T_CASE    => T_CASE,
Chris@17:                 T_SWITCH  => T_SWITCH,
Chris@17:             ],
Chris@17:         ],
Chris@17:         T_DEFAULT  => [
Chris@17:             'start'  => [T_COLON => T_COLON],
Chris@17:             'end'    => [
Chris@17:                 T_BREAK    => T_BREAK,
Chris@17:                 T_RETURN   => T_RETURN,
Chris@17:                 T_CONTINUE => T_CONTINUE,
Chris@17:                 T_THROW    => T_THROW,
Chris@17:             ],
Chris@17:             'strict' => true,
Chris@17:             'shared' => true,
Chris@17:             'with'   => [
Chris@17:                 T_CASE   => T_CASE,
Chris@17:                 T_SWITCH => T_SWITCH,
Chris@17:             ],
Chris@17:         ],
Chris@17:     ];
Chris@17: 
Chris@17:     /**
Chris@17:      * A list of tokens that end the scope.
Chris@17:      *
Chris@17:      * This array is described as a unique collection of the end tokens
Chris@17:      * from the $scopeOpeners array. The data is duplicated here to
Chris@17:      * save time during parsing of the file.
Chris@17:      *
Chris@17:      * NOTE(review): only T_CLOSE_CURLY_BRACKET and T_BREAK are listed here,
Chris@17:      * whereas the T_CASE and T_DEFAULT entries of $scopeOpeners also end on
Chris@17:      * T_RETURN, T_CONTINUE and T_THROW - confirm the omission is intentional.
Chris@17:      *
Chris@17:      * @var array
Chris@17:      */
Chris@17:     public $endScopeTokens = [
Chris@17:         T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET,
Chris@17:         T_BREAK               => T_BREAK,
Chris@17:     ];
Chris@17: 
Chris@17:     /**
Chris@17:      * A list of special JS tokens and their types.
Chris@17:      *
Chris@17:      * Keys are the lower-case source text of a keyword, bracket, operator or
Chris@17:      * comment marker; values are the names of the T_* constants they map to.
Chris@17:      * tokenize() looks entries up with strtolower() on its buffer and turns
Chris@17:      * the name into a token code via constant(). The length of the longest
Chris@17:      * key also sets how far tokenize() looks ahead for a longer matching token.
Chris@17:      *
Chris@17:      * NOTE(review): '^' is mapped to T_LOGICAL_XOR, although ^ is the bitwise
Chris@17:      * XOR operator in JavaScript - confirm this naming is intentional.
Chris@17:      *
Chris@17:      * @var array
Chris@17:      */
Chris@17:     protected $tokenValues = [
Chris@17:         'class'     => 'T_CLASS',
Chris@17:         'function'  => 'T_FUNCTION',
Chris@17:         'prototype' => 'T_PROTOTYPE',
Chris@17:         'try'       => 'T_TRY',
Chris@17:         'catch'     => 'T_CATCH',
Chris@17:         'return'    => 'T_RETURN',
Chris@17:         'throw'     => 'T_THROW',
Chris@17:         'break'     => 'T_BREAK',
Chris@17:         'switch'    => 'T_SWITCH',
Chris@17:         'continue'  => 'T_CONTINUE',
Chris@17:         'if'        => 'T_IF',
Chris@17:         'else'      => 'T_ELSE',
Chris@17:         'do'        => 'T_DO',
Chris@17:         'while'     => 'T_WHILE',
Chris@17:         'for'       => 'T_FOR',
Chris@17:         'var'       => 'T_VAR',
Chris@17:         'case'      => 'T_CASE',
Chris@17:         'default'   => 'T_DEFAULT',
Chris@17:         'true'      => 'T_TRUE',
Chris@17:         'false'     => 'T_FALSE',
Chris@17:         'null'      => 'T_NULL',
Chris@17:         'this'      => 'T_THIS',
Chris@17:         'typeof'    => 'T_TYPEOF',
Chris@17:         '('         => 'T_OPEN_PARENTHESIS',
Chris@17:         ')'         => 'T_CLOSE_PARENTHESIS',
Chris@17:         '{'         => 'T_OPEN_CURLY_BRACKET',
Chris@17:         '}'         => 'T_CLOSE_CURLY_BRACKET',
Chris@17:         '['         => 'T_OPEN_SQUARE_BRACKET',
Chris@17:         ']'         => 'T_CLOSE_SQUARE_BRACKET',
Chris@17:         '?'         => 'T_INLINE_THEN',
Chris@17:         '.'         => 'T_OBJECT_OPERATOR',
Chris@17:         '+'         => 'T_PLUS',
Chris@17:         '-'         => 'T_MINUS',
Chris@17:         '*'         => 'T_MULTIPLY',
Chris@17:         '%'         => 'T_MODULUS',
Chris@17:         '/'         => 'T_DIVIDE',
Chris@17:         '^'         => 'T_LOGICAL_XOR',
Chris@17:         ','         => 'T_COMMA',
Chris@17:         ';'         => 'T_SEMICOLON',
Chris@17:         ':'         => 'T_COLON',
Chris@17:         '<'         => 'T_LESS_THAN',
Chris@17:         '>'         => 'T_GREATER_THAN',
Chris@17:         '<<'        => 'T_SL',
Chris@17:         '>>'        => 'T_SR',
Chris@17:         '>>>'       => 'T_ZSR',
Chris@17:         '<<='       => 'T_SL_EQUAL',
Chris@17:         '>>='       => 'T_SR_EQUAL',
Chris@17:         '>>>='      => 'T_ZSR_EQUAL',
Chris@17:         '<='        => 'T_IS_SMALLER_OR_EQUAL',
Chris@17:         '>='        => 'T_IS_GREATER_OR_EQUAL',
Chris@17:         '=>'        => 'T_DOUBLE_ARROW',
Chris@17:         '!'         => 'T_BOOLEAN_NOT',
Chris@17:         '||'        => 'T_BOOLEAN_OR',
Chris@17:         '&&'        => 'T_BOOLEAN_AND',
Chris@17:         '|'         => 'T_BITWISE_OR',
Chris@17:         '&'         => 'T_BITWISE_AND',
Chris@17:         '!='        => 'T_IS_NOT_EQUAL',
Chris@17:         '!=='       => 'T_IS_NOT_IDENTICAL',
Chris@17:         '='         => 'T_EQUAL',
Chris@17:         '=='        => 'T_IS_EQUAL',
Chris@17:         '==='       => 'T_IS_IDENTICAL',
Chris@17:         '-='        => 'T_MINUS_EQUAL',
Chris@17:         '+='        => 'T_PLUS_EQUAL',
Chris@17:         '*='        => 'T_MUL_EQUAL',
Chris@17:         '/='        => 'T_DIV_EQUAL',
Chris@17:         '%='        => 'T_MOD_EQUAL',
Chris@17:         '++'        => 'T_INC',
Chris@17:         '--'        => 'T_DEC',
Chris@17:         '//'        => 'T_COMMENT',
Chris@17:         '/*'        => 'T_COMMENT',
Chris@17:         '/**'       => 'T_DOC_COMMENT',
Chris@17:         '*/'        => 'T_COMMENT',
Chris@17:     ];
Chris@17: 
Chris@17:     /**
Chris@17:      * A list of string delimiters.
Chris@17:      *
Chris@17:      * Keyed by the quote character that opens a string; each value is the
Chris@17:      * same character. The visible part of tokenize() only tests whether a
Chris@17:      * character is a key of this array to decide if it starts or ends a string.
Chris@17:      *
Chris@17:      * @var array
Chris@17:      */
Chris@17:     protected $stringTokens = [
Chris@17:         '\'' => '\'',
Chris@17:         '"'  => '"',
Chris@17:     ];
Chris@17: 
Chris@17:     /**
Chris@17:      * A list of tokens that start and end comments.
Chris@17:      *
Chris@17:      * Keyed by the text that opens a comment; the value is the text that
Chris@17:      * closes it. A null closer means the comment has no closing token and
Chris@17:      * tokenize() treats it as ending at the next newline.
Chris@17:      *
Chris@17:      * @var array
Chris@17:      */
Chris@17:     protected $commentTokens = [
Chris@17:         '//'  => null,
Chris@17:         '/*'  => '*/',
Chris@17:         '/**' => '*/',
Chris@17:     ];
Chris@17: 
Chris@17: 
Chris@17:     /**
Chris@17:      * Initialise the tokenizer.
Chris@17:      *
Chris@17:      * Pre-checks the content to see if it looks minified and refuses to
Chris@17:      * process it if so; otherwise hands over to the parent constructor.
Chris@17:      *
Chris@17:      * @param string                  $content The content to tokenize.
Chris@17:      * @param \PHP_CodeSniffer\Config $config  The config data for the run.
Chris@17:      * @param string                  $eolChar The EOL char used in the content.
Chris@17:      *                                         Defaults to a line feed.
Chris@17:      *
Chris@17:      * @return void
Chris@18:      * @throws \PHP_CodeSniffer\Exceptions\TokenizerException If the file appears to be minified.
Chris@17:      */
Chris@17:     public function __construct($content, Config $config, $eolChar="\n")
Chris@17:     {
Chris@17:         // The default EOL must be double-quoted: in single quotes '\n' is the
Chris@17:         // two characters backslash + n, not a line feed.
Chris@17:         // The minified check runs first; the parent constructor is only
Chris@17:         // reached for content that passes it.
Chris@17:         if ($this->isMinifiedContent($content, $eolChar) === true) {
Chris@17:             throw new TokenizerException('File appears to be minified and cannot be processed');
Chris@17:         }
Chris@17: 
Chris@18:         parent::__construct($content, $config, $eolChar);
Chris@17: 
Chris@17:     }//end __construct()
Chris@17: 
Chris@17: 
Chris@17:     /**
Chris@17:      * Creates an array of tokens when given some JS code.
Chris@17:      *
Chris@17:      * @param string $string The string to tokenize.
Chris@17:      *
Chris@17:      * @return array
Chris@17:      */
Chris@17:     public function tokenize($string)
Chris@17:     {
Chris@17:         if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:             echo "\t*** START JS TOKENIZING ***".PHP_EOL;
Chris@17:         }
Chris@17: 
Chris@17:         $maxTokenLength = 0;
Chris@17:         foreach ($this->tokenValues as $token => $values) {
Chris@17:             if (strlen($token) > $maxTokenLength) {
Chris@17:                 $maxTokenLength = strlen($token);
Chris@17:             }
Chris@17:         }
Chris@17: 
Chris@17:         $tokens          = [];
Chris@17:         $inString        = '';
Chris@17:         $stringChar      = null;
Chris@17:         $inComment       = '';
Chris@17:         $buffer          = '';
Chris@17:         $preStringBuffer = '';
Chris@17:         $cleanBuffer     = false;
Chris@17: 
Chris@17:         $commentTokenizer = new Comment();
Chris@17: 
Chris@17:         $tokens[] = [
Chris@17:             'code'    => T_OPEN_TAG,
Chris@17:             'type'    => 'T_OPEN_TAG',
Chris@17:             'content' => '',
Chris@17:         ];
Chris@17: 
Chris@17:         // Convert newlines to single characters for ease of
Chris@17:         // processing. We will change them back later.
Chris@17:         $string = str_replace($this->eolChar, "\n", $string);
Chris@17: 
Chris@17:         $chars    = str_split($string);
Chris@17:         $numChars = count($chars);
Chris@17:         for ($i = 0; $i < $numChars; $i++) {
Chris@17:             $char = $chars[$i];
Chris@17: 
Chris@17:             if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:                 $content       = Util\Common::prepareForOutput($char);
Chris@17:                 $bufferContent = Util\Common::prepareForOutput($buffer);
Chris@17: 
Chris@17:                 if ($inString !== '') {
Chris@17:                     echo "\t";
Chris@17:                 }
Chris@17: 
Chris@17:                 if ($inComment !== '') {
Chris@17:                     echo "\t";
Chris@17:                 }
Chris@17: 
Chris@17:                 echo "\tProcess char $i => $content (buffer: $bufferContent)".PHP_EOL;
Chris@17:             }//end if
Chris@17: 
Chris@17:             if ($inString === '' && $inComment === '' && $buffer !== '') {
Chris@17:                 // If the buffer only has whitespace and we are about to
Chris@17:                 // add a character, store the whitespace first.
Chris@17:                 if (trim($char) !== '' && trim($buffer) === '') {
Chris@17:                     $tokens[] = [
Chris@17:                         'code'    => T_WHITESPACE,
Chris@17:                         'type'    => 'T_WHITESPACE',
Chris@17:                         'content' => str_replace("\n", $this->eolChar, $buffer),
Chris@17:                     ];
Chris@17: 
Chris@17:                     if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:                         $content = Util\Common::prepareForOutput($buffer);
Chris@17:                         echo "\t=> Added token T_WHITESPACE ($content)".PHP_EOL;
Chris@17:                     }
Chris@17: 
Chris@17:                     $buffer = '';
Chris@17:                 }
Chris@17: 
Chris@17:                 // If the buffer is not whitespace and we are about to
Chris@17:                 // add a whitespace character, store the content first.
Chris@17:                 if ($inString === ''
Chris@17:                     && $inComment === ''
Chris@17:                     && trim($char) === ''
Chris@17:                     && trim($buffer) !== ''
Chris@17:                 ) {
Chris@17:                     $tokens[] = [
Chris@17:                         'code'    => T_STRING,
Chris@17:                         'type'    => 'T_STRING',
Chris@17:                         'content' => str_replace("\n", $this->eolChar, $buffer),
Chris@17:                     ];
Chris@17: 
Chris@17:                     if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:                         $content = Util\Common::prepareForOutput($buffer);
Chris@17:                         echo "\t=> Added token T_STRING ($content)".PHP_EOL;
Chris@17:                     }
Chris@17: 
Chris@17:                     $buffer = '';
Chris@17:                 }
Chris@17:             }//end if
Chris@17: 
Chris@17:             // Process strings.
Chris@17:             if ($inComment === '' && isset($this->stringTokens[$char]) === true) {
Chris@17:                 if ($inString === $char) {
Chris@17:                     // This could be the end of the string, but make sure it
Chris@17:                     // is not escaped first.
Chris@17:                     $escapes = 0;
Chris@17:                     for ($x = ($i - 1); $x >= 0; $x--) {
Chris@17:                         if ($chars[$x] !== '\\') {
Chris@17:                             break;
Chris@17:                         }
Chris@17: 
Chris@17:                         $escapes++;
Chris@17:                     }
Chris@17: 
Chris@17:                     if ($escapes === 0 || ($escapes % 2) === 0) {
Chris@17:                         // There is an even number escape chars,
Chris@17:                         // so this is not escaped, it is the end of the string.
Chris@17:                         $tokens[] = [
Chris@17:                             'code'    => T_CONSTANT_ENCAPSED_STRING,
Chris@17:                             'type'    => 'T_CONSTANT_ENCAPSED_STRING',
Chris@17:                             'content' => str_replace("\n", $this->eolChar, $buffer).$char,
Chris@17:                         ];
Chris@17: 
Chris@17:                         if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:                             echo "\t\t* found end of string *".PHP_EOL;
Chris@17:                             $content = Util\Common::prepareForOutput($buffer.$char);
Chris@17:                             echo "\t=> Added token T_CONSTANT_ENCAPSED_STRING ($content)".PHP_EOL;
Chris@17:                         }
Chris@17: 
Chris@17:                         $buffer          = '';
Chris@17:                         $preStringBuffer = '';
Chris@17:                         $inString        = '';
Chris@17:                         $stringChar      = null;
Chris@17:                         continue;
Chris@17:                     }//end if
Chris@17:                 } else if ($inString === '') {
Chris@17:                     $inString        = $char;
Chris@17:                     $stringChar      = $i;
Chris@17:                     $preStringBuffer = $buffer;
Chris@17: 
Chris@17:                     if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:                         echo "\t\t* looking for string closer *".PHP_EOL;
Chris@17:                     }
Chris@17:                 }//end if
Chris@17:             }//end if
Chris@17: 
Chris@17:             if ($inString !== '' && $char === "\n") {
Chris@17:                 // Unless this newline character is escaped, the string did not
Chris@17:                 // end before the end of the line, which means it probably
Chris@17:                 // wasn't a string at all (maybe a regex).
Chris@17:                 if ($chars[($i - 1)] !== '\\') {
Chris@17:                     $i      = $stringChar;
Chris@17:                     $buffer = $preStringBuffer;
Chris@17:                     $preStringBuffer = '';
Chris@17:                     $inString        = '';
Chris@17:                     $stringChar      = null;
Chris@17:                     $char            = $chars[$i];
Chris@17: 
Chris@17:                     if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:                         echo "\t\t* found newline before end of string, bailing *".PHP_EOL;
Chris@17:                     }
Chris@17:                 }
Chris@17:             }
Chris@17: 
Chris@17:             $buffer .= $char;
Chris@17: 
Chris@17:             // We don't look for special tokens inside strings,
Chris@17:             // so if we are in a string, we can continue here now
Chris@17:             // that the current char is in the buffer.
Chris@17:             if ($inString !== '') {
Chris@17:                 continue;
Chris@17:             }
Chris@17: 
Chris@17:             // Special case for T_DIVIDE which can actually be
Chris@17:             // the start of a regular expression.
Chris@17:             if ($buffer === $char && $char === '/' && $chars[($i + 1)] !== '*') {
Chris@18:                 $regex = $this->getRegexToken($i, $string, $chars, $tokens);
Chris@17:                 if ($regex !== null) {
Chris@17:                     $tokens[] = [
Chris@17:                         'code'    => T_REGULAR_EXPRESSION,
Chris@17:                         'type'    => 'T_REGULAR_EXPRESSION',
Chris@17:                         'content' => $regex['content'],
Chris@17:                     ];
Chris@17: 
Chris@17:                     if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:                         $content = Util\Common::prepareForOutput($regex['content']);
Chris@17:                         echo "\t=> Added token T_REGULAR_EXPRESSION ($content)".PHP_EOL;
Chris@17:                     }
Chris@17: 
Chris@17:                     $i           = $regex['end'];
Chris@17:                     $buffer      = '';
Chris@17:                     $cleanBuffer = false;
Chris@17:                     continue;
Chris@17:                 }//end if
Chris@17:             }//end if
Chris@17: 
Chris@17:             // Check for known tokens, but ignore tokens found that are not at
Chris@17:             // the end of a string, like FOR and this.FORmat.
Chris@17:             if (isset($this->tokenValues[strtolower($buffer)]) === true
Chris@17:                 && (preg_match('|[a-zA-z0-9_]|', $char) === 0
Chris@17:                 || isset($chars[($i + 1)]) === false
Chris@17:                 || preg_match('|[a-zA-z0-9_]|', $chars[($i + 1)]) === 0)
Chris@17:             ) {
Chris@17:                 $matchedToken    = false;
Chris@17:                 $lookAheadLength = ($maxTokenLength - strlen($buffer));
Chris@17: 
Chris@17:                 if ($lookAheadLength > 0) {
Chris@17:                     // The buffer contains a token type, but we need
Chris@17:                     // to look ahead at the next chars to see if this is
Chris@17:                     // actually part of a larger token. For example,
Chris@17:                     // FOR and FOREACH.
Chris@17:                     if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:                         echo "\t\t* buffer possibly contains token, looking ahead $lookAheadLength chars *".PHP_EOL;
Chris@17:                     }
Chris@17: 
Chris@17:                     $charBuffer = $buffer;
Chris@17:                     for ($x = 1; $x <= $lookAheadLength; $x++) {
Chris@17:                         if (isset($chars[($i + $x)]) === false) {
Chris@17:                             break;
Chris@17:                         }
Chris@17: 
Chris@17:                         $charBuffer .= $chars[($i + $x)];
Chris@17: 
Chris@17:                         if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:                             $content = Util\Common::prepareForOutput($charBuffer);
Chris@17:                             echo "\t\t=> Looking ahead $x chars => $content".PHP_EOL;
Chris@17:                         }
Chris@17: 
Chris@17:                         if (isset($this->tokenValues[strtolower($charBuffer)]) === true) {
Chris@17:                             // We've found something larger that matches
Chris@17:                             // so we can ignore this char. Except for 1 very specific
Chris@17:                             // case where a comment like /**/ needs to tokenize as
Chris@17:                             // T_COMMENT and not T_DOC_COMMENT.
Chris@17:                             $oldType = $this->tokenValues[strtolower($buffer)];
Chris@17:                             $newType = $this->tokenValues[strtolower($charBuffer)];
Chris@17:                             if ($oldType === 'T_COMMENT'
Chris@17:                                 && $newType === 'T_DOC_COMMENT'
Chris@17:                                 && $chars[($i + $x + 1)] === '/'
Chris@17:                             ) {
Chris@17:                                 if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:                                     echo "\t\t* look ahead ignored T_DOC_COMMENT, continuing *".PHP_EOL;
Chris@17:                                 }
Chris@17:                             } else {
Chris@17:                                 if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:                                     echo "\t\t* look ahead found more specific token ($newType), ignoring $i *".PHP_EOL;
Chris@17:                                 }
Chris@17: 
Chris@17:                                 $matchedToken = true;
Chris@17:                                 break;
Chris@17:                             }
Chris@17:                         }//end if
Chris@17:                     }//end for
Chris@17:                 }//end if
Chris@17: 
Chris@17:                 if ($matchedToken === false) {
Chris@17:                     if (PHP_CODESNIFFER_VERBOSITY > 1 && $lookAheadLength > 0) {
Chris@17:                         echo "\t\t* look ahead found nothing *".PHP_EOL;
Chris@17:                     }
Chris@17: 
Chris@17:                     $value = $this->tokenValues[strtolower($buffer)];
Chris@17: 
Chris@17:                     if ($value === 'T_FUNCTION' && $buffer !== 'function') {
Chris@17:                         // The function keyword needs to be all lowercase or else
Chris@17:                         // it is just a function called "Function".
Chris@17:                         $value = 'T_STRING';
Chris@17:                     }
Chris@17: 
Chris@17:                     $tokens[] = [
Chris@17:                         'code'    => constant($value),
Chris@17:                         'type'    => $value,
Chris@17:                         'content' => $buffer,
Chris@17:                     ];
Chris@17: 
Chris@17:                     if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:                         $content = Util\Common::prepareForOutput($buffer);
Chris@17:                         echo "\t=> Added token $value ($content)".PHP_EOL;
Chris@17:                     }
Chris@17: 
Chris@17:                     $cleanBuffer = true;
Chris@17:                 }//end if
Chris@17:             } else if (isset($this->tokenValues[strtolower($char)]) === true) {
Chris@17:                 // No matter what token we end up using, we don't
Chris@17:                 // need the content in the buffer any more because we have
Chris@17:                 // found a valid token.
Chris@17:                 $newContent = substr(str_replace("\n", $this->eolChar, $buffer), 0, -1);
Chris@17:                 if ($newContent !== '') {
Chris@17:                     $tokens[] = [
Chris@17:                         'code'    => T_STRING,
Chris@17:                         'type'    => 'T_STRING',
Chris@17:                         'content' => $newContent,
Chris@17:                     ];
Chris@17: 
Chris@17:                     if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:                         $content = Util\Common::prepareForOutput(substr($buffer, 0, -1));
Chris@17:                         echo "\t=> Added token T_STRING ($content)".PHP_EOL;
Chris@17:                     }
Chris@17:                 }
Chris@17: 
Chris@17:                 if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:                     echo "\t\t* char is token, looking ahead ".($maxTokenLength - 1).' chars *'.PHP_EOL;
Chris@17:                 }
Chris@17: 
Chris@17:                 // The char is a token type, but we need to look ahead at the
Chris@17:                 // next chars to see if this is actually part of a larger token.
Chris@17:                 // For example, = and ===.
Chris@17:                 $charBuffer   = $char;
Chris@17:                 $matchedToken = false;
Chris@17:                 for ($x = 1; $x <= $maxTokenLength; $x++) {
Chris@17:                     if (isset($chars[($i + $x)]) === false) {
Chris@17:                         break;
Chris@17:                     }
Chris@17: 
Chris@17:                     $charBuffer .= $chars[($i + $x)];
Chris@17: 
Chris@17:                     if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:                         $content = Util\Common::prepareForOutput($charBuffer);
Chris@17:                         echo "\t\t=> Looking ahead $x chars => $content".PHP_EOL;
Chris@17:                     }
Chris@17: 
Chris@17:                     if (isset($this->tokenValues[strtolower($charBuffer)]) === true) {
Chris@17:                         // We've found something larger that matches
Chris@17:                         // so we can ignore this char.
Chris@17:                         if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:                             $type = $this->tokenValues[strtolower($charBuffer)];
Chris@17:                             echo "\t\t* look ahead found more specific token ($type), ignoring $i *".PHP_EOL;
Chris@17:                         }
Chris@17: 
Chris@17:                         $matchedToken = true;
Chris@17:                         break;
Chris@17:                     }
Chris@17:                 }//end for
Chris@17: 
Chris@17:                 if ($matchedToken === false) {
Chris@17:                     $value    = $this->tokenValues[strtolower($char)];
Chris@17:                     $tokens[] = [
Chris@17:                         'code'    => constant($value),
Chris@17:                         'type'    => $value,
Chris@17:                         'content' => $char,
Chris@17:                     ];
Chris@17: 
Chris@17:                     if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:                         echo "\t\t* look ahead found nothing *".PHP_EOL;
Chris@17:                         $content = Util\Common::prepareForOutput($char);
Chris@17:                         echo "\t=> Added token $value ($content)".PHP_EOL;
Chris@17:                     }
Chris@17: 
Chris@17:                     $cleanBuffer = true;
Chris@17:                 } else {
Chris@17:                     $buffer = $char;
Chris@17:                 }//end if
Chris@17:             }//end if
Chris@17: 
Chris@17:             // Keep track of content inside comments.
Chris@17:             if ($inComment === ''
Chris@17:                 && array_key_exists($buffer, $this->commentTokens) === true
Chris@17:             ) {
Chris@17:                 // This is not really a comment if the content
Chris@17:                 // looks like \// (i.e., it is escaped).
Chris@17:                 if (isset($chars[($i - 2)]) === true && $chars[($i - 2)] === '\\') {
Chris@17:                     $lastToken   = array_pop($tokens);
Chris@17:                     $lastContent = $lastToken['content'];
Chris@17:                     if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:                         $value   = $this->tokenValues[strtolower($lastContent)];
Chris@17:                         $content = Util\Common::prepareForOutput($lastContent);
Chris@17:                         echo "\t=> Removed token $value ($content)".PHP_EOL;
Chris@17:                     }
Chris@17: 
Chris@17:                     $lastChars    = str_split($lastContent);
Chris@17:                     $lastNumChars = count($lastChars);
Chris@17:                     for ($x = 0; $x < $lastNumChars; $x++) {
Chris@17:                         $lastChar = $lastChars[$x];
Chris@17:                         $value    = $this->tokenValues[strtolower($lastChar)];
Chris@17:                         $tokens[] = [
Chris@17:                             'code'    => constant($value),
Chris@17:                             'type'    => $value,
Chris@17:                             'content' => $lastChar,
Chris@17:                         ];
Chris@17: 
Chris@17:                         if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:                             $content = Util\Common::prepareForOutput($lastChar);
Chris@17:                             echo "\t=> Added token $value ($content)".PHP_EOL;
Chris@17:                         }
Chris@17:                     }
Chris@17:                 } else {
Chris@17:                     // We have started a comment.
Chris@17:                     $inComment = $buffer;
Chris@17: 
Chris@17:                     if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:                         echo "\t\t* looking for end of comment *".PHP_EOL;
Chris@17:                     }
Chris@17:                 }//end if
Chris@17:             } else if ($inComment !== '') {
Chris@17:                 if ($this->commentTokens[$inComment] === null) {
Chris@17:                     // Comment ends at the next newline.
Chris@17:                     if (strpos($buffer, "\n") !== false) {
Chris@17:                         $inComment = '';
Chris@17:                     }
Chris@17:                 } else {
Chris@17:                     if ($this->commentTokens[$inComment] === $buffer) {
Chris@17:                         $inComment = '';
Chris@17:                     }
Chris@17:                 }
Chris@17: 
Chris@17:                 if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:                     if ($inComment === '') {
Chris@17:                         echo "\t\t* found end of comment *".PHP_EOL;
Chris@17:                     }
Chris@17:                 }
Chris@17: 
Chris@17:                 if ($inComment === '' && $cleanBuffer === false) {
Chris@17:                     $tokens[] = [
Chris@17:                         'code'    => T_STRING,
Chris@17:                         'type'    => 'T_STRING',
Chris@17:                         'content' => str_replace("\n", $this->eolChar, $buffer),
Chris@17:                     ];
Chris@17: 
Chris@17:                     if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:                         $content = Util\Common::prepareForOutput($buffer);
Chris@17:                         echo "\t=> Added token T_STRING ($content)".PHP_EOL;
Chris@17:                     }
Chris@17: 
Chris@17:                     $buffer = '';
Chris@17:                 }
Chris@17:             }//end if
Chris@17: 
Chris@17:             if ($cleanBuffer === true) {
Chris@17:                 $buffer      = '';
Chris@17:                 $cleanBuffer = false;
Chris@17:             }
Chris@17:         }//end for
Chris@17: 
Chris@17:         if (empty($buffer) === false) {
Chris@17:             if ($inString !== '') {
Chris@17:                 // The string did not end before the end of the file,
Chris@17:                 // which means there was probably a syntax error somewhere.
Chris@17:                 $tokens[] = [
Chris@17:                     'code'    => T_STRING,
Chris@17:                     'type'    => 'T_STRING',
Chris@17:                     'content' => str_replace("\n", $this->eolChar, $buffer),
Chris@17:                 ];
Chris@17: 
Chris@17:                 if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:                     $content = Util\Common::prepareForOutput($buffer);
Chris@17:                     echo "\t=> Added token T_STRING ($content)".PHP_EOL;
Chris@17:                 }
Chris@17:             } else {
Chris@17:                 // Buffer contains whitespace from the end of the file.
Chris@17:                 $tokens[] = [
Chris@17:                     'code'    => T_WHITESPACE,
Chris@17:                     'type'    => 'T_WHITESPACE',
Chris@17:                     'content' => str_replace("\n", $this->eolChar, $buffer),
Chris@17:                 ];
Chris@17: 
Chris@17:                 if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:                     $content = Util\Common::prepareForOutput($buffer);
Chris@17:                     echo "\t=> Added token T_WHITESPACE ($content)".PHP_EOL;
Chris@17:                 }
Chris@17:             }//end if
Chris@17:         }//end if
Chris@17: 
Chris@17:         $tokens[] = [
Chris@17:             'code'    => T_CLOSE_TAG,
Chris@17:             'type'    => 'T_CLOSE_TAG',
Chris@17:             'content' => '',
Chris@17:         ];
Chris@17: 
Chris@17:         /*
Chris@17:             Now that we have done some basic tokenizing, we need to
Chris@17:             modify the tokens to join some together and split some apart
Chris@17:             so they match what the PHP tokenizer does.
Chris@17:         */
Chris@17: 
Chris@17:         $finalTokens = [];
Chris@17:         $newStackPtr = 0;
Chris@17:         $numTokens   = count($tokens);
Chris@17:         for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
Chris@17:             $token = $tokens[$stackPtr];
Chris@17: 
Chris@17:             /*
Chris@17:                 Look for comments and join the tokens together.
Chris@17:             */
Chris@17: 
Chris@17:             if ($token['code'] === T_COMMENT || $token['code'] === T_DOC_COMMENT) {
Chris@17:                 $newContent   = '';
Chris@17:                 $tokenContent = $token['content'];
Chris@17: 
Chris@17:                 $endContent = null;
Chris@17:                 if (isset($this->commentTokens[$tokenContent]) === true) {
Chris@17:                     $endContent = $this->commentTokens[$tokenContent];
Chris@17:                 }
Chris@17: 
Chris@17:                 while ($tokenContent !== $endContent) {
Chris@17:                     if ($endContent === null
Chris@17:                         && strpos($tokenContent, $this->eolChar) !== false
Chris@17:                     ) {
Chris@17:                         // A null end token means the comment ends at the end of
Chris@17:                         // the line so we look for newlines and split the token.
Chris@17:                         $tokens[$stackPtr]['content'] = substr(
Chris@17:                             $tokenContent,
Chris@17:                             (strpos($tokenContent, $this->eolChar) + strlen($this->eolChar))
Chris@17:                         );
Chris@17: 
Chris@17:                         $tokenContent = substr(
Chris@17:                             $tokenContent,
Chris@17:                             0,
Chris@17:                             (strpos($tokenContent, $this->eolChar) + strlen($this->eolChar))
Chris@17:                         );
Chris@17: 
Chris@17:                         // If the substr failed, skip the token as the content
Chris@17:                         // will now be blank.
Chris@17:                         if ($tokens[$stackPtr]['content'] !== false
Chris@17:                             && $tokens[$stackPtr]['content'] !== ''
Chris@17:                         ) {
Chris@17:                             $stackPtr--;
Chris@17:                         }
Chris@17: 
Chris@17:                         break;
Chris@17:                     }//end if
Chris@17: 
Chris@17:                     $stackPtr++;
Chris@17:                     $newContent .= $tokenContent;
Chris@17:                     if (isset($tokens[$stackPtr]) === false) {
Chris@17:                         break;
Chris@17:                     }
Chris@17: 
Chris@17:                     $tokenContent = $tokens[$stackPtr]['content'];
Chris@17:                 }//end while
Chris@17: 
Chris@17:                 if ($token['code'] === T_DOC_COMMENT) {
Chris@17:                     $commentTokens = $commentTokenizer->tokenizeString($newContent.$tokenContent, $this->eolChar, $newStackPtr);
Chris@17:                     foreach ($commentTokens as $commentToken) {
Chris@17:                         $finalTokens[$newStackPtr] = $commentToken;
Chris@17:                         $newStackPtr++;
Chris@17:                     }
Chris@17: 
Chris@17:                     continue;
Chris@17:                 } else {
Chris@17:                     // Save the new content in the current token so
Chris@17:                     // the code below can chop it up on newlines.
Chris@17:                     $token['content'] = $newContent.$tokenContent;
Chris@17:                 }
Chris@17:             }//end if
Chris@17: 
Chris@17:             /*
Chris@17:                 If this token has newlines in its content, split each line up
Chris@17:                 and create a new token for each line. We do this so it's easier
Chris@17:                 to ascertain where errors occur on a line.
Chris@17:                 Note that $token['content'] is the token's content.
Chris@17:             */
Chris@17: 
Chris@17:             if (strpos($token['content'], $this->eolChar) !== false) {
Chris@17:                 $tokenLines = explode($this->eolChar, $token['content']);
Chris@17:                 $numLines   = count($tokenLines);
Chris@17: 
Chris@17:                 for ($i = 0; $i < $numLines; $i++) {
Chris@18:                     $newToken = ['content' => $tokenLines[$i]];
Chris@17:                     if ($i === ($numLines - 1)) {
Chris@17:                         if ($tokenLines[$i] === '') {
Chris@17:                             break;
Chris@17:                         }
Chris@17:                     } else {
Chris@17:                         $newToken['content'] .= $this->eolChar;
Chris@17:                     }
Chris@17: 
Chris@17:                     $newToken['type']          = $token['type'];
Chris@17:                     $newToken['code']          = $token['code'];
Chris@17:                     $finalTokens[$newStackPtr] = $newToken;
Chris@17:                     $newStackPtr++;
Chris@17:                 }
Chris@17:             } else {
Chris@17:                 $finalTokens[$newStackPtr] = $token;
Chris@17:                 $newStackPtr++;
Chris@17:             }//end if
Chris@17: 
Chris@17:             // Convert numbers, including decimals.
Chris@17:             if ($token['code'] === T_STRING
Chris@17:                 || $token['code'] === T_OBJECT_OPERATOR
Chris@17:             ) {
Chris@17:                 $newContent  = '';
Chris@17:                 $oldStackPtr = $stackPtr;
Chris@17:                 while (preg_match('|^[0-9\.]+$|', $tokens[$stackPtr]['content']) !== 0) {
Chris@17:                     $newContent .= $tokens[$stackPtr]['content'];
Chris@17:                     $stackPtr++;
Chris@17:                 }
Chris@17: 
Chris@17:                 if ($newContent !== '' && $newContent !== '.') {
Chris@17:                     $finalTokens[($newStackPtr - 1)]['content'] = $newContent;
Chris@17:                     if (ctype_digit($newContent) === true) {
Chris@17:                         $finalTokens[($newStackPtr - 1)]['code'] = constant('T_LNUMBER');
Chris@17:                         $finalTokens[($newStackPtr - 1)]['type'] = 'T_LNUMBER';
Chris@17:                     } else {
Chris@17:                         $finalTokens[($newStackPtr - 1)]['code'] = constant('T_DNUMBER');
Chris@17:                         $finalTokens[($newStackPtr - 1)]['type'] = 'T_DNUMBER';
Chris@17:                     }
Chris@17: 
Chris@17:                     $stackPtr--;
Chris@17:                     continue;
Chris@17:                 } else {
Chris@17:                     $stackPtr = $oldStackPtr;
Chris@17:                 }
Chris@17:             }//end if
Chris@17: 
Chris@17:             // Convert the token after an object operator into a string, in most cases.
Chris@17:             if ($token['code'] === T_OBJECT_OPERATOR) {
Chris@17:                 for ($i = ($stackPtr + 1); $i < $numTokens; $i++) {
Chris@17:                     if (isset(Util\Tokens::$emptyTokens[$tokens[$i]['code']]) === true) {
Chris@17:                         continue;
Chris@17:                     }
Chris@17: 
Chris@17:                     if ($tokens[$i]['code'] !== T_PROTOTYPE
Chris@17:                         && $tokens[$i]['code'] !== T_LNUMBER
Chris@17:                         && $tokens[$i]['code'] !== T_DNUMBER
Chris@17:                     ) {
Chris@17:                         $tokens[$i]['code'] = T_STRING;
Chris@17:                         $tokens[$i]['type'] = 'T_STRING';
Chris@17:                     }
Chris@17: 
Chris@17:                     break;
Chris@17:                 }
Chris@17:             }
Chris@17:         }//end for
Chris@17: 
Chris@17:         if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:             echo "\t*** END TOKENIZING ***".PHP_EOL;
Chris@17:         }
Chris@17: 
Chris@17:         return $finalTokens;
Chris@17: 
Chris@17:     }//end tokenize()
Chris@17: 
Chris@17: 
Chris@17:     /**
Chris@17:      * Tokenizes a regular expression if one is found.
Chris@17:      *
Chris@17:      * A regular expression is only recognised when the previous non-empty
Chris@17:      * token is one that can precede an expression, an unescaped closing slash
Chris@17:      * is found before the end of the line, and the first character after the
Chris@17:      * expression (skipping modifiers and spaces) is one that can follow it.
Chris@17:      *
Chris@17:      * If a regular expression is not found, NULL is returned.
Chris@17:      *
Chris@17:      * @param int    $char   The index of the possible regex start character.
Chris@17:      * @param string $string The complete content of the string being tokenized.
Chris@17:      * @param array  $chars  An array of characters being tokenized.
Chris@17:      * @param array  $tokens The current array of tokens found in the string.
Chris@17:      *
Chris@17:      * @return array<string, int|string>|null The 'start' and 'end' character
Chris@17:      *                                        indexes and the 'content' of the
Chris@17:      *                                        regular expression.
Chris@17:      */
Chris@17:     public function getRegexToken($char, $string, $chars, $tokens)
Chris@17:     {
Chris@17:         // Tokens that may directly precede a regular expression.
Chris@17:         $beforeTokens = [
Chris@17:             T_EQUAL               => true,
Chris@17:             T_IS_NOT_EQUAL        => true,
Chris@17:             T_IS_IDENTICAL        => true,
Chris@17:             T_IS_NOT_IDENTICAL    => true,
Chris@17:             T_OPEN_PARENTHESIS    => true,
Chris@17:             T_OPEN_SQUARE_BRACKET => true,
Chris@17:             T_RETURN              => true,
Chris@17:             T_BOOLEAN_OR          => true,
Chris@17:             T_BOOLEAN_AND         => true,
Chris@17:             T_BOOLEAN_NOT         => true,
Chris@17:             T_BITWISE_OR          => true,
Chris@17:             T_BITWISE_AND         => true,
Chris@17:             T_COMMA               => true,
Chris@17:             T_COLON               => true,
Chris@17:             T_TYPEOF              => true,
Chris@17:             T_INLINE_THEN         => true,
Chris@17:             T_INLINE_ELSE         => true,
Chris@17:         ];
Chris@17: 
Chris@17:         // Characters that may directly follow a regular expression.
Chris@17:         // NOTE(review): the lookup further down uses a single entry of $chars,
Chris@17:         // so the EOL entry can only match a one-character EOL - confirm that
Chris@17:         // this is what callers rely on.
Chris@17:         $afterTokens = [
Chris@17:             ','            => true,
Chris@17:             ')'            => true,
Chris@17:             ']'            => true,
Chris@17:             ';'            => true,
Chris@17:             ' '            => true,
Chris@17:             '.'            => true,
Chris@17:             ':'            => true,
Chris@17:             $this->eolChar => true,
Chris@17:         ];
Chris@17: 
Chris@17:         // Find the last non-whitespace token that was added
Chris@17:         // to the tokens array.
Chris@17:         $numTokens = count($tokens);
Chris@17:         for ($prev = ($numTokens - 1); $prev >= 0; $prev--) {
Chris@17:             if (isset(Util\Tokens::$emptyTokens[$tokens[$prev]['code']]) === false) {
Chris@17:                 break;
Chris@17:             }
Chris@17:         }
Chris@17: 
Chris@17:         // When there is no previous non-empty token, $prev is -1 and must not
Chris@17:         // be used as an index; nothing can introduce a regex in that case.
Chris@17:         if ($prev < 0 || isset($beforeTokens[$tokens[$prev]['code']]) === false) {
Chris@17:             return null;
Chris@17:         }
Chris@17: 
Chris@17:         // This is probably a regular expression, so look for the end of it.
Chris@17:         if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:             echo "\t* token possibly starts a regular expression *".PHP_EOL;
Chris@17:         }
Chris@17: 
Chris@17:         $eolLength = strlen($this->eolChar);
Chris@17:         $numChars  = count($chars);
Chris@17:         for ($next = ($char + 1); $next < $numChars; $next++) {
Chris@17:             if ($chars[$next] === '/') {
Chris@17:                 // The slash only ends the expression when it is not escaped,
Chris@17:                 // i.e., when an even number of backslashes sits directly
Chris@17:                 // before it: /.../ and /...\\/ end here, but /...\/ and
Chris@17:                 // /...\\\/ do not.
Chris@17:                 $numBackslashes = 0;
Chris@17:                 for ($x = ($next - 1); $x > $char && $chars[$x] === '\\'; $x--) {
Chris@17:                     $numBackslashes++;
Chris@17:                 }
Chris@17: 
Chris@17:                 if (($numBackslashes % 2) === 0) {
Chris@17:                     break;
Chris@17:                 }
Chris@17:             } else {
Chris@17:                 $possibleEolChar = substr($string, $next, $eolLength);
Chris@17:                 if ($possibleEolChar === $this->eolChar) {
Chris@17:                     // This is the last token on the line and regular
Chris@17:                     // expressions need to be defined on a single line,
Chris@17:                     // so this is not a regular expression.
Chris@17:                     break;
Chris@17:                 }
Chris@17:             }
Chris@17:         }//end for
Chris@17: 
Chris@17:         // Running off the end of the input means no closing slash was found.
Chris@17:         if ($next >= $numChars || $chars[$next] !== '/') {
Chris@17:             if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:                 echo "\t* could not find end of regular expression *".PHP_EOL;
Chris@17:             }
Chris@17: 
Chris@17:             return null;
Chris@17:         }
Chris@17: 
Chris@17:         while (isset($chars[($next + 1)]) === true
Chris@17:             && preg_match('|[a-zA-Z]|', $chars[($next + 1)]) !== 0
Chris@17:         ) {
Chris@17:             // The token directly after the end of the regex can
Chris@17:             // be modifiers like global and case insensitive
Chris@17:             // (e.g., /pattern/gi).
Chris@17:             $next++;
Chris@17:         }
Chris@17: 
Chris@17:         $regexEnd = $next;
Chris@17:         if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:             echo "\t* found end of regular expression at token $regexEnd *".PHP_EOL;
Chris@17:         }
Chris@17: 
Chris@17:         // Skip any spaces between the expression and whatever follows it.
Chris@17:         for ($next += 1; $next < $numChars; $next++) {
Chris@17:             if ($chars[$next] !== ' ') {
Chris@17:                 break;
Chris@17:             }
Chris@17:         }
Chris@17: 
Chris@17:         // An expression that runs up to the end of the input has nothing after
Chris@17:         // it to confirm it, so it is rejected like any other unexpected follower.
Chris@17:         if ($next >= $numChars || isset($afterTokens[$chars[$next]]) === false) {
Chris@17:             if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:                 echo "\t* tokens after regular expression do not look correct *".PHP_EOL;
Chris@17:             }
Chris@17: 
Chris@17:             return null;
Chris@17:         }
Chris@17: 
Chris@17:         // This is a regular expression, so join all the tokens together.
Chris@17:         $content = '';
Chris@17:         for ($x = $char; $x <= $regexEnd; $x++) {
Chris@17:             $content .= $chars[$x];
Chris@17:         }
Chris@17: 
Chris@17:         $token = [
Chris@17:             'start'   => $char,
Chris@17:             'end'     => $regexEnd,
Chris@17:             'content' => $content,
Chris@17:         ];
Chris@17: 
Chris@17:         return $token;
Chris@17: 
Chris@17:     }//end getRegexToken()
Chris@17: 
Chris@17: 
Chris@17:     /**
Chris@17:      * Performs additional processing after main tokenizing.
Chris@17:      *
Chris@17:      * This additional processing looks for properties, closures, labels and objects:
Chris@17:      * - A T_FUNCTION with a scope opener whose next non-empty token is an open
Chris@17:      *   parenthesis becomes a T_CLOSURE.
Chris@17:      * - A curly bracket without a scope condition whose innermost condition is
Chris@17:      *   T_CLASS, and which follows a string and a set of parenthesis, turns
Chris@17:      *   that string into a T_FUNCTION owning the bracket scope (ES6 method).
Chris@17:      * - Any other curly bracket pair without a scope condition becomes
Chris@17:      *   T_OBJECT and T_CLOSE_OBJECT.
Chris@17:      * - A colon that is not a scope opener becomes T_INLINE_ELSE when it
Chris@17:      *   belongs to an inline IF; otherwise the string before it becomes a
Chris@17:      *   T_PROPERTY (inside an object) or a T_LABEL.
Chris@17:      *
Chris@17:      * @return void
Chris@17:      */
Chris@17:     public function processAdditional()
Chris@17:     {
Chris@17:         if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:             echo "\t*** START ADDITIONAL JS PROCESSING ***".PHP_EOL;
Chris@17:         }
Chris@17: 
Chris@17:         $numTokens = count($this->tokens);
Chris@17: 
Chris@17:         // Stack of the T_OBJECT openers that the current token is nested in.
Chris@17:         $classStack = [];
Chris@17: 
Chris@17:         for ($i = 0; $i < $numTokens; $i++) {
Chris@17:             if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:                 $type    = $this->tokens[$i]['type'];
Chris@17:                 $content = Util\Common::prepareForOutput($this->tokens[$i]['content']);
Chris@17: 
Chris@17:                 echo str_repeat("\t", count($classStack));
Chris@17:                 echo "\tProcess token $i: $type => $content".PHP_EOL;
Chris@17:             }
Chris@17: 
Chris@17:             // Looking for functions that are actually closures.
Chris@17:             if ($this->tokens[$i]['code'] === T_FUNCTION && isset($this->tokens[$i]['scope_opener']) === true) {
Chris@17:                 for ($x = ($i + 1); $x < $numTokens; $x++) {
Chris@17:                     if (isset(Util\Tokens::$emptyTokens[$this->tokens[$x]['code']]) === false) {
Chris@17:                         break;
Chris@17:                     }
Chris@17:                 }
Chris@17: 
Chris@17:                 // The isset() check covers the loop running off the end of the stack.
Chris@17:                 if (isset($this->tokens[$x]) === true
Chris@17:                     && $this->tokens[$x]['code'] === T_OPEN_PARENTHESIS
Chris@17:                 ) {
Chris@17:                     $this->tokens[$i]['code'] = T_CLOSURE;
Chris@17:                     $this->tokens[$i]['type'] = 'T_CLOSURE';
Chris@17:                     if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:                         $line = $this->tokens[$i]['line'];
Chris@17:                         echo str_repeat("\t", count($classStack));
Chris@17:                         echo "\t* token $i on line $line changed from T_FUNCTION to T_CLOSURE *".PHP_EOL;
Chris@17:                     }
Chris@17: 
Chris@17:                     // Tokens inside the scope record this token as a condition,
Chris@17:                     // so the recorded condition type needs updating as well.
Chris@17:                     for ($x = ($this->tokens[$i]['scope_opener'] + 1); $x < $this->tokens[$i]['scope_closer']; $x++) {
Chris@17:                         if (isset($this->tokens[$x]['conditions'][$i]) === false) {
Chris@17:                             continue;
Chris@17:                         }
Chris@17: 
Chris@17:                         $this->tokens[$x]['conditions'][$i] = T_CLOSURE;
Chris@17:                         if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:                             $type = $this->tokens[$x]['type'];
Chris@17:                             echo str_repeat("\t", count($classStack));
Chris@17:                             echo "\t\t* cleaned $x ($type) *".PHP_EOL;
Chris@17:                         }
Chris@17:                     }
Chris@17:                 }//end if
Chris@17: 
Chris@17:                 continue;
Chris@17:             } else if ($this->tokens[$i]['code'] === T_OPEN_CURLY_BRACKET
Chris@17:                 && isset($this->tokens[$i]['scope_condition']) === false
Chris@17:                 && isset($this->tokens[$i]['bracket_closer']) === true
Chris@17:             ) {
Chris@18:                 $condition = $this->tokens[$i]['conditions'];
Chris@18:                 $condition = end($condition);
Chris@17:                 if ($condition === T_CLASS) {
Chris@17:                     // Possibly an ES6 method. To be classified as one, the previous
Chris@17:                     // non-empty tokens need to be a set of parenthesis, and then a string
Chris@17:                     // (the method name).
Chris@17:                     for ($parenCloser = ($i - 1); $parenCloser > 0; $parenCloser--) {
Chris@17:                         if (isset(Util\Tokens::$emptyTokens[$this->tokens[$parenCloser]['code']]) === false) {
Chris@17:                             break;
Chris@17:                         }
Chris@17:                     }
Chris@17: 
Chris@17:                     if ($this->tokens[$parenCloser]['code'] === T_CLOSE_PARENTHESIS
Chris@17:                         && isset($this->tokens[$parenCloser]['parenthesis_opener']) === true
Chris@17:                     ) {
Chris@17:                         $parenOpener = $this->tokens[$parenCloser]['parenthesis_opener'];
Chris@17:                         for ($name = ($parenOpener - 1); $name > 0; $name--) {
Chris@17:                             if (isset(Util\Tokens::$emptyTokens[$this->tokens[$name]['code']]) === false) {
Chris@17:                                 break;
Chris@17:                             }
Chris@17:                         }
Chris@17: 
Chris@17:                         if ($this->tokens[$name]['code'] === T_STRING) {
Chris@17:                             // We found a method name.
Chris@17:                             if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:                                 $line = $this->tokens[$name]['line'];
Chris@17:                                 echo str_repeat("\t", count($classStack));
Chris@17:                                 echo "\t* token $name on line $line changed from T_STRING to T_FUNCTION *".PHP_EOL;
Chris@17:                             }
Chris@17: 
Chris@17:                             $closer = $this->tokens[$i]['bracket_closer'];
Chris@17: 
Chris@17:                             $this->tokens[$name]['code'] = T_FUNCTION;
Chris@17:                             $this->tokens[$name]['type'] = 'T_FUNCTION';
Chris@17: 
Chris@17:                             // The name and both curly brackets all describe the same scope.
Chris@17:                             foreach ([$name, $i, $closer] as $token) {
Chris@17:                                 $this->tokens[$token]['scope_condition']    = $name;
Chris@17:                                 $this->tokens[$token]['scope_opener']       = $i;
Chris@17:                                 $this->tokens[$token]['scope_closer']       = $closer;
Chris@17:                                 $this->tokens[$token]['parenthesis_opener'] = $parenOpener;
Chris@17:                                 $this->tokens[$token]['parenthesis_closer'] = $parenCloser;
Chris@17:                                 $this->tokens[$token]['parenthesis_owner']  = $name;
Chris@17:                             }
Chris@17: 
Chris@17:                             $this->tokens[$parenOpener]['parenthesis_owner'] = $name;
Chris@17:                             $this->tokens[$parenCloser]['parenthesis_owner'] = $name;
Chris@17: 
Chris@17:                             for ($x = ($i + 1); $x < $closer; $x++) {
Chris@17:                                 $this->tokens[$x]['conditions'][$name] = T_FUNCTION;
Chris@17:                                 ksort($this->tokens[$x]['conditions'], SORT_NUMERIC);
Chris@17:                                 if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:                                     $type = $this->tokens[$x]['type'];
Chris@17:                                     echo str_repeat("\t", count($classStack));
Chris@17:                                     echo "\t\t* added T_FUNCTION condition to $x ($type) *".PHP_EOL;
Chris@17:                                 }
Chris@17:                             }
Chris@17: 
Chris@17:                             continue;
Chris@17:                         }//end if
Chris@17:                     }//end if
Chris@17:                 }//end if
Chris@17: 
Chris@17:                 // Not a method body, so the brackets are treated as an object.
Chris@17:                 $classStack[] = $i;
Chris@17: 
Chris@17:                 $closer = $this->tokens[$i]['bracket_closer'];
Chris@17:                 $this->tokens[$i]['code']      = T_OBJECT;
Chris@17:                 $this->tokens[$i]['type']      = 'T_OBJECT';
Chris@17:                 $this->tokens[$closer]['code'] = T_CLOSE_OBJECT;
Chris@17:                 $this->tokens[$closer]['type'] = 'T_CLOSE_OBJECT';
Chris@17: 
Chris@17:                 if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:                     echo str_repeat("\t", count($classStack));
Chris@17:                     echo "\t* token $i converted from T_OPEN_CURLY_BRACKET to T_OBJECT *".PHP_EOL;
Chris@17:                     echo str_repeat("\t", count($classStack));
Chris@17:                     echo "\t* token $closer converted from T_CLOSE_CURLY_BRACKET to T_CLOSE_OBJECT *".PHP_EOL;
Chris@17:                 }
Chris@17: 
Chris@17:                 for ($x = ($i + 1); $x < $closer; $x++) {
Chris@17:                     $this->tokens[$x]['conditions'][$i] = T_OBJECT;
Chris@17:                     ksort($this->tokens[$x]['conditions'], SORT_NUMERIC);
Chris@17:                     if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:                         $type = $this->tokens[$x]['type'];
Chris@17:                         echo str_repeat("\t", count($classStack));
Chris@17:                         echo "\t\t* added T_OBJECT condition to $x ($type) *".PHP_EOL;
Chris@17:                     }
Chris@17:                 }
Chris@17:             } else if ($this->tokens[$i]['code'] === T_CLOSE_OBJECT) {
Chris@17:                 // Leaving the innermost object.
Chris@17:                 array_pop($classStack);
Chris@17:             } else if ($this->tokens[$i]['code'] === T_COLON) {
Chris@17:                 // If it is a scope opener, it belongs to a
Chris@17:                 // DEFAULT or CASE statement.
Chris@17:                 if (isset($this->tokens[$i]['scope_condition']) === true) {
Chris@17:                     continue;
Chris@17:                 }
Chris@17: 
Chris@17:                 // Make sure this is not part of an inline IF statement.
Chris@17:                 for ($x = ($i - 1); $x >= 0; $x--) {
Chris@17:                     if ($this->tokens[$x]['code'] === T_INLINE_THEN) {
Chris@17:                         $this->tokens[$i]['code'] = T_INLINE_ELSE;
Chris@17:                         $this->tokens[$i]['type'] = 'T_INLINE_ELSE';
Chris@17: 
Chris@17:                         if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:                             echo str_repeat("\t", count($classStack));
Chris@17:                             echo "\t* token $i converted from T_COLON to T_INLINE_ELSE *".PHP_EOL;
Chris@17:                         }
Chris@17: 
Chris@17:                         continue(2);
Chris@17:                     } else if ($this->tokens[$x]['line'] < $this->tokens[$i]['line']) {
Chris@17:                         // Stop looking once a token from an earlier line is reached.
Chris@17:                         break;
Chris@17:                     }
Chris@17:                 }
Chris@17: 
Chris@17:                 // The string to the left of the colon is either a property or label.
Chris@17:                 for ($label = ($i - 1); $label >= 0; $label--) {
Chris@17:                     if (isset(Util\Tokens::$emptyTokens[$this->tokens[$label]['code']]) === false) {
Chris@17:                         break;
Chris@17:                     }
Chris@17:                 }
Chris@17: 
Chris@17:                 // When nothing but empty tokens precede the colon, $label is -1
Chris@17:                 // and there is no token to convert.
Chris@17:                 if ($label < 0) {
Chris@17:                     continue;
Chris@17:                 }
Chris@17: 
Chris@17:                 if ($this->tokens[$label]['code'] !== T_STRING
Chris@17:                     && $this->tokens[$label]['code'] !== T_CONSTANT_ENCAPSED_STRING
Chris@17:                 ) {
Chris@17:                     continue;
Chris@17:                 }
Chris@17: 
Chris@17:                 if (empty($classStack) === false) {
Chris@17:                     $this->tokens[$label]['code'] = T_PROPERTY;
Chris@17:                     $this->tokens[$label]['type'] = 'T_PROPERTY';
Chris@17: 
Chris@17:                     if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:                         echo str_repeat("\t", count($classStack));
Chris@17:                         echo "\t* token $label converted from T_STRING to T_PROPERTY *".PHP_EOL;
Chris@17:                     }
Chris@17:                 } else {
Chris@17:                     $this->tokens[$label]['code'] = T_LABEL;
Chris@17:                     $this->tokens[$label]['type'] = 'T_LABEL';
Chris@17: 
Chris@17:                     if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:                         echo str_repeat("\t", count($classStack));
Chris@17:                         echo "\t* token $label converted from T_STRING to T_LABEL *".PHP_EOL;
Chris@17:                     }
Chris@17:                 }//end if
Chris@17:             }//end if
Chris@17:         }//end for
Chris@17: 
Chris@17:         if (PHP_CODESNIFFER_VERBOSITY > 1) {
Chris@17:             echo "\t*** END ADDITIONAL JS PROCESSING ***".PHP_EOL;
Chris@17:         }
Chris@17: 
Chris@17:     }//end processAdditional()
Chris@17: 
Chris@17: 
Chris@17: }//end class