Mercurial > hg > isophonics-drupal-site

diff vendor/squizlabs/php_codesniffer/CodeSniffer/Tokenizers/JS.php @ 0:4c8ae668cc8c
Initial import (non-working)
author: Chris Cannam
date: Wed, 29 Nov 2017 16:09:58 +0000
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/vendor/squizlabs/php_codesniffer/CodeSniffer/Tokenizers/JS.php	Wed Nov 29 16:09:58 2017 +0000
@@ -0,0 +1,1179 @@
+<?php
+/**
+ * Tokenizes JS code.
+ *
+ * PHP version 5
+ *
+ * @category  PHP
+ * @package   PHP_CodeSniffer
+ * @author    Greg Sherwood <gsherwood@squiz.net>
+ * @author    Marc McIntyre <mmcintyre@squiz.net>
+ * @copyright 2006-2014 Squiz Pty Ltd (ABN 77 084 670 600)
+ * @license   https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
+ * @link      http://pear.php.net/package/PHP_CodeSniffer
+ */
+
+/**
+ * Tokenizes JS code.
+ *
+ * @category  PHP
+ * @package   PHP_CodeSniffer
+ * @author    Greg Sherwood <gsherwood@squiz.net>
+ * @copyright 2006-2014 Squiz Pty Ltd (ABN 77 084 670 600)
+ * @license   https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
+ * @version   Release: @package_version@
+ * @link      http://pear.php.net/package/PHP_CodeSniffer
+ */
+class PHP_CodeSniffer_Tokenizers_JS
+{
+
+    /**
+     * If TRUE, files that appear to be minified will not be processed.
+     *
+     * @var boolean
+     */
+    public $skipMinified = true;
+
+    /**
+     * A list of tokens that are allowed to open a scope.
+     *
+     * This array also contains information about what kind of token the scope
+     * opener uses to open and close the scope, if the token strictly requires
+     * an opener, if the token can share a scope closer, and who it can be shared
+     * with. An example of a token that shares a scope closer is a CASE scope.
+     *
+     * @var array
+     */
+    public $scopeOpeners = array(
+                            T_IF       => array(
+                                           'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
+                                           'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
+                                           'strict' => false,
+                                           'shared' => false,
+                                           'with'   => array(),
+                                          ),
+                            T_TRY      => array(
+                                           'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
+                                           'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
+                                           'strict' => true,
+                                           'shared' => false,
+                                           'with'   => array(),
+                                          ),
+                            T_CATCH    => array(
+                                           'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
+                                           'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
+                                           'strict' => true,
+                                           'shared' => false,
+                                           'with'   => array(),
+                                          ),
+                            T_ELSE     => array(
+                                           'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
+                                           'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
+                                           'strict' => false,
+                                           'shared' => false,
+                                           'with'   => array(),
+                                          ),
+                            T_FOR      => array(
+                                           'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
+                                           'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
+                                           'strict' => false,
+                                           'shared' => false,
+                                           'with'   => array(),
+                                          ),
+                            T_FUNCTION => array(
+                                           'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
+                                           'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
+                                           'strict' => false,
+                                           'shared' => false,
+                                           'with'   => array(),
+                                          ),
+                            T_WHILE    => array(
+                                           'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
+                                           'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
+                                           'strict' => false,
+                                           'shared' => false,
+                                           'with'   => array(),
+                                          ),
+                            T_DO       => array(
+                                           'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
+                                           'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
+                                           'strict' => true,
+                                           'shared' => false,
+                                           'with'   => array(),
+                                          ),
+                            T_SWITCH   => array(
+                                           'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
+                                           'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
+                                           'strict' => true,
+                                           'shared' => false,
+                                           'with'   => array(),
+                                          ),
+                            T_CASE     => array(
+                                           'start'  => array(T_COLON => T_COLON),
+                                           'end'    => array(
+                                                        T_BREAK    => T_BREAK,
+                                                        T_RETURN   => T_RETURN,
+                                                        T_CONTINUE => T_CONTINUE,
+                                                        T_THROW    => T_THROW,
+                                                       ),
+                                           'strict' => true,
+                                           'shared' => true,
+                                           'with'   => array(
+                                                        T_DEFAULT => T_DEFAULT,
+                                                        T_CASE    => T_CASE,
+                                                        T_SWITCH  => T_SWITCH,
+                                                       ),
+                                          ),
+                            T_DEFAULT  => array(
+                                           'start'  => array(T_COLON => T_COLON),
+                                           'end'    => array(
+                                                        T_BREAK    => T_BREAK,
+                                                        T_RETURN   => T_RETURN,
+                                                        T_CONTINUE => T_CONTINUE,
+                                                        T_THROW    => T_THROW,
+                                                       ),
+                                           'strict' => true,
+                                           'shared' => true,
+                                           'with'   => array(
+                                                        T_CASE   => T_CASE,
+                                                        T_SWITCH => T_SWITCH,
+                                                       ),
+                                          ),
+                           );
+
+    /**
+     * A list of tokens that end the scope.
+     *
+     * This array is just a unique collection of the end tokens
+     * from the $scopeOpeners array. The data is duplicated here to
+     * save time during parsing of the file.
+     *
+     * @var array
+     */
+    public $endScopeTokens = array(
+                              T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET,
+                              T_BREAK               => T_BREAK,
+                             );
+
+    /**
+     * A list of special JS tokens and their types.
+     *
+     * @var array
+     */
+    protected $tokenValues = array(
+                              'function'  => 'T_FUNCTION',
+                              'prototype' => 'T_PROTOTYPE',
+                              'try'       => 'T_TRY',
+                              'catch'     => 'T_CATCH',
+                              'return'    => 'T_RETURN',
+                              'throw'     => 'T_THROW',
+                              'break'     => 'T_BREAK',
+                              'switch'    => 'T_SWITCH',
+                              'continue'  => 'T_CONTINUE',
+                              'if'        => 'T_IF',
+                              'else'      => 'T_ELSE',
+                              'do'        => 'T_DO',
+                              'while'     => 'T_WHILE',
+                              'for'       => 'T_FOR',
+                              'var'       => 'T_VAR',
+                              'case'      => 'T_CASE',
+                              'default'   => 'T_DEFAULT',
+                              'true'      => 'T_TRUE',
+                              'false'     => 'T_FALSE',
+                              'null'      => 'T_NULL',
+                              'this'      => 'T_THIS',
+                              'typeof'    => 'T_TYPEOF',
+                              '('         => 'T_OPEN_PARENTHESIS',
+                              ')'         => 'T_CLOSE_PARENTHESIS',
+                              '{'         => 'T_OPEN_CURLY_BRACKET',
+                              '}'         => 'T_CLOSE_CURLY_BRACKET',
+                              '['         => 'T_OPEN_SQUARE_BRACKET',
+                              ']'         => 'T_CLOSE_SQUARE_BRACKET',
+                              '?'         => 'T_INLINE_THEN',
+                              '.'         => 'T_OBJECT_OPERATOR',
+                              '+'         => 'T_PLUS',
+                              '-'         => 'T_MINUS',
+                              '*'         => 'T_MULTIPLY',
+                              '%'         => 'T_MODULUS',
+                              '/'         => 'T_DIVIDE',
+                              '^'         => 'T_LOGICAL_XOR',
+                              ','         => 'T_COMMA',
+                              ';'         => 'T_SEMICOLON',
+                              ':'         => 'T_COLON',
+                              '<'         => 'T_LESS_THAN',
+                              '>'         => 'T_GREATER_THAN',
+                              '<<'        => 'T_SL',
+                              '>>'        => 'T_SR',
+                              '>>>'       => 'T_ZSR',
+                              '<<='       => 'T_SL_EQUAL',
+                              '>>='       => 'T_SR_EQUAL',
+                              '>>>='      => 'T_ZSR_EQUAL',
+                              '<='        => 'T_IS_SMALLER_OR_EQUAL',
+                              '>='        => 'T_IS_GREATER_OR_EQUAL',
+                              '=>'        => 'T_DOUBLE_ARROW',
+                              '!'         => 'T_BOOLEAN_NOT',
+                              '||'        => 'T_BOOLEAN_OR',
+                              '&&'        => 'T_BOOLEAN_AND',
+                              '|'         => 'T_BITWISE_OR',
+                              '&'         => 'T_BITWISE_AND',
+                              '!='        => 'T_IS_NOT_EQUAL',
+                              '!=='       => 'T_IS_NOT_IDENTICAL',
+                              '='         => 'T_EQUAL',
+                              '=='        => 'T_IS_EQUAL',
+                              '==='       => 'T_IS_IDENTICAL',
+                              '-='        => 'T_MINUS_EQUAL',
+                              '+='        => 'T_PLUS_EQUAL',
+                              '*='        => 'T_MUL_EQUAL',
+                              '/='        => 'T_DIV_EQUAL',
+                              '%='        => 'T_MOD_EQUAL',
+                              '++'        => 'T_INC',
+                              '--'        => 'T_DEC',
+                              '//'        => 'T_COMMENT',
+                              '/*'        => 'T_COMMENT',
+                              '/**'       => 'T_DOC_COMMENT',
+                              '*/'        => 'T_COMMENT',
+                             );
+
+    /**
+     * A list of string delimiters.
+     *
+     * @var array
+     */
+    protected $stringTokens = array(
+                               '\'' => '\'',
+                               '"'  => '"',
+                              );
+
+    /**
+     * A list of tokens that start and end comments.
+     *
+     * @var array
+     */
+    protected $commentTokens = array(
+                                '//'  => null,
+                                '/*'  => '*/',
+                                '/**' => '*/',
+                               );
+
+
+    /**
+     * Creates an array of tokens when given some JS code.
+     *
+     * @param string $string  The string to tokenize.
+     * @param string $eolChar The EOL character to use for splitting strings.
+     *
+     * @return array
+     */
+    public function tokenizeString($string, $eolChar='\n')
+    {
+        if (PHP_CODESNIFFER_VERBOSITY > 1) {
+            echo "\t*** START JS TOKENIZING ***".PHP_EOL;
+        }
+
+        $maxTokenLength = 0;
+        foreach ($this->tokenValues as $token => $values) {
+            if (strlen($token) > $maxTokenLength) {
+                $maxTokenLength = strlen($token);
+            }
+        }
+
+        $tokens          = array();
+        $inString        = '';
+        $stringChar      = null;
+        $inComment       = '';
+        $buffer          = '';
+        $preStringBuffer = '';
+        $cleanBuffer     = false;
+
+        $commentTokenizer = new PHP_CodeSniffer_Tokenizers_Comment();
+
+        $tokens[] = array(
+                     'code'    => T_OPEN_TAG,
+                     'type'    => 'T_OPEN_TAG',
+                     'content' => '',
+                    );
+
+        // Convert newlines to single characters for ease of
+        // processing. We will change them back later.
+        $string = str_replace($eolChar, "\n", $string);
+
+        $chars    = str_split($string);
+        $numChars = count($chars);
+        for ($i = 0; $i < $numChars; $i++) {
+            $char = $chars[$i];
+
+            if (PHP_CODESNIFFER_VERBOSITY > 1) {
+                $content       = PHP_CodeSniffer::prepareForOutput($char);
+                $bufferContent = PHP_CodeSniffer::prepareForOutput($buffer);
+
+                if ($inString !== '') {
+                    echo "\t";
+                }
+
+                if ($inComment !== '') {
+                    echo "\t";
+                }
+
+                echo "\tProcess char $i => $content (buffer: $bufferContent)".PHP_EOL;
+            }//end if
+
+            if ($inString === '' && $inComment === '' && $buffer !== '') {
+                // If the buffer only has whitespace and we are about to
+                // add a character, store the whitespace first.
+                if (trim($char) !== '' && trim($buffer) === '') {
+                    $tokens[] = array(
+                                 'code'    => T_WHITESPACE,
+                                 'type'    => 'T_WHITESPACE',
+                                 'content' => str_replace("\n", $eolChar, $buffer),
+                                );
+
+                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
+                        $content = PHP_CodeSniffer::prepareForOutput($buffer);
+                        echo "\t=> Added token T_WHITESPACE ($content)".PHP_EOL;
+                    }
+
+                    $buffer = '';
+                }
+
+                // If the buffer is not whitespace and we are about to
+                // add a whitespace character, store the content first.
+                if ($inString === ''
+                    && $inComment === ''
+                    && trim($char) === ''
+                    && trim($buffer) !== ''
+                ) {
+                    $tokens[] = array(
+                                 'code'    => T_STRING,
+                                 'type'    => 'T_STRING',
+                                 'content' => str_replace("\n", $eolChar, $buffer),
+                                );
+
+                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
+                        $content = PHP_CodeSniffer::prepareForOutput($buffer);
+                        echo "\t=> Added token T_STRING ($content)".PHP_EOL;
+                    }
+
+                    $buffer = '';
+                }
+            }//end if
+
+            // Process strings.
+            if ($inComment === '' && isset($this->stringTokens[$char]) === true) {
+                if ($inString === $char) {
+                    // This could be the end of the string, but make sure it
+                    // is not escaped first.
+                    $escapes = 0;
+                    for ($x = ($i - 1); $x >= 0; $x--) {
+                        if ($chars[$x] !== '\\') {
+                            break;
+                        }
+
+                        $escapes++;
+                    }
+
+                    if ($escapes === 0 || ($escapes % 2) === 0) {
+                        // There is an even number of escape chars,
+                        // so this is not escaped, it is the end of the string.
+                        $tokens[] = array(
+                                     'code'    => T_CONSTANT_ENCAPSED_STRING,
+                                     'type'    => 'T_CONSTANT_ENCAPSED_STRING',
+                                     'content' => str_replace("\n", $eolChar, $buffer).$char,
+                                    );
+
+                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
+                            echo "\t\t* found end of string *".PHP_EOL;
+                            $content = PHP_CodeSniffer::prepareForOutput($buffer.$char);
+                            echo "\t=> Added token T_CONSTANT_ENCAPSED_STRING ($content)".PHP_EOL;
+                        }
+
+                        $buffer          = '';
+                        $preStringBuffer = '';
+                        $inString        = '';
+                        $stringChar      = null;
+                        continue;
+                    }//end if
+                } else if ($inString === '') {
+                    $inString        = $char;
+                    $stringChar      = $i;
+                    $preStringBuffer = $buffer;
+
+                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
+                        echo "\t\t* looking for string closer *".PHP_EOL;
+                    }
+                }//end if
+            }//end if
+
+            if ($inString !== '' && $char === "\n") {
+                // Unless this newline character is escaped, the string did not
+                // end before the end of the line, which means it probably
+                // wasn't a string at all (maybe a regex).
+                if ($chars[($i - 1)] !== '\\') {
+                    $i      = $stringChar;
+                    $buffer = $preStringBuffer;
+                    $preStringBuffer = '';
+                    $inString        = '';
+                    $stringChar      = null;
+                    $char            = $chars[$i];
+
+                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
+                        echo "\t\t* found newline before end of string, bailing *".PHP_EOL;
+                    }
+                }
+            }
+
+            $buffer .= $char;
+
+            // We don't look for special tokens inside strings,
+            // so if we are in a string, we can continue here now
+            // that the current char is in the buffer.
+            if ($inString !== '') {
+                continue;
+            }
+
+            // Special case for T_DIVIDE which can actually be
+            // the start of a regular expression.
+            if ($buffer === $char && $char === '/' && $chars[($i + 1)] !== '*') {
+                $regex = $this->getRegexToken(
+                    $i,
+                    $string,
+                    $chars,
+                    $tokens,
+                    $eolChar
+                );
+
+                if ($regex !== null) {
+                    $tokens[] = array(
+                                 'code'    => T_REGULAR_EXPRESSION,
+                                 'type'    => 'T_REGULAR_EXPRESSION',
+                                 'content' => $regex['content'],
+                                );
+
+                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
+                        $content = PHP_CodeSniffer::prepareForOutput($regex['content']);
+                        echo "\t=> Added token T_REGULAR_EXPRESSION ($content)".PHP_EOL;
+                    }
+
+                    $i           = $regex['end'];
+                    $buffer      = '';
+                    $cleanBuffer = false;
+                    continue;
+                }//end if
+            }//end if
+
+            // Check for known tokens, but ignore tokens found that are not at
+            // the end of a string, like FOR and this.FORmat.
+            if (isset($this->tokenValues[strtolower($buffer)]) === true
+                && (preg_match('|[a-zA-z0-9_]|', $char) === 0
+                || isset($chars[($i + 1)]) === false
+                || preg_match('|[a-zA-z0-9_]|', $chars[($i + 1)]) === 0)
+            ) {
+                $matchedToken    = false;
+                $lookAheadLength = ($maxTokenLength - strlen($buffer));
+
+                if ($lookAheadLength > 0) {
+                    // The buffer contains a token type, but we need
+                    // to look ahead at the next chars to see if this is
+                    // actually part of a larger token. For example,
+                    // FOR and FOREACH.
+                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
+                        echo "\t\t* buffer possibly contains token, looking ahead $lookAheadLength chars *".PHP_EOL;
+                    }
+
+                    $charBuffer = $buffer;
+                    for ($x = 1; $x <= $lookAheadLength; $x++) {
+                        if (isset($chars[($i + $x)]) === false) {
+                            break;
+                        }
+
+                        $charBuffer .= $chars[($i + $x)];
+
+                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
+                            $content = PHP_CodeSniffer::prepareForOutput($charBuffer);
+                            echo "\t\t=> Looking ahead $x chars => $content".PHP_EOL;
+                        }
+
+                        if (isset($this->tokenValues[strtolower($charBuffer)]) === true) {
+                            // We've found something larger that matches
+                            // so we can ignore this char. Except for 1 very specific
+                            // case where a comment like /**/ needs to tokenize as
+                            // T_COMMENT and not T_DOC_COMMENT.
+                            $oldType = $this->tokenValues[strtolower($buffer)];
+                            $newType = $this->tokenValues[strtolower($charBuffer)];
+                            if ($oldType === 'T_COMMENT'
+                                && $newType === 'T_DOC_COMMENT'
+                                && $chars[($i + $x + 1)] === '/'
+                            ) {
+                                if (PHP_CODESNIFFER_VERBOSITY > 1) {
+                                    echo "\t\t* look ahead ignored T_DOC_COMMENT, continuing *".PHP_EOL;
+                                }
+                            } else {
+                                if (PHP_CODESNIFFER_VERBOSITY > 1) {
+                                    echo "\t\t* look ahead found more specific token ($newType), ignoring $i *".PHP_EOL;
+                                }
+
+                                $matchedToken = true;
+                                break;
+                            }
+                        }//end if
+                    }//end for
+                }//end if
+
+                if ($matchedToken === false) {
+                    if (PHP_CODESNIFFER_VERBOSITY > 1 && $lookAheadLength > 0) {
+                        echo "\t\t* look ahead found nothing *".PHP_EOL;
+                    }
+
+                    $value = $this->tokenValues[strtolower($buffer)];
+
+                    if ($value === 'T_FUNCTION' && $buffer !== 'function') {
+                        // The function keyword needs to be all lowercase or else
+                        // it is just a function called "Function".
+                        $value = 'T_STRING';
+                    }
+
+                    $tokens[] = array(
+                                 'code'    => constant($value),
+                                 'type'    => $value,
+                                 'content' => $buffer,
+                                );
+
+                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
+                        $content = PHP_CodeSniffer::prepareForOutput($buffer);
+                        echo "\t=> Added token $value ($content)".PHP_EOL;
+                    }
+
+                    $cleanBuffer = true;
+                }//end if
+            } else if (isset($this->tokenValues[strtolower($char)]) === true) {
+                // No matter what token we end up using, we don't
+                // need the content in the buffer any more because we have
+                // found a valid token.
+                $newContent = substr(str_replace("\n", $eolChar, $buffer), 0, -1);
+                if ($newContent !== '') {
+                    $tokens[] = array(
+                                 'code'    => T_STRING,
+                                 'type'    => 'T_STRING',
+                                 'content' => $newContent,
+                                );
+
+                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
+                        $content = PHP_CodeSniffer::prepareForOutput(substr($buffer, 0, -1));
+                        echo "\t=> Added token T_STRING ($content)".PHP_EOL;
+                    }
+                }
+
+                if (PHP_CODESNIFFER_VERBOSITY > 1) {
+                    echo "\t\t* char is token, looking ahead ".($maxTokenLength - 1).' chars *'.PHP_EOL;
+                }
+
+                // The char is a token type, but we need to look ahead at the
+                // next chars to see if this is actually part of a larger token.
+                // For example, = and ===.
+                $charBuffer   = $char;
+                $matchedToken = false;
+                for ($x = 1; $x <= $maxTokenLength; $x++) {
+                    if (isset($chars[($i + $x)]) === false) {
+                        break;
+                    }
+
+                    $charBuffer .= $chars[($i + $x)];
+
+                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
+                        $content = PHP_CodeSniffer::prepareForOutput($charBuffer);
+                        echo "\t\t=> Looking ahead $x chars => $content".PHP_EOL;
+                    }
+
+                    if (isset($this->tokenValues[strtolower($charBuffer)]) === true) {
+                        // We've found something larger that matches
+                        // so we can ignore this char.
+                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
+                            $type = $this->tokenValues[strtolower($charBuffer)];
+                            echo "\t\t* look ahead found more specific token ($type), ignoring $i *".PHP_EOL;
+                        }
+
+                        $matchedToken = true;
+                        break;
+                    }
+                }//end for
+
+                if ($matchedToken === false) {
+                    $value    = $this->tokenValues[strtolower($char)];
+                    $tokens[] = array(
+                                 'code'    => constant($value),
+                                 'type'    => $value,
+                                 'content' => $char,
+                                );
+
+                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
+                        echo "\t\t* look ahead found nothing *".PHP_EOL;
+                        $content = PHP_CodeSniffer::prepareForOutput($char);
+                        echo "\t=> Added token $value ($content)".PHP_EOL;
+                    }
+
+                    $cleanBuffer = true;
+                } else {
+                    $buffer = $char;
+                }//end if
+            }//end if
+
+            // Keep track of content inside comments.
+            if ($inComment === ''
+                && array_key_exists($buffer, $this->commentTokens) === true
+            ) {
+                // This is not really a comment if the content
+                // looks like \// (i.e., it is escaped).
+                if (isset($chars[($i - 2)]) === true && $chars[($i - 2)] === '\\') {
+                    $lastToken   = array_pop($tokens);
+                    $lastContent = $lastToken['content'];
+                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
+                        $value   = $this->tokenValues[strtolower($lastContent)];
+                        $content = PHP_CodeSniffer::prepareForOutput($lastContent);
+                        echo "\t=> Removed token $value ($content)".PHP_EOL;
+                    }
+
+                    $lastChars    = str_split($lastContent);
+                    $lastNumChars = count($lastChars);
+                    for ($x = 0; $x < $lastNumChars; $x++) {
+                        $lastChar = $lastChars[$x];
+                        $value    = $this->tokenValues[strtolower($lastChar)];
+                        $tokens[] = array(
+                                     'code'    => constant($value),
+                                     'type'    => $value,
+                                     'content' => $lastChar,
+                                    );
+
+                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
+                            $content = PHP_CodeSniffer::prepareForOutput($lastChar);
+                            echo "\t=> Added token $value ($content)".PHP_EOL;
+                        }
+                    }
+                } else {
+                    // We have started a comment.
+                    $inComment = $buffer;
+
+                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
+                        echo "\t\t* looking for end of comment *".PHP_EOL;
+                    }
+                }//end if
+            } else if ($inComment !== '') {
+                if ($this->commentTokens[$inComment] === null) {
+                    // Comment ends at the next newline.
+                    if (strpos($buffer, "\n") !== false) {
+                        $inComment = '';
+                    }
+                } else {
+                    if ($this->commentTokens[$inComment] === $buffer) {
+                        $inComment = '';
+                    }
+                }
+
+                if (PHP_CODESNIFFER_VERBOSITY > 1) {
+                    if ($inComment === '') {
+                        echo "\t\t* found end of comment *".PHP_EOL;
+                    }
+                }
+
+                if ($inComment === '' && $cleanBuffer === false) {
+                    $tokens[] = array(
+                                 'code'    => T_STRING,
+                                 'type'    => 'T_STRING',
+                                 'content' => str_replace("\n", $eolChar, $buffer),
+                                );
+
+                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
+                        $content = PHP_CodeSniffer::prepareForOutput($buffer);
+                        echo "\t=> Added token T_STRING ($content)".PHP_EOL;
+                    }
+
+                    $buffer = '';
+                }
+            }//end if
+
+            if ($cleanBuffer === true) {
+                $buffer      = '';
+                $cleanBuffer = false;
+            }
+        }//end for
+
+        if (empty($buffer) === false) {
+            // Buffer contains whitespace from the end of the file.
+            $tokens[] = array(
+                         'code'    => T_WHITESPACE,
+                         'type'    => 'T_WHITESPACE',
+                         'content' => str_replace("\n", $eolChar, $buffer),
+                        );
+
+            if (PHP_CODESNIFFER_VERBOSITY > 1) {
+                $content = PHP_CodeSniffer::prepareForOutput($buffer);
+                echo "\t=> Added token T_WHITESPACE ($content)".PHP_EOL;
+            }
+        }
+
+        $tokens[] = array(
+                     'code'    => T_CLOSE_TAG,
+                     'type'    => 'T_CLOSE_TAG',
+                     'content' => '',
+                    );
+
+        /*
+            Now that we have done some basic tokenizing, we need to
+            modify the tokens to join some together and split some apart
+            so they match what the PHP tokenizer does.
+        */
+
+        $finalTokens = array();
+        $newStackPtr = 0;
+        $numTokens   = count($tokens);
+        for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
+            $token = $tokens[$stackPtr];
+
+            /*
+                Look for comments and join the tokens together.
+            */
+
+            if ($token['code'] === T_COMMENT || $token['code'] === T_DOC_COMMENT) {
+                $newContent   = '';
+                $tokenContent = $token['content'];
+
+                $endContent = null;
+                if (isset($this->commentTokens[$tokenContent]) === true) {
+                    $endContent = $this->commentTokens[$tokenContent];
+                }
+
+                while ($tokenContent !== $endContent) {
+                    if ($endContent === null
+                        && strpos($tokenContent, $eolChar) !== false
+                    ) {
+                        // A null end token means the comment ends at the end of
+                        // the line so we look for newlines and split the token.
+                        $tokens[$stackPtr]['content'] = substr(
+                            $tokenContent,
+                            (strpos($tokenContent, $eolChar) + strlen($eolChar))
+                        );
+
+                        $tokenContent = substr(
+                            $tokenContent,
+                            0,
+                            (strpos($tokenContent, $eolChar) + strlen($eolChar))
+                        );
+
+                        // If the substr failed, skip the token as the content
+                        // will now be blank.
+                        if ($tokens[$stackPtr]['content'] !== false
+                            && $tokens[$stackPtr]['content'] !== ''
+                        ) {
+                            $stackPtr--;
+                        }
+
+                        break;
+                    }//end if
+
+                    $stackPtr++;
+                    $newContent .= $tokenContent;
+                    if (isset($tokens[$stackPtr]) === false) {
+                        break;
+                    }
+
+                    $tokenContent = $tokens[$stackPtr]['content'];
+                }//end while
+
+                if ($token['code'] === T_DOC_COMMENT) {
+                    $commentTokens = $commentTokenizer->tokenizeString($newContent.$tokenContent, $eolChar, $newStackPtr);
+                    foreach ($commentTokens as $commentToken) {
+                        $finalTokens[$newStackPtr] = $commentToken;
+                        $newStackPtr++;
+                    }
+
+                    continue;
+                } else {
+                    // Save the new content in the current token so
+                    // the code below can chop it up on newlines.
+                    $token['content'] = $newContent.$tokenContent;
+                }
+            }//end if
+
+            /*
+                If this token has newlines in its content, split each line up
+                and create a new token for each line. We do this so it's easier
+                to ascertain where errors occur on a line.
+                Note that $token[1] is the token's content.
+            */
+
+            if (strpos($token['content'], $eolChar) !== false) {
+                $tokenLines = explode($eolChar, $token['content']);
+                $numLines   = count($tokenLines);
+
+                for ($i = 0; $i < $numLines; $i++) {
+                    $newToken['content'] = $tokenLines[$i];
+                    if ($i === ($numLines - 1)) {
+                        if ($tokenLines[$i] === '') {
+                            break;
+                        }
+                    } else {
+                        $newToken['content'] .= $eolChar;
+                    }
+
+                    $newToken['type']          = $token['type'];
+                    $newToken['code']          = $token['code'];
+                    $finalTokens[$newStackPtr] = $newToken;
+                    $newStackPtr++;
+                }
+            } else {
+                $finalTokens[$newStackPtr] = $token;
+                $newStackPtr++;
+            }//end if
+
+            // Convert numbers, including decimals.
+            if ($token['code'] === T_STRING
+                || $token['code'] === T_OBJECT_OPERATOR
+            ) {
+                $newContent  = '';
+                $oldStackPtr = $stackPtr;
+                while (preg_match('|^[0-9\.]+$|', $tokens[$stackPtr]['content']) !== 0) {
+                    $newContent .= $tokens[$stackPtr]['content'];
+                    $stackPtr++;
+                }
+
+                if ($newContent !== '' && $newContent !== '.') {
+                    $finalTokens[($newStackPtr - 1)]['content'] = $newContent;
+                    if (ctype_digit($newContent) === true) {
+                        $finalTokens[($newStackPtr - 1)]['code'] = constant('T_LNUMBER');
+                        $finalTokens[($newStackPtr - 1)]['type'] = 'T_LNUMBER';
+                    } else {
+                        $finalTokens[($newStackPtr - 1)]['code'] = constant('T_DNUMBER');
+                        $finalTokens[($newStackPtr - 1)]['type'] = 'T_DNUMBER';
+                    }
+
+                    $stackPtr--;
+                    continue;
+                } else {
+                    $stackPtr = $oldStackPtr;
+                }
+            }//end if
+
+            // Convert the token after an object operator into a string, in most cases.
+            if ($token['code'] === T_OBJECT_OPERATOR) {
+                for ($i = ($stackPtr + 1); $i < $numTokens; $i++) {
+                    if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$i]['code']]) === true) {
+                        continue;
+                    }
+
+                    if ($tokens[$i]['code'] !== T_PROTOTYPE
+                        && $tokens[$i]['code'] !== T_LNUMBER
+                        && $tokens[$i]['code'] !== T_DNUMBER
+                    ) {
+                        $tokens[$i]['code'] = T_STRING;
+                        $tokens[$i]['type'] = 'T_STRING';
+                    }
+
+                    break;
+                }
+            }
+        }//end for
+
+        if (PHP_CODESNIFFER_VERBOSITY > 1) {
+            echo "\t*** END TOKENIZING ***".PHP_EOL;
+        }
+
+        return $finalTokens;
+
+    }//end tokenizeString()
+
+
+    /**
+     * Tokenizes a regular expression if one is found.
+     *
+     * The "/" at index $char is only treated as the start of a regular
+     * expression literal when all of the following hold:
+     *  - the previous non-empty token is one of the tokens that may precede
+     *    an expression (assignment, comparison, open bracket, comma, etc.);
+     *  - an unescaped closing "/" is found before the end of the line;
+     *  - the first character after the literal, its modifiers and any
+     *    trailing spaces is one that may follow an expression.
+     *
+     * If a regular expression is not found, NULL is returned.
+     *
+     * @param int    $char    The index of the possible regex start character.
+     * @param string $string  The complete content of the string being tokenized.
+     * @param array  $chars   An array of characters being tokenized.
+     * @param array  $tokens  The current array of tokens found in the string.
+     * @param string $eolChar The EOL character to use for splitting strings.
+     *
+     * @return array|null An array with the indexes 'start' and 'end' (character
+     *                    positions of the literal, modifiers included) and
+     *                    'content' (the literal text), or NULL if the
+     *                    character does not start a regular expression.
+     */
+    public function getRegexToken($char, $string, $chars, $tokens, $eolChar)
+    {
+        $beforeTokens = array(
+                         T_EQUAL               => true,
+                         T_IS_NOT_EQUAL        => true,
+                         T_IS_IDENTICAL        => true,
+                         T_IS_NOT_IDENTICAL    => true,
+                         T_OPEN_PARENTHESIS    => true,
+                         T_OPEN_SQUARE_BRACKET => true,
+                         T_RETURN              => true,
+                         T_BOOLEAN_OR          => true,
+                         T_BOOLEAN_AND         => true,
+                         T_BITWISE_OR          => true,
+                         T_BITWISE_AND         => true,
+                         T_COMMA               => true,
+                         T_COLON               => true,
+                         T_TYPEOF              => true,
+                         T_INLINE_THEN         => true,
+                         T_INLINE_ELSE         => true,
+                        );
+
+        $afterTokens = array(
+                        ','      => true,
+                        ')'      => true,
+                        ']'      => true,
+                        ';'      => true,
+                        ' '      => true,
+                        '.'      => true,
+                        ':'      => true,
+                        $eolChar => true,
+                       );
+
+        // Find the last non-whitespace token that was added
+        // to the tokens array.
+        $numTokens = count($tokens);
+        for ($prev = ($numTokens - 1); $prev >= 0; $prev--) {
+            if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$prev]['code']]) === false) {
+                break;
+            }
+        }
+
+        // $prev is -1 when there is no previous non-empty token at all,
+        // in which case there is nothing that allows a regex to start here.
+        if ($prev < 0 || isset($beforeTokens[$tokens[$prev]['code']]) === false) {
+            return null;
+        }
+
+        // This is probably a regular expression, so look for the end of it.
+        if (PHP_CODESNIFFER_VERBOSITY > 1) {
+            echo "\t* token possibly starts a regular expression *".PHP_EOL;
+        }
+
+        $numChars  = count($chars);
+        $eolLength = strlen($eolChar);
+        for ($next = ($char + 1); $next < $numChars; $next++) {
+            if ($chars[$next] === '/') {
+                // Count the backslashes directly before this slash. An even
+                // number (including zero) means the backslashes escape each
+                // other in pairs, so the slash itself is not escaped and
+                // we have found the end (e.g., /.../ or /...\\/).
+                $numBackslashes = 0;
+                for ($back = ($next - 1); $back > $char; $back--) {
+                    if ($chars[$back] !== '\\') {
+                        break;
+                    }
+
+                    $numBackslashes++;
+                }
+
+                if (($numBackslashes % 2) === 0) {
+                    break;
+                }
+            } else {
+                $possibleEolChar = substr($string, $next, $eolLength);
+                if ($possibleEolChar === $eolChar) {
+                    // This is the last token on the line and regular
+                    // expressions need to be defined on a single line,
+                    // so this is not a regular expression.
+                    break;
+                }
+            }
+        }//end for
+
+        // The loop above can also finish by running off the end of the
+        // content, so make sure the index is valid before reading it.
+        if ($next >= $numChars || $chars[$next] !== '/') {
+            if (PHP_CODESNIFFER_VERBOSITY > 1) {
+                echo "\t* could not find end of regular expression *".PHP_EOL;
+            }
+
+            return null;
+        }
+
+        while (isset($chars[($next + 1)]) === true
+            && preg_match('|[a-zA-Z]|', $chars[($next + 1)]) !== 0
+        ) {
+            // The token directly after the end of the regex can
+            // be modifiers like global and case insensitive
+            // (e.g., /pattern/gi).
+            $next++;
+        }
+
+        $regexEnd = $next;
+        if (PHP_CODESNIFFER_VERBOSITY > 1) {
+            echo "\t* found end of regular expression at token $regexEnd *".PHP_EOL;
+        }
+
+        // Skip over any spaces that follow the regular expression.
+        for ($next = ($next + 1); $next < $numChars; $next++) {
+            if ($chars[$next] !== ' ') {
+                break;
+            } else {
+                $possibleEolChar = substr($string, $next, $eolLength);
+                if ($possibleEolChar === $eolChar) {
+                    // This is the last token on the line.
+                    break;
+                }
+            }
+        }
+
+        // If the content ends before a following character is found, there
+        // is nothing to confirm the regular expression with, so it is
+        // rejected in the same way as an unexpected following character.
+        if ($next >= $numChars || isset($afterTokens[$chars[$next]]) === false) {
+            if (PHP_CODESNIFFER_VERBOSITY > 1) {
+                echo "\t* tokens after regular expression do not look correct *".PHP_EOL;
+            }
+
+            return null;
+        }
+
+        // This is a regular expression, so join all the characters together.
+        $content = implode('', array_slice($chars, $char, ($regexEnd - $char + 1)));
+
+        $token = array(
+                  'start'   => $char,
+                  'end'     => $regexEnd,
+                  'content' => $content,
+                 );
+
+        return $token;
+
+    }//end getRegexToken()
+
+
+    /**
+     * Performs additional processing after main tokenizing.
+     *
+     * This additional processing looks for properties, closures, labels and objects:
+     *  - T_FUNCTION tokens with a scope opener that are directly followed by
+     *    an open parenthesis are converted to T_CLOSURE;
+     *  - curly brackets that have a bracket closer but no scope condition are
+     *    converted to T_OBJECT/T_CLOSE_OBJECT and the tokens between them
+     *    get a T_OBJECT condition added;
+     *  - T_COLON tokens preceded by a T_INLINE_THEN on the same line are
+     *    converted to T_INLINE_ELSE;
+     *  - a string before any other colon is converted to T_PROPERTY when
+     *    inside an object, or T_LABEL otherwise.
+     *
+     * The token array is modified in place.
+     *
+     * @param array  $tokens  The array of tokens to process.
+     * @param string $eolChar The EOL character to use for splitting strings.
+     *
+     * @return void
+     */
+    public function processAdditional(&$tokens, $eolChar)
+    {
+        if (PHP_CODESNIFFER_VERBOSITY > 1) {
+            echo "\t*** START ADDITIONAL JS PROCESSING ***".PHP_EOL;
+        }
+
+        $numTokens = count($tokens);
+
+        // Stack of the T_OBJECT openers we are currently inside of.
+        $classStack = array();
+
+        for ($i = 0; $i < $numTokens; $i++) {
+            if (PHP_CODESNIFFER_VERBOSITY > 1) {
+                $type    = $tokens[$i]['type'];
+                $content = PHP_CodeSniffer::prepareForOutput($tokens[$i]['content']);
+
+                echo str_repeat("\t", count($classStack));
+                echo "\tProcess token $i: $type => $content".PHP_EOL;
+            }
+
+            // Looking for functions that are actually closures.
+            if ($tokens[$i]['code'] === T_FUNCTION && isset($tokens[$i]['scope_opener']) === true) {
+                for ($x = ($i + 1); $x < $numTokens; $x++) {
+                    if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false) {
+                        break;
+                    }
+                }
+
+                // $x equals $numTokens if only empty tokens follow the
+                // function keyword, so check it before using it as an index.
+                if ($x < $numTokens && $tokens[$x]['code'] === T_OPEN_PARENTHESIS) {
+                    $tokens[$i]['code'] = T_CLOSURE;
+                    $tokens[$i]['type'] = 'T_CLOSURE';
+                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
+                        $line = $tokens[$i]['line'];
+                        echo str_repeat("\t", count($classStack));
+                        echo "\t* token $i on line $line changed from T_FUNCTION to T_CLOSURE".PHP_EOL;
+                    }
+
+                    // Update the condition recorded on every token inside
+                    // the scope so it matches the new token code.
+                    for ($x = ($tokens[$i]['scope_opener'] + 1); $x < $tokens[$i]['scope_closer']; $x++) {
+                        if (isset($tokens[$x]['conditions'][$i]) === false) {
+                            continue;
+                        }
+
+                        $tokens[$x]['conditions'][$i] = T_CLOSURE;
+                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
+                            $type = $tokens[$x]['type'];
+                            echo str_repeat("\t", count($classStack));
+                            echo "\t\t* cleaned $x ($type) *".PHP_EOL;
+                        }
+                    }
+                }//end if
+
+                continue;
+            } else if ($tokens[$i]['code'] === T_OPEN_CURLY_BRACKET
+                && isset($tokens[$i]['scope_condition']) === false
+                && isset($tokens[$i]['bracket_closer']) === true
+            ) {
+                // A curly bracket that does not open a scope is an object.
+                $classStack[] = $i;
+
+                $closer = $tokens[$i]['bracket_closer'];
+                $tokens[$i]['code']      = T_OBJECT;
+                $tokens[$i]['type']      = 'T_OBJECT';
+                $tokens[$closer]['code'] = T_CLOSE_OBJECT;
+                $tokens[$closer]['type'] = 'T_CLOSE_OBJECT';
+
+                if (PHP_CODESNIFFER_VERBOSITY > 1) {
+                    echo str_repeat("\t", count($classStack));
+                    echo "\t* token $i converted from T_OPEN_CURLY_BRACKET to T_OBJECT *".PHP_EOL;
+                    echo str_repeat("\t", count($classStack));
+                    echo "\t* token $closer converted from T_CLOSE_CURLY_BRACKET to T_CLOSE_OBJECT *".PHP_EOL;
+                }
+
+                for ($x = ($i + 1); $x < $closer; $x++) {
+                    $tokens[$x]['conditions'][$i] = T_OBJECT;
+
+                    // Conditions are indexed by token position, so keep
+                    // them ordered after inserting the new one.
+                    ksort($tokens[$x]['conditions'], SORT_NUMERIC);
+                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
+                        $type = $tokens[$x]['type'];
+                        echo str_repeat("\t", count($classStack));
+                        echo "\t\t* added T_OBJECT condition to $x ($type) *".PHP_EOL;
+                    }
+                }
+            } else if ($tokens[$i]['code'] === T_CLOSE_OBJECT) {
+                // We have left the most recently opened object.
+                array_pop($classStack);
+            } else if ($tokens[$i]['code'] === T_COLON) {
+                // If it is a scope opener, it belongs to a
+                // DEFAULT or CASE statement.
+                if (isset($tokens[$i]['scope_condition']) === true) {
+                    continue;
+                }
+
+                // Make sure this is not part of an inline IF statement.
+                // Only tokens on the same line as the colon are checked.
+                for ($x = ($i - 1); $x >= 0; $x--) {
+                    if ($tokens[$x]['code'] === T_INLINE_THEN) {
+                        $tokens[$i]['code'] = T_INLINE_ELSE;
+                        $tokens[$i]['type'] = 'T_INLINE_ELSE';
+
+                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
+                            echo str_repeat("\t", count($classStack));
+                            echo "\t* token $i converted from T_COLON to T_INLINE_ELSE *".PHP_EOL;
+                        }
+
+                        continue(2);
+                    } else if ($tokens[$x]['line'] < $tokens[$i]['line']) {
+                        break;
+                    }
+                }
+
+                // The string to the left of the colon is either a property or label.
+                for ($label = ($i - 1); $label >= 0; $label--) {
+                    if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$label]['code']]) === false) {
+                        break;
+                    }
+                }
+
+                // $label is -1 when nothing but empty tokens (or nothing
+                // at all) comes before the colon.
+                if ($label < 0) {
+                    continue;
+                }
+
+                if ($tokens[$label]['code'] !== T_STRING
+                    && $tokens[$label]['code'] !== T_CONSTANT_ENCAPSED_STRING
+                ) {
+                    continue;
+                }
+
+                if (empty($classStack) === false) {
+                    $tokens[$label]['code'] = T_PROPERTY;
+                    $tokens[$label]['type'] = 'T_PROPERTY';
+
+                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
+                        echo str_repeat("\t", count($classStack));
+                        echo "\t* token $label converted from T_STRING to T_PROPERTY *".PHP_EOL;
+                    }
+                } else {
+                    $tokens[$label]['code'] = T_LABEL;
+                    $tokens[$label]['type'] = 'T_LABEL';
+
+                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
+                        echo str_repeat("\t", count($classStack));
+                        echo "\t* token $label converted from T_STRING to T_LABEL *".PHP_EOL;
+                    }
+                }//end if
+            }//end if
+        }//end for
+
+        if (PHP_CODESNIFFER_VERBOSITY > 1) {
+            echo "\t*** END ADDITIONAL JS PROCESSING ***".PHP_EOL;
+        }
+
+    }//end processAdditional()
+
+
+}//end class
author	Chris Cannam
date	Wed, 29 Nov 2017 16:09:58 +0000
parents
children