annotate vendor/squizlabs/php_codesniffer/src/Tokenizers/JS.php @ 19:fa3358dc1485 tip

Add ndrum files
author Chris Cannam
date Wed, 28 Aug 2019 13:14:47 +0100
parents af1871eacc83
children
rev   line source
Chris@17 1 <?php
Chris@17 2 /**
Chris@17 3 * Tokenizes JS code.
Chris@17 4 *
Chris@17 5 * @author Greg Sherwood <gsherwood@squiz.net>
Chris@17 6 * @copyright 2006-2015 Squiz Pty Ltd (ABN 77 084 670 600)
Chris@17 7 * @license https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
Chris@17 8 */
Chris@17 9
Chris@17 10 namespace PHP_CodeSniffer\Tokenizers;
Chris@17 11
Chris@17 12 use PHP_CodeSniffer\Util;
Chris@17 13 use PHP_CodeSniffer\Exceptions\TokenizerException;
Chris@17 14 use PHP_CodeSniffer\Config;
Chris@17 15
Chris@17 16 class JS extends Tokenizer
Chris@17 17 {
Chris@17 18
Chris@17 19
/**
 * A list of tokens that are allowed to open a scope.
 *
 * Each entry is keyed by the token that owns the scope and describes how
 * that scope is delimited:
 *
 *  - 'start'  The tokens that may open the scope.
 *  - 'end'    The tokens that may close the scope.
 *  - 'strict' Whether the token strictly requires an opener.
 *  - 'shared' Whether the token can share a scope closer with another token.
 *  - 'with'   The tokens the scope closer can be shared with.
 *
 * An example of a token that shares a scope closer is a CASE scope.
 *
 * @var array
 */
public $scopeOpeners = [
    T_IF       => [
        'start'  => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
        'end'    => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
        'strict' => false,
        'shared' => false,
        'with'   => [],
    ],
    T_TRY      => [
        'start'  => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
        'end'    => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
        'strict' => true,
        'shared' => false,
        'with'   => [],
    ],
    T_CATCH    => [
        'start'  => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
        'end'    => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
        'strict' => true,
        'shared' => false,
        'with'   => [],
    ],
    T_ELSE     => [
        'start'  => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
        'end'    => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
        'strict' => false,
        'shared' => false,
        'with'   => [],
    ],
    T_FOR      => [
        'start'  => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
        'end'    => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
        'strict' => false,
        'shared' => false,
        'with'   => [],
    ],
    T_CLASS    => [
        'start'  => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
        'end'    => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
        'strict' => true,
        'shared' => false,
        'with'   => [],
    ],
    T_FUNCTION => [
        'start'  => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
        'end'    => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
        'strict' => false,
        'shared' => false,
        'with'   => [],
    ],
    T_WHILE    => [
        'start'  => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
        'end'    => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
        'strict' => false,
        'shared' => false,
        'with'   => [],
    ],
    T_DO       => [
        'start'  => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
        'end'    => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
        'strict' => true,
        'shared' => false,
        'with'   => [],
    ],
    T_SWITCH   => [
        'start'  => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
        'end'    => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
        'strict' => true,
        'shared' => false,
        'with'   => [],
    ],
    T_CASE     => [
        'start'  => [T_COLON => T_COLON],
        'end'    => [
            T_BREAK    => T_BREAK,
            T_RETURN   => T_RETURN,
            T_CONTINUE => T_CONTINUE,
            T_THROW    => T_THROW,
        ],
        'strict' => true,
        'shared' => true,
        'with'   => [
            T_DEFAULT => T_DEFAULT,
            T_CASE    => T_CASE,
            T_SWITCH  => T_SWITCH,
        ],
    ],
    T_DEFAULT  => [
        'start'  => [T_COLON => T_COLON],
        'end'    => [
            T_BREAK    => T_BREAK,
            T_RETURN   => T_RETURN,
            T_CONTINUE => T_CONTINUE,
            T_THROW    => T_THROW,
        ],
        'strict' => true,
        'shared' => true,
        'with'   => [
            T_CASE   => T_CASE,
            T_SWITCH => T_SWITCH,
        ],
    ],
];
Chris@17 133
/**
 * A list of tokens that end the scope.
 *
 * This array is meant to be a unique collection of the end tokens
 * from the $scopeOpeners array. The data is duplicated here to
 * save time during parsing of the file.
 *
 * NOTE(review): $scopeOpeners also lists T_RETURN, T_CONTINUE and T_THROW
 * as end tokens for T_CASE and T_DEFAULT, but they are not included here;
 * confirm whether that omission is intentional before relying on this
 * being a complete collection.
 *
 * @var array
 */
public $endScopeTokens = [
    T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET,
    T_BREAK               => T_BREAK,
];
Chris@17 147
/**
 * A list of special JS tokens and their types.
 *
 * Keys are the lowercase source text of a keyword or operator; values are
 * the names of the token constants they map to. The tokenizer looks up
 * the lowercased buffer in this list and resolves the name with
 * constant(), so every value must name a defined constant.
 *
 * Comment openers and the block comment closer are listed here as well so
 * that they are emitted as comment tokens and joined together later.
 *
 * @var array
 */
protected $tokenValues = [
    'class'     => 'T_CLASS',
    'function'  => 'T_FUNCTION',
    'prototype' => 'T_PROTOTYPE',
    'try'       => 'T_TRY',
    'catch'     => 'T_CATCH',
    'return'    => 'T_RETURN',
    'throw'     => 'T_THROW',
    'break'     => 'T_BREAK',
    'switch'    => 'T_SWITCH',
    'continue'  => 'T_CONTINUE',
    'if'        => 'T_IF',
    'else'      => 'T_ELSE',
    'do'        => 'T_DO',
    'while'     => 'T_WHILE',
    'for'       => 'T_FOR',
    'var'       => 'T_VAR',
    'case'      => 'T_CASE',
    'default'   => 'T_DEFAULT',
    'true'      => 'T_TRUE',
    'false'     => 'T_FALSE',
    'null'      => 'T_NULL',
    'this'      => 'T_THIS',
    'typeof'    => 'T_TYPEOF',
    '('         => 'T_OPEN_PARENTHESIS',
    ')'         => 'T_CLOSE_PARENTHESIS',
    '{'         => 'T_OPEN_CURLY_BRACKET',
    '}'         => 'T_CLOSE_CURLY_BRACKET',
    '['         => 'T_OPEN_SQUARE_BRACKET',
    ']'         => 'T_CLOSE_SQUARE_BRACKET',
    '?'         => 'T_INLINE_THEN',
    '.'         => 'T_OBJECT_OPERATOR',
    '+'         => 'T_PLUS',
    '-'         => 'T_MINUS',
    '*'         => 'T_MULTIPLY',
    '%'         => 'T_MODULUS',
    '/'         => 'T_DIVIDE',
    '^'         => 'T_LOGICAL_XOR',
    ','         => 'T_COMMA',
    ';'         => 'T_SEMICOLON',
    ':'         => 'T_COLON',
    '<'         => 'T_LESS_THAN',
    '>'         => 'T_GREATER_THAN',
    '<<'        => 'T_SL',
    '>>'        => 'T_SR',
    '>>>'       => 'T_ZSR',
    '<<='       => 'T_SL_EQUAL',
    '>>='       => 'T_SR_EQUAL',
    '>>>='      => 'T_ZSR_EQUAL',
    '<='        => 'T_IS_SMALLER_OR_EQUAL',
    '>='        => 'T_IS_GREATER_OR_EQUAL',
    '=>'        => 'T_DOUBLE_ARROW',
    '!'         => 'T_BOOLEAN_NOT',
    '||'        => 'T_BOOLEAN_OR',
    '&&'        => 'T_BOOLEAN_AND',
    '|'         => 'T_BITWISE_OR',
    '&'         => 'T_BITWISE_AND',
    '!='        => 'T_IS_NOT_EQUAL',
    '!=='       => 'T_IS_NOT_IDENTICAL',
    '='         => 'T_EQUAL',
    '=='        => 'T_IS_EQUAL',
    '==='       => 'T_IS_IDENTICAL',
    '-='        => 'T_MINUS_EQUAL',
    '+='        => 'T_PLUS_EQUAL',
    '*='        => 'T_MUL_EQUAL',
    '/='        => 'T_DIV_EQUAL',
    '%='        => 'T_MOD_EQUAL',
    '++'        => 'T_INC',
    '--'        => 'T_DEC',
    '//'        => 'T_COMMENT',
    '/*'        => 'T_COMMENT',
    '/**'       => 'T_DOC_COMMENT',
    '*/'        => 'T_COMMENT',
];
Chris@17 227
/**
 * A list of string delimiters.
 *
 * Keyed by the delimiter character so the tokenizer can test a single
 * character with isset(). A string is closed by the same character
 * that opened it.
 *
 * @var array
 */
protected $stringTokens = [
    '\'' => '\'',
    '"'  => '"',
];
Chris@17 237
/**
 * A list of tokens that start and end comments.
 *
 * Keys are the comment openers and values are the matching closers.
 * A NULL closer means the comment ends at the next newline.
 *
 * @var array
 */
protected $commentTokens = [
    '//'  => null,
    '/*'  => '*/',
    '/**' => '*/',
];
Chris@17 248
Chris@17 249
/**
 * Initialise the tokenizer.
 *
 * The content is checked first to see if it looks minified; minified
 * content is rejected before the parent constructor is invoked.
 *
 * NOTE(review): the default for $eolChar is the single-quoted string '\n',
 * i.e. a literal backslash followed by "n" rather than a newline character.
 * It is left untouched here to keep the signature unchanged; confirm that
 * callers always pass the EOL char explicitly.
 *
 * @param string                  $content The content to tokenize.
 * @param \PHP_CodeSniffer\Config $config  The config data for the run.
 * @param string                  $eolChar The EOL char used in the content.
 *
 * @return void
 * @throws \PHP_CodeSniffer\Exceptions\TokenizerException If the file appears to be minified.
 */
public function __construct($content, Config $config, $eolChar='\n')
{
    $looksMinified = $this->isMinifiedContent($content, $eolChar);
    if ($looksMinified === true) {
        throw new TokenizerException('File appears to be minified and cannot be processed');
    }

    parent::__construct($content, $config, $eolChar);

}//end __construct()
Chris@17 271
Chris@17 272
/**
 * Creates an array of tokens when given some JS code.
 *
 * Tokenizing happens in two passes:
 *
 *  1. The string is walked one character at a time. Characters are
 *     collected in a buffer which is flushed as a token whenever it
 *     matches an entry in $tokenValues, a string delimited by one of
 *     $stringTokens is closed, a regular expression is found, or the
 *     buffer switches between whitespace and non-whitespace content.
 *  2. The raw tokens are post-processed: comment tokens are joined
 *     together (doc comments are handed to the Comment tokenizer),
 *     multi-line tokens are split into one token per line, numeric
 *     content is converted to T_LNUMBER/T_DNUMBER, and the token after
 *     an object operator is converted to T_STRING in most cases.
 *
 * Each returned token is an array with 'code', 'type' and 'content' keys.
 * The list always starts with an empty T_OPEN_TAG token.
 *
 * @param string $string The string to tokenize.
 *
 * @return array
 */
public function tokenize($string)
{
    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t*** START JS TOKENIZING ***".PHP_EOL;
    }

    // The longest known token decides how far we ever need to look ahead.
    $maxTokenLength = 0;
    foreach ($this->tokenValues as $token => $values) {
        if (strlen($token) > $maxTokenLength) {
            $maxTokenLength = strlen($token);
        }
    }

    $tokens          = [];
    $inString        = '';
    $stringChar      = null;
    $inComment       = '';
    $buffer          = '';
    $preStringBuffer = '';
    $cleanBuffer     = false;

    $commentTokenizer = new Comment();

    $tokens[] = [
        'code'    => T_OPEN_TAG,
        'type'    => 'T_OPEN_TAG',
        'content' => '',
    ];

    // Convert newlines to single characters for ease of
    // processing. We will change them back later.
    $string = str_replace($this->eolChar, "\n", $string);

    $chars    = str_split($string);
    $numChars = count($chars);
    for ($i = 0; $i < $numChars; $i++) {
        $char = $chars[$i];

        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            $content       = Util\Common::prepareForOutput($char);
            $bufferContent = Util\Common::prepareForOutput($buffer);

            if ($inString !== '') {
                echo "\t";
            }

            if ($inComment !== '') {
                echo "\t";
            }

            echo "\tProcess char $i => $content (buffer: $bufferContent)".PHP_EOL;
        }//end if

        if ($inString === '' && $inComment === '' && $buffer !== '') {
            // If the buffer only has whitespace and we are about to
            // add a character, store the whitespace first.
            if (trim($char) !== '' && trim($buffer) === '') {
                $tokens[] = [
                    'code'    => T_WHITESPACE,
                    'type'    => 'T_WHITESPACE',
                    'content' => str_replace("\n", $this->eolChar, $buffer),
                ];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput($buffer);
                    echo "\t=> Added token T_WHITESPACE ($content)".PHP_EOL;
                }

                $buffer = '';
            }

            // If the buffer is not whitespace and we are about to
            // add a whitespace character, store the content first.
            if ($inString === ''
                && $inComment === ''
                && trim($char) === ''
                && trim($buffer) !== ''
            ) {
                $tokens[] = [
                    'code'    => T_STRING,
                    'type'    => 'T_STRING',
                    'content' => str_replace("\n", $this->eolChar, $buffer),
                ];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput($buffer);
                    echo "\t=> Added token T_STRING ($content)".PHP_EOL;
                }

                $buffer = '';
            }
        }//end if

        // Process strings.
        if ($inComment === '' && isset($this->stringTokens[$char]) === true) {
            if ($inString === $char) {
                // This could be the end of the string, but make sure it
                // is not escaped first.
                $escapes = 0;
                for ($x = ($i - 1); $x >= 0; $x--) {
                    if ($chars[$x] !== '\\') {
                        break;
                    }

                    $escapes++;
                }

                if ($escapes === 0 || ($escapes % 2) === 0) {
                    // There is an even number escape chars,
                    // so this is not escaped, it is the end of the string.
                    $tokens[] = [
                        'code'    => T_CONSTANT_ENCAPSED_STRING,
                        'type'    => 'T_CONSTANT_ENCAPSED_STRING',
                        'content' => str_replace("\n", $this->eolChar, $buffer).$char,
                    ];

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo "\t\t* found end of string *".PHP_EOL;
                        $content = Util\Common::prepareForOutput($buffer.$char);
                        echo "\t=> Added token T_CONSTANT_ENCAPSED_STRING ($content)".PHP_EOL;
                    }

                    $buffer          = '';
                    $preStringBuffer = '';
                    $inString        = '';
                    $stringChar      = null;
                    continue;
                }//end if
            } else if ($inString === '') {
                // Remember where the string started and what was buffered
                // before it so we can rewind if this turns out not to be
                // a string after all.
                $inString        = $char;
                $stringChar      = $i;
                $preStringBuffer = $buffer;

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\t* looking for string closer *".PHP_EOL;
                }
            }//end if
        }//end if

        if ($inString !== '' && $char === "\n") {
            // Unless this newline character is escaped, the string did not
            // end before the end of the line, which means it probably
            // wasn't a string at all (maybe a regex).
            if ($chars[($i - 1)] !== '\\') {
                $i               = $stringChar;
                $buffer          = $preStringBuffer;
                $preStringBuffer = '';
                $inString        = '';
                $stringChar      = null;
                $char            = $chars[$i];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\t* found newline before end of string, bailing *".PHP_EOL;
                }
            }
        }

        $buffer .= $char;

        // We don't look for special tokens inside strings,
        // so if we are in a string, we can continue here now
        // that the current char is in the buffer.
        if ($inString !== '') {
            continue;
        }

        // Special case for T_DIVIDE which can actually be
        // the start of a regular expression. The next char is only
        // inspected when it exists, so a trailing "/" at the very end
        // of the content does not read past the end of the array.
        if ($buffer === $char
            && $char === '/'
            && (isset($chars[($i + 1)]) === false || $chars[($i + 1)] !== '*')
        ) {
            $regex = $this->getRegexToken($i, $string, $chars, $tokens);
            if ($regex !== null) {
                $tokens[] = [
                    'code'    => T_REGULAR_EXPRESSION,
                    'type'    => 'T_REGULAR_EXPRESSION',
                    'content' => $regex['content'],
                ];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput($regex['content']);
                    echo "\t=> Added token T_REGULAR_EXPRESSION ($content)".PHP_EOL;
                }

                $i           = $regex['end'];
                $buffer      = '';
                $cleanBuffer = false;
                continue;
            }//end if
        }//end if

        // Check for known tokens, but ignore tokens found that are not at
        // the end of a string, like FOR and this.FORmat.
        // The character class is A-Z (not A-z): the A-z range would also
        // match [ \ ] ^ and `, which stopped keywords directly followed by
        // one of those characters (e.g. this[0]) from being recognised.
        if (isset($this->tokenValues[strtolower($buffer)]) === true
            && (preg_match('|[a-zA-Z0-9_]|', $char) === 0
            || isset($chars[($i + 1)]) === false
            || preg_match('|[a-zA-Z0-9_]|', $chars[($i + 1)]) === 0)
        ) {
            $matchedToken    = false;
            $lookAheadLength = ($maxTokenLength - strlen($buffer));

            if ($lookAheadLength > 0) {
                // The buffer contains a token type, but we need
                // to look ahead at the next chars to see if this is
                // actually part of a larger token. For example,
                // FOR and FOREACH.
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\t* buffer possibly contains token, looking ahead $lookAheadLength chars *".PHP_EOL;
                }

                $charBuffer = $buffer;
                for ($x = 1; $x <= $lookAheadLength; $x++) {
                    if (isset($chars[($i + $x)]) === false) {
                        break;
                    }

                    $charBuffer .= $chars[($i + $x)];

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $content = Util\Common::prepareForOutput($charBuffer);
                        echo "\t\t=> Looking ahead $x chars => $content".PHP_EOL;
                    }

                    if (isset($this->tokenValues[strtolower($charBuffer)]) === true) {
                        // We've found something larger that matches
                        // so we can ignore this char. Except for 1 very specific
                        // case where a comment like /**/ needs to tokenize as
                        // T_COMMENT and not T_DOC_COMMENT.
                        $oldType = $this->tokenValues[strtolower($buffer)];
                        $newType = $this->tokenValues[strtolower($charBuffer)];
                        if ($oldType === 'T_COMMENT'
                            && $newType === 'T_DOC_COMMENT'
                            && isset($chars[($i + $x + 1)]) === true
                            && $chars[($i + $x + 1)] === '/'
                        ) {
                            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                                echo "\t\t* look ahead ignored T_DOC_COMMENT, continuing *".PHP_EOL;
                            }
                        } else {
                            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                                echo "\t\t* look ahead found more specific token ($newType), ignoring $i *".PHP_EOL;
                            }

                            $matchedToken = true;
                            break;
                        }
                    }//end if
                }//end for
            }//end if

            if ($matchedToken === false) {
                if (PHP_CODESNIFFER_VERBOSITY > 1 && $lookAheadLength > 0) {
                    echo "\t\t* look ahead found nothing *".PHP_EOL;
                }

                $value = $this->tokenValues[strtolower($buffer)];

                if ($value === 'T_FUNCTION' && $buffer !== 'function') {
                    // The function keyword needs to be all lowercase or else
                    // it is just a function called "Function".
                    $value = 'T_STRING';
                }

                $tokens[] = [
                    'code'    => constant($value),
                    'type'    => $value,
                    'content' => $buffer,
                ];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput($buffer);
                    echo "\t=> Added token $value ($content)".PHP_EOL;
                }

                $cleanBuffer = true;
            }//end if
        } else if (isset($this->tokenValues[strtolower($char)]) === true) {
            // No matter what token we end up using, we don't
            // need the content in the buffer any more because we have
            // found a valid token.
            $newContent = substr(str_replace("\n", $this->eolChar, $buffer), 0, -1);
            if ($newContent !== '') {
                $tokens[] = [
                    'code'    => T_STRING,
                    'type'    => 'T_STRING',
                    'content' => $newContent,
                ];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput(substr($buffer, 0, -1));
                    echo "\t=> Added token T_STRING ($content)".PHP_EOL;
                }
            }

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                echo "\t\t* char is token, looking ahead ".($maxTokenLength - 1).' chars *'.PHP_EOL;
            }

            // The char is a token type, but we need to look ahead at the
            // next chars to see if this is actually part of a larger token.
            // For example, = and ===.
            $charBuffer   = $char;
            $matchedToken = false;
            for ($x = 1; $x <= $maxTokenLength; $x++) {
                if (isset($chars[($i + $x)]) === false) {
                    break;
                }

                $charBuffer .= $chars[($i + $x)];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput($charBuffer);
                    echo "\t\t=> Looking ahead $x chars => $content".PHP_EOL;
                }

                if (isset($this->tokenValues[strtolower($charBuffer)]) === true) {
                    // We've found something larger that matches
                    // so we can ignore this char.
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $type = $this->tokenValues[strtolower($charBuffer)];
                        echo "\t\t* look ahead found more specific token ($type), ignoring $i *".PHP_EOL;
                    }

                    $matchedToken = true;
                    break;
                }
            }//end for

            if ($matchedToken === false) {
                $value    = $this->tokenValues[strtolower($char)];
                $tokens[] = [
                    'code'    => constant($value),
                    'type'    => $value,
                    'content' => $char,
                ];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\t* look ahead found nothing *".PHP_EOL;
                    $content = Util\Common::prepareForOutput($char);
                    echo "\t=> Added token $value ($content)".PHP_EOL;
                }

                $cleanBuffer = true;
            } else {
                // Restart the buffer at this char so the larger token
                // can be built up over the next iterations.
                $buffer = $char;
            }//end if
        }//end if

        // Keep track of content inside comments.
        if ($inComment === ''
            && array_key_exists($buffer, $this->commentTokens) === true
        ) {
            // This is not really a comment if the content
            // looks like \// (i.e., it is escaped).
            if (isset($chars[($i - 2)]) === true && $chars[($i - 2)] === '\\') {
                $lastToken   = array_pop($tokens);
                $lastContent = $lastToken['content'];
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $value   = $this->tokenValues[strtolower($lastContent)];
                    $content = Util\Common::prepareForOutput($lastContent);
                    echo "\t=> Removed token $value ($content)".PHP_EOL;
                }

                // Re-add the removed token as one token per character.
                $lastChars    = str_split($lastContent);
                $lastNumChars = count($lastChars);
                for ($x = 0; $x < $lastNumChars; $x++) {
                    $lastChar = $lastChars[$x];
                    $value    = $this->tokenValues[strtolower($lastChar)];
                    $tokens[] = [
                        'code'    => constant($value),
                        'type'    => $value,
                        'content' => $lastChar,
                    ];

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $content = Util\Common::prepareForOutput($lastChar);
                        echo "\t=> Added token $value ($content)".PHP_EOL;
                    }
                }
            } else {
                // We have started a comment.
                $inComment = $buffer;

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\t* looking for end of comment *".PHP_EOL;
                }
            }//end if
        } else if ($inComment !== '') {
            if ($this->commentTokens[$inComment] === null) {
                // Comment ends at the next newline.
                if (strpos($buffer, "\n") !== false) {
                    $inComment = '';
                }
            } else {
                if ($this->commentTokens[$inComment] === $buffer) {
                    $inComment = '';
                }
            }

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                if ($inComment === '') {
                    echo "\t\t* found end of comment *".PHP_EOL;
                }
            }

            if ($inComment === '' && $cleanBuffer === false) {
                $tokens[] = [
                    'code'    => T_STRING,
                    'type'    => 'T_STRING',
                    'content' => str_replace("\n", $this->eolChar, $buffer),
                ];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput($buffer);
                    echo "\t=> Added token T_STRING ($content)".PHP_EOL;
                }

                $buffer = '';
            }
        }//end if

        if ($cleanBuffer === true) {
            $buffer      = '';
            $cleanBuffer = false;
        }
    }//end for

    // Flush whatever is left in the buffer. An explicit comparison with
    // the empty string is used because empty() also treats the string "0"
    // as empty, which would silently drop a trailing "0" from the content.
    if ($buffer !== '') {
        if ($inString !== '' || trim($buffer) !== '') {
            // Either the string did not end before the end of the file,
            // which means there was probably a syntax error somewhere,
            // or the content ended without trailing whitespace and the
            // buffer still holds real content.
            $tailCode = T_STRING;
            $tailType = 'T_STRING';
        } else {
            // Buffer contains whitespace from the end of the file.
            $tailCode = T_WHITESPACE;
            $tailType = 'T_WHITESPACE';
        }

        $tokens[] = [
            'code'    => $tailCode,
            'type'    => $tailType,
            'content' => str_replace("\n", $this->eolChar, $buffer),
        ];

        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            $content = Util\Common::prepareForOutput($buffer);
            echo "\t=> Added token $tailType ($content)".PHP_EOL;
        }
    }//end if

    $tokens[] = [
        'code'    => T_CLOSE_TAG,
        'type'    => 'T_CLOSE_TAG',
        'content' => '',
    ];

    /*
        Now that we have done some basic tokenizing, we need to
        modify the tokens to join some together and split some apart
        so they match what the PHP tokenizer does.
    */

    $finalTokens = [];
    $newStackPtr = 0;
    $numTokens   = count($tokens);
    for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
        $token = $tokens[$stackPtr];

        /*
            Look for comments and join the tokens together.
        */

        if ($token['code'] === T_COMMENT || $token['code'] === T_DOC_COMMENT) {
            $newContent   = '';
            $tokenContent = $token['content'];

            $endContent = null;
            if (isset($this->commentTokens[$tokenContent]) === true) {
                $endContent = $this->commentTokens[$tokenContent];
            }

            while ($tokenContent !== $endContent) {
                if ($endContent === null
                    && strpos($tokenContent, $this->eolChar) !== false
                ) {
                    // A null end token means the comment ends at the end of
                    // the line so we look for newlines and split the token.
                    $tokens[$stackPtr]['content'] = substr(
                        $tokenContent,
                        (strpos($tokenContent, $this->eolChar) + strlen($this->eolChar))
                    );

                    $tokenContent = substr(
                        $tokenContent,
                        0,
                        (strpos($tokenContent, $this->eolChar) + strlen($this->eolChar))
                    );

                    // If the substr failed, skip the token as the content
                    // will now be blank.
                    if ($tokens[$stackPtr]['content'] !== false
                        && $tokens[$stackPtr]['content'] !== ''
                    ) {
                        $stackPtr--;
                    }

                    break;
                }//end if

                $stackPtr++;
                $newContent .= $tokenContent;
                if (isset($tokens[$stackPtr]) === false) {
                    break;
                }

                $tokenContent = $tokens[$stackPtr]['content'];
            }//end while

            if ($token['code'] === T_DOC_COMMENT) {
                $commentTokens = $commentTokenizer->tokenizeString($newContent.$tokenContent, $this->eolChar, $newStackPtr);
                foreach ($commentTokens as $commentToken) {
                    $finalTokens[$newStackPtr] = $commentToken;
                    $newStackPtr++;
                }

                continue;
            } else {
                // Save the new content in the current token so
                // the code below can chop it up on newlines.
                $token['content'] = $newContent.$tokenContent;
            }
        }//end if

        /*
            If this token has newlines in its content, split each line up
            and create a new token for each line. We do this so it's easier
            to ascertain where errors occur on a line.
            Note that $token['content'] is the token's content.
        */

        if (strpos($token['content'], $this->eolChar) !== false) {
            $tokenLines = explode($this->eolChar, $token['content']);
            $numLines   = count($tokenLines);

            for ($i = 0; $i < $numLines; $i++) {
                $newToken = ['content' => $tokenLines[$i]];
                if ($i === ($numLines - 1)) {
                    // Nothing follows the final EOL char, so there is
                    // no extra token to add.
                    if ($tokenLines[$i] === '') {
                        break;
                    }
                } else {
                    $newToken['content'] .= $this->eolChar;
                }

                $newToken['type']          = $token['type'];
                $newToken['code']          = $token['code'];
                $finalTokens[$newStackPtr] = $newToken;
                $newStackPtr++;
            }
        } else {
            $finalTokens[$newStackPtr] = $token;
            $newStackPtr++;
        }//end if

        // Convert numbers, including decimals.
        if ($token['code'] === T_STRING
            || $token['code'] === T_OBJECT_OPERATOR
        ) {
            $newContent  = '';
            $oldStackPtr = $stackPtr;
            // The final T_CLOSE_TAG token has empty content, which never
            // matches, so this loop stops before running off the list.
            while (preg_match('|^[0-9\.]+$|', $tokens[$stackPtr]['content']) !== 0) {
                $newContent .= $tokens[$stackPtr]['content'];
                $stackPtr++;
            }

            if ($newContent !== '' && $newContent !== '.') {
                $finalTokens[($newStackPtr - 1)]['content'] = $newContent;
                if (ctype_digit($newContent) === true) {
                    $finalTokens[($newStackPtr - 1)]['code'] = constant('T_LNUMBER');
                    $finalTokens[($newStackPtr - 1)]['type'] = 'T_LNUMBER';
                } else {
                    $finalTokens[($newStackPtr - 1)]['code'] = constant('T_DNUMBER');
                    $finalTokens[($newStackPtr - 1)]['type'] = 'T_DNUMBER';
                }

                $stackPtr--;
                continue;
            } else {
                $stackPtr = $oldStackPtr;
            }
        }//end if

        // Convert the token after an object operator into a string, in most cases.
        if ($token['code'] === T_OBJECT_OPERATOR) {
            for ($i = ($stackPtr + 1); $i < $numTokens; $i++) {
                if (isset(Util\Tokens::$emptyTokens[$tokens[$i]['code']]) === true) {
                    continue;
                }

                if ($tokens[$i]['code'] !== T_PROTOTYPE
                    && $tokens[$i]['code'] !== T_LNUMBER
                    && $tokens[$i]['code'] !== T_DNUMBER
                ) {
                    $tokens[$i]['code'] = T_STRING;
                    $tokens[$i]['type'] = 'T_STRING';
                }

                break;
            }
        }
    }//end for

    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t*** END TOKENIZING ***".PHP_EOL;
    }

    return $finalTokens;

}//end tokenize()
Chris@17 902
Chris@17 903
/**
 * Tokenizes a regular expression if one is found.
 *
 * A candidate is accepted only when all of the following hold:
 *  - the last non-empty token already collected is one listed in $beforeTokens,
 *  - an unescaped "/" is found before the end of the current line,
 *  - the first character after the closing "/", any trailing letters
 *    (modifiers) and any spaces is one listed in $afterTokens.
 *
 * If a regular expression is not found, NULL is returned.
 *
 * @param int    $char   The index of the possible regex start character.
 * @param string $string The complete content of the string being tokenized.
 * @param array  $chars  An array of characters being tokenized.
 * @param array  $tokens The current array of tokens found in the string.
 *
 * @return array<string, string|int>|null Array with the keys 'start' and 'end'
 *                                        (indexes into $chars) and 'content'
 *                                        (the joined characters), or NULL.
 */
public function getRegexToken($char, $string, $chars, $tokens)
{
    // A regular expression is only considered directly after one of these tokens.
    $beforeTokens = [
        T_EQUAL               => true,
        T_IS_NOT_EQUAL        => true,
        T_IS_IDENTICAL        => true,
        T_IS_NOT_IDENTICAL    => true,
        T_OPEN_PARENTHESIS    => true,
        T_OPEN_SQUARE_BRACKET => true,
        T_RETURN              => true,
        T_BOOLEAN_OR          => true,
        T_BOOLEAN_AND         => true,
        T_BOOLEAN_NOT         => true,
        T_BITWISE_OR          => true,
        T_BITWISE_AND         => true,
        T_COMMA               => true,
        T_COLON               => true,
        T_TYPEOF              => true,
        T_INLINE_THEN         => true,
        T_INLINE_ELSE         => true,
    ];

    // Characters that may directly follow a regular expression.
    $afterTokens = [
        ','            => true,
        ')'            => true,
        ']'            => true,
        ';'            => true,
        ' '            => true,
        '.'            => true,
        ':'            => true,
        $this->eolChar => true,
    ];

    // Find the last non-empty token that was added
    // to the tokens array.
    $numTokens = count($tokens);
    for ($prev = ($numTokens - 1); $prev >= 0; $prev--) {
        if (isset(Util\Tokens::$emptyTokens[$tokens[$prev]['code']]) === false) {
            break;
        }
    }

    // $prev is -1 when there is no such token at all; reading $tokens[-1]
    // would be an undefined offset, and the answer is "not a regex" anyway.
    if ($prev < 0 || isset($beforeTokens[$tokens[$prev]['code']]) === false) {
        return null;
    }

    // This is probably a regular expression, so look for the end of it.
    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t* token possibly starts a regular expression *".PHP_EOL;
    }

    $numChars  = count($chars);
    $eolLength = strlen($this->eolChar);
    for ($next = ($char + 1); $next < $numChars; $next++) {
        if ($chars[$next] === '/') {
            // A slash is escaped when it is preceded by an odd number of
            // backslashes (e.g. \/ or \\\/); an even number (e.g. \\/) means
            // the backslashes escape each other and the slash ends the regex.
            // The scan never looks at or before the opening character.
            $backslashes = 0;
            for ($back = ($next - 1); $back > $char && $chars[$back] === '\\'; $back--) {
                $backslashes++;
            }

            if (($backslashes % 2) === 0) {
                break;
            }
        } else if (substr($string, $next, $eolLength) === $this->eolChar) {
            // This is the last token on the line and regular
            // expressions need to be defined on a single line,
            // so this is not a regular expression.
            break;
        }
    }

    // $next equals $numChars when the loop ran off the end of the input
    // without finding either a closing slash or an end of line.
    if ($next >= $numChars || $chars[$next] !== '/') {
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t* could not find end of regular expression *".PHP_EOL;
        }

        return null;
    }

    // The token directly after the end of the regex can
    // be modifiers like global and case insensitive
    // (e.g., /pattern/gi). Stop at the end of the input rather than
    // reading past the last character.
    while (isset($chars[($next + 1)]) === true
        && preg_match('|[a-zA-Z]|', $chars[($next + 1)]) !== 0
    ) {
        $next++;
    }

    $regexEnd = $next;
    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t* found end of regular expression at token $regexEnd *".PHP_EOL;
    }

    // Skip any spaces between the regex and whatever follows it.
    for ($next += 1; $next < $numChars; $next++) {
        if ($chars[$next] !== ' ') {
            break;
        } else {
            // NOTE(review): this branch only runs when the current character
            // is a space, so the test below can only succeed if eolChar itself
            // begins with a space. Presumably that never happens; confirm and
            // remove.
            $possibleEolChar = substr($string, $next, $eolLength);
            if ($possibleEolChar === $this->eolChar) {
                // This is the last token on the line.
                break;
            }
        }
    }

    // When nothing follows the candidate ($next is past the last character)
    // there is no following character to validate, so it is rejected; this
    // keeps the previous result without reading an undefined offset.
    if ($next >= $numChars || isset($afterTokens[$chars[$next]]) === false) {
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t* tokens after regular expression do not look correct *".PHP_EOL;
        }

        return null;
    }

    // This is a regular expression, so join all the tokens together.
    $content = '';
    for ($x = $char; $x <= $regexEnd; $x++) {
        $content .= $chars[$x];
    }

    return [
        'start'   => $char,
        'end'     => $regexEnd,
        'content' => $content,
    ];

}//end getRegexToken()
Chris@17 1044
Chris@17 1045
/**
 * Performs additional processing after main tokenizing.
 *
 * This additional processing looks for properties, closures, labels and objects.
 *
 * Walking $this->tokens once, it:
 *  - retypes a T_FUNCTION that has a scope opener and is directly followed by
 *    an open parenthesis as T_CLOSURE, and updates the matching entry in the
 *    'conditions' of the tokens inside its scope;
 *  - for an open curly bracket that has a bracket closer but no scope
 *    condition: when its innermost condition is T_CLASS and it is preceded by
 *    "name(...)", turns the name into a T_FUNCTION and wires up the scope and
 *    parenthesis indexes; otherwise retypes the bracket pair as
 *    T_OBJECT / T_CLOSE_OBJECT and records the object as a condition of the
 *    tokens inside it;
 *  - retypes a T_COLON that is not a scope opener as T_INLINE_ELSE when a
 *    T_INLINE_THEN is found while scanning back through the current line;
 *    otherwise retypes the preceding string token as T_PROPERTY (inside an
 *    object) or T_LABEL (outside of one).
 *
 * @return void
 */
public function processAdditional()
{
    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t*** START ADDITIONAL JS PROCESSING ***".PHP_EOL;
    }

    $numTokens = count($this->tokens);

    // Stack of the T_OBJECT openers we are currently inside of.
    $classStack = [];

    for ($i = 0; $i < $numTokens; $i++) {
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            $type    = $this->tokens[$i]['type'];
            $content = Util\Common::prepareForOutput($this->tokens[$i]['content']);

            echo str_repeat("\t", count($classStack));
            echo "\tProcess token $i: $type => $content".PHP_EOL;
        }

        // Looking for functions that are actually closures.
        if ($this->tokens[$i]['code'] === T_FUNCTION && isset($this->tokens[$i]['scope_opener']) === true) {
            for ($x = ($i + 1); $x < $numTokens; $x++) {
                if (isset(Util\Tokens::$emptyTokens[$this->tokens[$x]['code']]) === false) {
                    break;
                }
            }

            // $x equals $numTokens when only empty tokens follow; guard the
            // lookup so it never reads past the end of the token array.
            if ($x < $numTokens && $this->tokens[$x]['code'] === T_OPEN_PARENTHESIS) {
                $this->tokens[$i]['code'] = T_CLOSURE;
                $this->tokens[$i]['type'] = 'T_CLOSURE';
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $line = $this->tokens[$i]['line'];
                    echo str_repeat("\t", count($classStack));
                    echo "\t* token $i on line $line changed from T_FUNCTION to T_CLOSURE *".PHP_EOL;
                }

                // Tokens inside the scope recorded this token as a condition
                // under its old code, so update those entries as well.
                for ($x = ($this->tokens[$i]['scope_opener'] + 1); $x < $this->tokens[$i]['scope_closer']; $x++) {
                    if (isset($this->tokens[$x]['conditions'][$i]) === false) {
                        continue;
                    }

                    $this->tokens[$x]['conditions'][$i] = T_CLOSURE;
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $type = $this->tokens[$x]['type'];
                        echo str_repeat("\t", count($classStack));
                        echo "\t\t* cleaned $x ($type) *".PHP_EOL;
                    }
                }
            }//end if

            continue;
        } else if ($this->tokens[$i]['code'] === T_OPEN_CURLY_BRACKET
            && isset($this->tokens[$i]['scope_condition']) === false
            && isset($this->tokens[$i]['bracket_closer']) === true
        ) {
            // The innermost condition is the last entry of the conditions array.
            $condition = $this->tokens[$i]['conditions'];
            $condition = end($condition);
            if ($condition === T_CLASS) {
                // Possibly an ES6 method. To be classified as one, the previous
                // non-empty tokens need to be a set of parenthesis, and then a string
                // (the method name).
                for ($parenCloser = ($i - 1); $parenCloser > 0; $parenCloser--) {
                    if (isset(Util\Tokens::$emptyTokens[$this->tokens[$parenCloser]['code']]) === false) {
                        break;
                    }
                }

                // Only continue when the closer knows its opener; without it
                // there is no position to search for the method name from.
                if ($this->tokens[$parenCloser]['code'] === T_CLOSE_PARENTHESIS
                    && isset($this->tokens[$parenCloser]['parenthesis_opener']) === true
                ) {
                    $parenOpener = $this->tokens[$parenCloser]['parenthesis_opener'];
                    for ($name = ($parenOpener - 1); $name > 0; $name--) {
                        if (isset(Util\Tokens::$emptyTokens[$this->tokens[$name]['code']]) === false) {
                            break;
                        }
                    }

                    if ($this->tokens[$name]['code'] === T_STRING) {
                        // We found a method name.
                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            $line = $this->tokens[$name]['line'];
                            echo str_repeat("\t", count($classStack));
                            echo "\t* token $name on line $line changed from T_STRING to T_FUNCTION *".PHP_EOL;
                        }

                        $closer = $this->tokens[$i]['bracket_closer'];

                        $this->tokens[$name]['code'] = T_FUNCTION;
                        $this->tokens[$name]['type'] = 'T_FUNCTION';

                        // The name, the opening brace and the closing brace
                        // all get the same scope and parenthesis indexes.
                        foreach ([$name, $i, $closer] as $token) {
                            $this->tokens[$token]['scope_condition']    = $name;
                            $this->tokens[$token]['scope_opener']       = $i;
                            $this->tokens[$token]['scope_closer']       = $closer;
                            $this->tokens[$token]['parenthesis_opener'] = $parenOpener;
                            $this->tokens[$token]['parenthesis_closer'] = $parenCloser;
                            $this->tokens[$token]['parenthesis_owner']  = $name;
                        }

                        $this->tokens[$parenOpener]['parenthesis_owner'] = $name;
                        $this->tokens[$parenCloser]['parenthesis_owner'] = $name;

                        for ($x = ($i + 1); $x < $closer; $x++) {
                            $this->tokens[$x]['conditions'][$name] = T_FUNCTION;

                            // Keep the conditions ordered by token index.
                            ksort($this->tokens[$x]['conditions'], SORT_NUMERIC);
                            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                                $type = $this->tokens[$x]['type'];
                                echo str_repeat("\t", count($classStack));
                                echo "\t\t* added T_FUNCTION condition to $x ($type) *".PHP_EOL;
                            }
                        }

                        // This brace opens a method, not an object.
                        continue;
                    }//end if
                }//end if
            }//end if

            // Not a method body, so treat the bracket pair as an object.
            $classStack[] = $i;

            $closer = $this->tokens[$i]['bracket_closer'];
            $this->tokens[$i]['code']      = T_OBJECT;
            $this->tokens[$i]['type']      = 'T_OBJECT';
            $this->tokens[$closer]['code'] = T_CLOSE_OBJECT;
            $this->tokens[$closer]['type'] = 'T_CLOSE_OBJECT';

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                echo str_repeat("\t", count($classStack));
                echo "\t* token $i converted from T_OPEN_CURLY_BRACKET to T_OBJECT *".PHP_EOL;
                echo str_repeat("\t", count($classStack));
                echo "\t* token $closer converted from T_CLOSE_CURLY_BRACKET to T_CLOSE_OBJECT *".PHP_EOL;
            }

            for ($x = ($i + 1); $x < $closer; $x++) {
                $this->tokens[$x]['conditions'][$i] = T_OBJECT;

                // Keep the conditions ordered by token index.
                ksort($this->tokens[$x]['conditions'], SORT_NUMERIC);
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $type = $this->tokens[$x]['type'];
                    echo str_repeat("\t", count($classStack));
                    echo "\t\t* added T_OBJECT condition to $x ($type) *".PHP_EOL;
                }
            }
        } else if ($this->tokens[$i]['code'] === T_CLOSE_OBJECT) {
            // Leaving the innermost object.
            array_pop($classStack);
        } else if ($this->tokens[$i]['code'] === T_COLON) {
            // If it is a scope opener, it belongs to a
            // DEFAULT or CASE statement.
            if (isset($this->tokens[$i]['scope_condition']) === true) {
                continue;
            }

            // Make sure this is not part of an inline IF statement.
            for ($x = ($i - 1); $x >= 0; $x--) {
                if ($this->tokens[$x]['code'] === T_INLINE_THEN) {
                    $this->tokens[$i]['code'] = T_INLINE_ELSE;
                    $this->tokens[$i]['type'] = 'T_INLINE_ELSE';

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo str_repeat("\t", count($classStack));
                        echo "\t* token $i converted from T_COLON to T_INLINE_ELSE *".PHP_EOL;
                    }

                    continue 2;
                } else if ($this->tokens[$x]['line'] < $this->tokens[$i]['line']) {
                    // Reached a token from an earlier line; stop looking.
                    break;
                }
            }

            // The string to the left of the colon is either a property or label.
            for ($label = ($i - 1); $label >= 0; $label--) {
                if (isset(Util\Tokens::$emptyTokens[$this->tokens[$label]['code']]) === false) {
                    break;
                }
            }

            // $label is -1 when nothing but empty tokens precede the colon.
            if ($label < 0) {
                continue;
            }

            if ($this->tokens[$label]['code'] !== T_STRING
                && $this->tokens[$label]['code'] !== T_CONSTANT_ENCAPSED_STRING
            ) {
                continue;
            }

            if (empty($classStack) === false) {
                // Inside an object, so this is a property name.
                $this->tokens[$label]['code'] = T_PROPERTY;
                $this->tokens[$label]['type'] = 'T_PROPERTY';

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo str_repeat("\t", count($classStack));
                    echo "\t* token $label converted from T_STRING to T_PROPERTY *".PHP_EOL;
                }
            } else {
                // Outside of any object, so this is a label.
                $this->tokens[$label]['code'] = T_LABEL;
                $this->tokens[$label]['type'] = 'T_LABEL';

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo str_repeat("\t", count($classStack));
                    echo "\t* token $label converted from T_STRING to T_LABEL *".PHP_EOL;
                }
            }//end if
        }//end if
    }//end for

    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t*** END ADDITIONAL JS PROCESSING ***".PHP_EOL;
    }

}//end processAdditional()
Chris@17 1255
Chris@17 1256
Chris@17 1257 }//end class