annotate vendor/squizlabs/php_codesniffer/src/Tokenizers/JS.php @ 5:12f9dff5fda9 tip

Update to Drupal core 8.7.1
author Chris Cannam
date Thu, 09 May 2019 15:34:47 +0100
parents a9cd425dd02b
children
rev   line source
Chris@4 1 <?php
Chris@4 2 /**
Chris@4 3 * Tokenizes JS code.
Chris@4 4 *
Chris@4 5 * @author Greg Sherwood <gsherwood@squiz.net>
Chris@4 6 * @copyright 2006-2015 Squiz Pty Ltd (ABN 77 084 670 600)
Chris@4 7 * @license https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
Chris@4 8 */
Chris@4 9
Chris@4 10 namespace PHP_CodeSniffer\Tokenizers;
Chris@4 11
Chris@4 12 use PHP_CodeSniffer\Util;
Chris@4 13 use PHP_CodeSniffer\Exceptions\TokenizerException;
Chris@4 14 use PHP_CodeSniffer\Config;
Chris@4 15
Chris@4 16 class JS extends Tokenizer
Chris@4 17 {
Chris@4 18
Chris@4 19
Chris@4 20 /**
Chris@4 21 * A list of tokens that are allowed to open a scope.
Chris@4 22 *
Chris@4 23 * Each entry is keyed by the scope-opening token and holds: 'start' and
Chris@4 24 * 'end' (the tokens that open and close the scope), 'strict' (whether the
Chris@4 25 * token strictly requires an opener), 'shared' (whether it can share a scope
Chris@4 26 * closer) and 'with' (the tokens it can share one with), e.g. a CASE scope.
Chris@4 27 *
Chris@4 28 * @var array<int|string, array<string, mixed>>
Chris@4 29 */
Chris@4 30 public $scopeOpeners = [
Chris@4 31 T_IF => [
Chris@4 32 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
Chris@4 33 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
Chris@4 34 'strict' => false,
Chris@4 35 'shared' => false,
Chris@4 36 'with' => [],
Chris@4 37 ],
Chris@4 38 T_TRY => [
Chris@4 39 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
Chris@4 40 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
Chris@4 41 'strict' => true,
Chris@4 42 'shared' => false,
Chris@4 43 'with' => [],
Chris@4 44 ],
Chris@4 45 T_CATCH => [
Chris@4 46 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
Chris@4 47 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
Chris@4 48 'strict' => true,
Chris@4 49 'shared' => false,
Chris@4 50 'with' => [],
Chris@4 51 ],
Chris@4 52 T_ELSE => [
Chris@4 53 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
Chris@4 54 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
Chris@4 55 'strict' => false,
Chris@4 56 'shared' => false,
Chris@4 57 'with' => [],
Chris@4 58 ],
Chris@4 59 T_FOR => [
Chris@4 60 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
Chris@4 61 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
Chris@4 62 'strict' => false,
Chris@4 63 'shared' => false,
Chris@4 64 'with' => [],
Chris@4 65 ],
Chris@4 66 T_CLASS => [
Chris@4 67 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
Chris@4 68 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
Chris@4 69 'strict' => true,
Chris@4 70 'shared' => false,
Chris@4 71 'with' => [],
Chris@4 72 ],
Chris@4 73 T_FUNCTION => [
Chris@4 74 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
Chris@4 75 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
Chris@4 76 'strict' => false,
Chris@4 77 'shared' => false,
Chris@4 78 'with' => [],
Chris@4 79 ],
Chris@4 80 T_WHILE => [
Chris@4 81 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
Chris@4 82 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
Chris@4 83 'strict' => false,
Chris@4 84 'shared' => false,
Chris@4 85 'with' => [],
Chris@4 86 ],
Chris@4 87 T_DO => [
Chris@4 88 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
Chris@4 89 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
Chris@4 90 'strict' => true,
Chris@4 91 'shared' => false,
Chris@4 92 'with' => [],
Chris@4 93 ],
Chris@4 94 T_SWITCH => [
Chris@4 95 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
Chris@4 96 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
Chris@4 97 'strict' => true,
Chris@4 98 'shared' => false,
Chris@4 99 'with' => [],
Chris@4 100 ],
Chris@4 101 T_CASE => [
Chris@4 102 'start' => [T_COLON => T_COLON],
Chris@4 103 'end' => [
Chris@4 104 T_BREAK => T_BREAK,
Chris@4 105 T_RETURN => T_RETURN,
Chris@4 106 T_CONTINUE => T_CONTINUE,
Chris@4 107 T_THROW => T_THROW,
Chris@4 108 ],
Chris@4 109 'strict' => true,
Chris@4 110 'shared' => true,
Chris@4 111 'with' => [
Chris@4 112 T_DEFAULT => T_DEFAULT,
Chris@4 113 T_CASE => T_CASE,
Chris@4 114 T_SWITCH => T_SWITCH,
Chris@4 115 ],
Chris@4 116 ],
Chris@4 117 T_DEFAULT => [
Chris@4 118 'start' => [T_COLON => T_COLON],
Chris@4 119 'end' => [
Chris@4 120 T_BREAK => T_BREAK,
Chris@4 121 T_RETURN => T_RETURN,
Chris@4 122 T_CONTINUE => T_CONTINUE,
Chris@4 123 T_THROW => T_THROW,
Chris@4 124 ],
Chris@4 125 'strict' => true,
Chris@4 126 'shared' => true,
Chris@4 127 'with' => [
Chris@4 128 T_CASE => T_CASE,
Chris@4 129 T_SWITCH => T_SWITCH,
Chris@4 130 ],
Chris@4 131 ],
Chris@4 132 ];
Chris@4 133
Chris@4 134 /**
Chris@4 135 * A list of tokens that end the scope.
Chris@4 136 *
Chris@4 137 * This array is described as the unique collection of end tokens
Chris@4 138 * from the $scopeOpeners array, duplicated here to save time during
Chris@4 139 * parsing; note it lists only T_CLOSE_CURLY_BRACKET and T_BREAK.
Chris@4 140 *
Chris@4 141 * @var array
Chris@4 142 */
Chris@4 143 public $endScopeTokens = [
Chris@4 144 T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET,
Chris@4 145 T_BREAK => T_BREAK,
Chris@4 146 ];
Chris@4 147
Chris@4 148 /**
Chris@4 149 * Maps JS keywords and operator strings to the name of their token constant.
Chris@4 150 *
Chris@4 151 * @var array<string, string>
Chris@4 152 */
Chris@4 153 protected $tokenValues = [
Chris@4 154 'class' => 'T_CLASS',
Chris@4 155 'function' => 'T_FUNCTION',
Chris@4 156 'prototype' => 'T_PROTOTYPE',
Chris@4 157 'try' => 'T_TRY',
Chris@4 158 'catch' => 'T_CATCH',
Chris@4 159 'return' => 'T_RETURN',
Chris@4 160 'throw' => 'T_THROW',
Chris@4 161 'break' => 'T_BREAK',
Chris@4 162 'switch' => 'T_SWITCH',
Chris@4 163 'continue' => 'T_CONTINUE',
Chris@4 164 'if' => 'T_IF',
Chris@4 165 'else' => 'T_ELSE',
Chris@4 166 'do' => 'T_DO',
Chris@4 167 'while' => 'T_WHILE',
Chris@4 168 'for' => 'T_FOR',
Chris@4 169 'var' => 'T_VAR',
Chris@4 170 'case' => 'T_CASE',
Chris@4 171 'default' => 'T_DEFAULT',
Chris@4 172 'true' => 'T_TRUE',
Chris@4 173 'false' => 'T_FALSE',
Chris@4 174 'null' => 'T_NULL',
Chris@4 175 'this' => 'T_THIS',
Chris@4 176 'typeof' => 'T_TYPEOF',
Chris@4 177 '(' => 'T_OPEN_PARENTHESIS',
Chris@4 178 ')' => 'T_CLOSE_PARENTHESIS',
Chris@4 179 '{' => 'T_OPEN_CURLY_BRACKET',
Chris@4 180 '}' => 'T_CLOSE_CURLY_BRACKET',
Chris@4 181 '[' => 'T_OPEN_SQUARE_BRACKET',
Chris@4 182 ']' => 'T_CLOSE_SQUARE_BRACKET',
Chris@4 183 '?' => 'T_INLINE_THEN',
Chris@4 184 '.' => 'T_OBJECT_OPERATOR',
Chris@4 185 '+' => 'T_PLUS',
Chris@4 186 '-' => 'T_MINUS',
Chris@4 187 '*' => 'T_MULTIPLY',
Chris@4 188 '%' => 'T_MODULUS',
Chris@4 189 '/' => 'T_DIVIDE',
Chris@4 190 '^' => 'T_LOGICAL_XOR',
Chris@4 191 ',' => 'T_COMMA',
Chris@4 192 ';' => 'T_SEMICOLON',
Chris@4 193 ':' => 'T_COLON',
Chris@4 194 '<' => 'T_LESS_THAN',
Chris@4 195 '>' => 'T_GREATER_THAN',
Chris@4 196 '<<' => 'T_SL',
Chris@4 197 '>>' => 'T_SR',
Chris@4 198 '>>>' => 'T_ZSR',
Chris@4 199 '<<=' => 'T_SL_EQUAL',
Chris@4 200 '>>=' => 'T_SR_EQUAL',
Chris@4 201 '>>>=' => 'T_ZSR_EQUAL',
Chris@4 202 '<=' => 'T_IS_SMALLER_OR_EQUAL',
Chris@4 203 '>=' => 'T_IS_GREATER_OR_EQUAL',
Chris@4 204 '=>' => 'T_DOUBLE_ARROW',
Chris@4 205 '!' => 'T_BOOLEAN_NOT',
Chris@4 206 '||' => 'T_BOOLEAN_OR',
Chris@4 207 '&&' => 'T_BOOLEAN_AND',
Chris@4 208 '|' => 'T_BITWISE_OR',
Chris@4 209 '&' => 'T_BITWISE_AND',
Chris@4 210 '!=' => 'T_IS_NOT_EQUAL',
Chris@4 211 '!==' => 'T_IS_NOT_IDENTICAL',
Chris@4 212 '=' => 'T_EQUAL',
Chris@4 213 '==' => 'T_IS_EQUAL',
Chris@4 214 '===' => 'T_IS_IDENTICAL',
Chris@4 215 '-=' => 'T_MINUS_EQUAL',
Chris@4 216 '+=' => 'T_PLUS_EQUAL',
Chris@4 217 '*=' => 'T_MUL_EQUAL',
Chris@4 218 '/=' => 'T_DIV_EQUAL',
Chris@4 219 '%=' => 'T_MOD_EQUAL',
Chris@4 220 '++' => 'T_INC',
Chris@4 221 '--' => 'T_DEC',
Chris@4 222 '//' => 'T_COMMENT',
Chris@4 223 '/*' => 'T_COMMENT',
Chris@4 224 '/**' => 'T_DOC_COMMENT',
Chris@4 225 '*/' => 'T_COMMENT',
Chris@4 226 ];
Chris@4 227
Chris@4 228 /**
Chris@4 229 * A list of string delimiters.
Chris@4 230 *
Chris@4 231 * @var array<string, string>
Chris@4 232 */
Chris@4 233 protected $stringTokens = [
Chris@4 234 '\'' => '\'',
Chris@4 235 '"' => '"',
Chris@4 236 ];
Chris@4 237
Chris@4 238 /**
Chris@4 239 * A list of tokens that start comments, each mapped to its end token (null: ends at the newline).
Chris@4 240 *
Chris@4 241 * @var array<string, string|null>
Chris@4 242 */
Chris@4 243 protected $commentTokens = [
Chris@4 244 '//' => null,
Chris@4 245 '/*' => '*/',
Chris@4 246 '/**' => '*/',
Chris@4 247 ];
Chris@4 248
Chris@4 249
/**
 * Initialise the tokenizer.
 *
 * Pre-checks the content to see if it looks minified and refuses to
 * process it if so; otherwise hands over to the parent constructor.
 *
 * @param string                  $content The content to tokenize.
 * @param \PHP_CodeSniffer\Config $config  The config data for the run.
 * @param string                  $eolChar The EOL char used in the content.
 *                                         Defaults to a real newline character.
 *
 * @return void
 * @throws \PHP_CodeSniffer\Exceptions\TokenizerException If the file appears to be minified.
 */
public function __construct($content, Config $config, $eolChar="\n")
{
    // The default must be double-quoted: '\n' in single quotes is the two
    // characters backslash + "n", which never occurs as a line ending and
    // would make every line-based check see the content as a single line.
    if ($this->isMinifiedContent($content, $eolChar) === true) {
        throw new TokenizerException('File appears to be minified and cannot be processed');
    }

    parent::__construct($content, $config, $eolChar);

}//end __construct()
Chris@4 271
Chris@4 272
/**
 * Creates an array of tokens when given some JS code.
 *
 * Tokenizing happens in two passes:
 *  1. The string is walked one character at a time, building a raw token
 *     list (whitespace, strings, regular expressions, known keywords and
 *     operators from $tokenValues, and generic T_STRING content).
 *  2. The raw list is post-processed: comment tokens are joined, tokens
 *     containing newlines are split per line, numeric tokens are merged
 *     into T_LNUMBER/T_DNUMBER, and the token following an object operator
 *     is re-tagged as T_STRING (except prototype and numbers).
 *
 * @param string $string The string to tokenize.
 *
 * @return array The final token list. Tokens built here carry the keys
 *               'code', 'type' and 'content'; doc comment tokens are
 *               whatever the Comment tokenizer returns.
 */
public function tokenize($string)
{
    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t*** START JS TOKENIZING ***".PHP_EOL;
    }

    // The longest known token bounds how far we need to look ahead.
    $maxTokenLength = 0;
    foreach ($this->tokenValues as $token => $values) {
        if (strlen($token) > $maxTokenLength) {
            $maxTokenLength = strlen($token);
        }
    }

    $tokens          = [];
    $inString        = '';
    $stringChar      = null;
    $inComment       = '';
    $buffer          = '';
    $preStringBuffer = '';
    $cleanBuffer     = false;

    $commentTokenizer = new Comment();

    $tokens[] = [
        'code'    => T_OPEN_TAG,
        'type'    => 'T_OPEN_TAG',
        'content' => '',
    ];

    // Convert newlines to single characters for ease of
    // processing. We will change them back later.
    $string = str_replace($this->eolChar, "\n", $string);

    $chars    = str_split($string);
    $numChars = count($chars);
    for ($i = 0; $i < $numChars; $i++) {
        $char = $chars[$i];

        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            $content       = Util\Common::prepareForOutput($char);
            $bufferContent = Util\Common::prepareForOutput($buffer);

            if ($inString !== '') {
                echo "\t";
            }

            if ($inComment !== '') {
                echo "\t";
            }

            echo "\tProcess char $i => $content (buffer: $bufferContent)".PHP_EOL;
        }//end if

        if ($inString === '' && $inComment === '' && $buffer !== '') {
            // If the buffer only has whitespace and we are about to
            // add a character, store the whitespace first.
            if (trim($char) !== '' && trim($buffer) === '') {
                $tokens[] = [
                    'code'    => T_WHITESPACE,
                    'type'    => 'T_WHITESPACE',
                    'content' => str_replace("\n", $this->eolChar, $buffer),
                ];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput($buffer);
                    echo "\t=> Added token T_WHITESPACE ($content)".PHP_EOL;
                }

                $buffer = '';
            }

            // If the buffer is not whitespace and we are about to
            // add a whitespace character, store the content first.
            if ($inString === ''
                && $inComment === ''
                && trim($char) === ''
                && trim($buffer) !== ''
            ) {
                $tokens[] = [
                    'code'    => T_STRING,
                    'type'    => 'T_STRING',
                    'content' => str_replace("\n", $this->eolChar, $buffer),
                ];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput($buffer);
                    echo "\t=> Added token T_STRING ($content)".PHP_EOL;
                }

                $buffer = '';
            }
        }//end if

        // Process strings.
        if ($inComment === '' && isset($this->stringTokens[$char]) === true) {
            if ($inString === $char) {
                // This could be the end of the string, but make sure it
                // is not escaped first.
                $escapes = 0;
                for ($x = ($i - 1); $x >= 0; $x--) {
                    if ($chars[$x] !== '\\') {
                        break;
                    }

                    $escapes++;
                }

                if (($escapes % 2) === 0) {
                    // There is an even number escape chars,
                    // so this is not escaped, it is the end of the string.
                    $tokens[] = [
                        'code'    => T_CONSTANT_ENCAPSED_STRING,
                        'type'    => 'T_CONSTANT_ENCAPSED_STRING',
                        'content' => str_replace("\n", $this->eolChar, $buffer).$char,
                    ];

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo "\t\t* found end of string *".PHP_EOL;
                        $content = Util\Common::prepareForOutput($buffer.$char);
                        echo "\t=> Added token T_CONSTANT_ENCAPSED_STRING ($content)".PHP_EOL;
                    }

                    $buffer          = '';
                    $preStringBuffer = '';
                    $inString        = '';
                    $stringChar      = null;
                    continue;
                }//end if
            } else if ($inString === '') {
                // Remember where the string started so we can rewind
                // if it turns out not to be a string after all.
                $inString        = $char;
                $stringChar      = $i;
                $preStringBuffer = $buffer;

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\t* looking for string closer *".PHP_EOL;
                }
            }//end if
        }//end if

        if ($inString !== '' && $char === "\n") {
            // Unless this newline character is escaped, the string did not
            // end before the end of the line, which means it probably
            // wasn't a string at all (maybe a regex).
            if ($chars[($i - 1)] !== '\\') {
                // Rewind to the quote char and reprocess it as plain content.
                $i               = $stringChar;
                $buffer          = $preStringBuffer;
                $preStringBuffer = '';
                $inString        = '';
                $stringChar      = null;
                $char            = $chars[$i];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\t* found newline before end of string, bailing *".PHP_EOL;
                }
            }
        }

        $buffer .= $char;

        // We don't look for special tokens inside strings,
        // so if we are in a string, we can continue here now
        // that the current char is in the buffer.
        if ($inString !== '') {
            continue;
        }

        // Special case for T_DIVIDE which can actually be
        // the start of a regular expression. The isset() guard avoids
        // reading past the end of the input when "/" is the last char.
        if ($buffer === $char
            && $char === '/'
            && (isset($chars[($i + 1)]) === false || $chars[($i + 1)] !== '*')
        ) {
            $regex = $this->getRegexToken($i, $string, $chars, $tokens);
            if ($regex !== null) {
                $tokens[] = [
                    'code'    => T_REGULAR_EXPRESSION,
                    'type'    => 'T_REGULAR_EXPRESSION',
                    'content' => $regex['content'],
                ];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput($regex['content']);
                    echo "\t=> Added token T_REGULAR_EXPRESSION ($content)".PHP_EOL;
                }

                $i           = $regex['end'];
                $buffer      = '';
                $cleanBuffer = false;
                continue;
            }//end if
        }//end if

        // Check for known tokens, but ignore tokens found that are not at
        // the end of a string, like FOR and this.FORmat.
        // Note the class is A-Z, not A-z: the latter would also treat
        // [ \ ] ^ and ` as identifier characters.
        if (isset($this->tokenValues[strtolower($buffer)]) === true
            && (preg_match('|[a-zA-Z0-9_]|', $char) === 0
            || isset($chars[($i + 1)]) === false
            || preg_match('|[a-zA-Z0-9_]|', $chars[($i + 1)]) === 0)
        ) {
            $matchedToken    = false;
            $lookAheadLength = ($maxTokenLength - strlen($buffer));

            if ($lookAheadLength > 0) {
                // The buffer contains a token type, but we need
                // to look ahead at the next chars to see if this is
                // actually part of a larger token. For example,
                // FOR and FOREACH.
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\t* buffer possibly contains token, looking ahead $lookAheadLength chars *".PHP_EOL;
                }

                $charBuffer = $buffer;
                for ($x = 1; $x <= $lookAheadLength; $x++) {
                    if (isset($chars[($i + $x)]) === false) {
                        break;
                    }

                    $charBuffer .= $chars[($i + $x)];

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $content = Util\Common::prepareForOutput($charBuffer);
                        echo "\t\t=> Looking ahead $x chars => $content".PHP_EOL;
                    }

                    if (isset($this->tokenValues[strtolower($charBuffer)]) === true) {
                        // We've found something larger that matches
                        // so we can ignore this char. Except for 1 very specific
                        // case where a comment like /**/ needs to tokenize as
                        // T_COMMENT and not T_DOC_COMMENT.
                        $oldType = $this->tokenValues[strtolower($buffer)];
                        $newType = $this->tokenValues[strtolower($charBuffer)];
                        if ($oldType === 'T_COMMENT'
                            && $newType === 'T_DOC_COMMENT'
                            && isset($chars[($i + $x + 1)]) === true
                            && $chars[($i + $x + 1)] === '/'
                        ) {
                            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                                echo "\t\t* look ahead ignored T_DOC_COMMENT, continuing *".PHP_EOL;
                            }
                        } else {
                            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                                echo "\t\t* look ahead found more specific token ($newType), ignoring $i *".PHP_EOL;
                            }

                            $matchedToken = true;
                            break;
                        }
                    }//end if
                }//end for
            }//end if

            if ($matchedToken === false) {
                if (PHP_CODESNIFFER_VERBOSITY > 1 && $lookAheadLength > 0) {
                    echo "\t\t* look ahead found nothing *".PHP_EOL;
                }

                $value = $this->tokenValues[strtolower($buffer)];

                if ($value === 'T_FUNCTION' && $buffer !== 'function') {
                    // The function keyword needs to be all lowercase or else
                    // it is just a function called "Function".
                    $value = 'T_STRING';
                }

                $tokens[] = [
                    'code'    => constant($value),
                    'type'    => $value,
                    'content' => $buffer,
                ];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput($buffer);
                    echo "\t=> Added token $value ($content)".PHP_EOL;
                }

                $cleanBuffer = true;
            }//end if
        } else if (isset($this->tokenValues[strtolower($char)]) === true) {
            // No matter what token we end up using, we don't
            // need the content in the buffer any more because we have
            // found a valid token.
            $newContent = substr(str_replace("\n", $this->eolChar, $buffer), 0, -1);
            if ($newContent !== '') {
                $tokens[] = [
                    'code'    => T_STRING,
                    'type'    => 'T_STRING',
                    'content' => $newContent,
                ];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput(substr($buffer, 0, -1));
                    echo "\t=> Added token T_STRING ($content)".PHP_EOL;
                }
            }

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                echo "\t\t* char is token, looking ahead ".($maxTokenLength - 1).' chars *'.PHP_EOL;
            }

            // The char is a token type, but we need to look ahead at the
            // next chars to see if this is actually part of a larger token.
            // For example, = and ===.
            $charBuffer   = $char;
            $matchedToken = false;
            for ($x = 1; $x <= $maxTokenLength; $x++) {
                if (isset($chars[($i + $x)]) === false) {
                    break;
                }

                $charBuffer .= $chars[($i + $x)];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput($charBuffer);
                    echo "\t\t=> Looking ahead $x chars => $content".PHP_EOL;
                }

                if (isset($this->tokenValues[strtolower($charBuffer)]) === true) {
                    // We've found something larger that matches
                    // so we can ignore this char.
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $type = $this->tokenValues[strtolower($charBuffer)];
                        echo "\t\t* look ahead found more specific token ($type), ignoring $i *".PHP_EOL;
                    }

                    $matchedToken = true;
                    break;
                }
            }//end for

            if ($matchedToken === false) {
                $value    = $this->tokenValues[strtolower($char)];
                $tokens[] = [
                    'code'    => constant($value),
                    'type'    => $value,
                    'content' => $char,
                ];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\t* look ahead found nothing *".PHP_EOL;
                    $content = Util\Common::prepareForOutput($char);
                    echo "\t=> Added token $value ($content)".PHP_EOL;
                }

                $cleanBuffer = true;
            } else {
                // Start the buffer again from this char so the larger
                // token can be built up on the following iterations.
                $buffer = $char;
            }//end if
        }//end if

        // Keep track of content inside comments.
        if ($inComment === ''
            && array_key_exists($buffer, $this->commentTokens) === true
        ) {
            // This is not really a comment if the content
            // looks like \// (i.e., it is escaped).
            if (isset($chars[($i - 2)]) === true && $chars[($i - 2)] === '\\') {
                // Replace the comment token just added with one token
                // per character it contained.
                $lastToken   = array_pop($tokens);
                $lastContent = $lastToken['content'];
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $value   = $this->tokenValues[strtolower($lastContent)];
                    $content = Util\Common::prepareForOutput($lastContent);
                    echo "\t=> Removed token $value ($content)".PHP_EOL;
                }

                $lastChars    = str_split($lastContent);
                $lastNumChars = count($lastChars);
                for ($x = 0; $x < $lastNumChars; $x++) {
                    $lastChar = $lastChars[$x];
                    $value    = $this->tokenValues[strtolower($lastChar)];
                    $tokens[] = [
                        'code'    => constant($value),
                        'type'    => $value,
                        'content' => $lastChar,
                    ];

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $content = Util\Common::prepareForOutput($lastChar);
                        echo "\t=> Added token $value ($content)".PHP_EOL;
                    }
                }
            } else {
                // We have started a comment.
                $inComment = $buffer;

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\t* looking for end of comment *".PHP_EOL;
                }
            }//end if
        } else if ($inComment !== '') {
            if ($this->commentTokens[$inComment] === null) {
                // Comment ends at the next newline.
                if (strpos($buffer, "\n") !== false) {
                    $inComment = '';
                }
            } else {
                if ($this->commentTokens[$inComment] === $buffer) {
                    $inComment = '';
                }
            }

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                if ($inComment === '') {
                    echo "\t\t* found end of comment *".PHP_EOL;
                }
            }

            if ($inComment === '' && $cleanBuffer === false) {
                $tokens[] = [
                    'code'    => T_STRING,
                    'type'    => 'T_STRING',
                    'content' => str_replace("\n", $this->eolChar, $buffer),
                ];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput($buffer);
                    echo "\t=> Added token T_STRING ($content)".PHP_EOL;
                }

                $buffer = '';
            }
        }//end if

        if ($cleanBuffer === true) {
            $buffer      = '';
            $cleanBuffer = false;
        }
    }//end for

    // Flush whatever is left in the buffer. A strict comparison is used
    // because empty() would treat a trailing "0" as empty and drop it.
    if ($buffer !== '') {
        if ($inString === '' && trim($buffer) === '') {
            // Buffer contains whitespace from the end of the file.
            $type = 'T_WHITESPACE';
            $code = T_WHITESPACE;
        } else {
            // Either the string did not end before the end of the file
            // (probably a syntax error somewhere), or the file ended
            // with real content and no trailing whitespace.
            $type = 'T_STRING';
            $code = T_STRING;
        }

        $tokens[] = [
            'code'    => $code,
            'type'    => $type,
            'content' => str_replace("\n", $this->eolChar, $buffer),
        ];

        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            $content = Util\Common::prepareForOutput($buffer);
            echo "\t=> Added token $type ($content)".PHP_EOL;
        }
    }//end if

    $tokens[] = [
        'code'    => T_CLOSE_TAG,
        'type'    => 'T_CLOSE_TAG',
        'content' => '',
    ];

    /*
        Now that we have done some basic tokenizing, we need to
        modify the tokens to join some together and split some apart
        so they match what the PHP tokenizer does.
    */

    $finalTokens = [];
    $newStackPtr = 0;
    $numTokens   = count($tokens);
    for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
        $token = $tokens[$stackPtr];

        /*
            Look for comments and join the tokens together.
        */

        if ($token['code'] === T_COMMENT || $token['code'] === T_DOC_COMMENT) {
            $newContent   = '';
            $tokenContent = $token['content'];

            $endContent = null;
            if (isset($this->commentTokens[$tokenContent]) === true) {
                $endContent = $this->commentTokens[$tokenContent];
            }

            while ($tokenContent !== $endContent) {
                if ($endContent === null
                    && strpos($tokenContent, $this->eolChar) !== false
                ) {
                    // A null end token means the comment ends at the end of
                    // the line so we look for newlines and split the token.
                    $tokens[$stackPtr]['content'] = substr(
                        $tokenContent,
                        (strpos($tokenContent, $this->eolChar) + strlen($this->eolChar))
                    );

                    $tokenContent = substr(
                        $tokenContent,
                        0,
                        (strpos($tokenContent, $this->eolChar) + strlen($this->eolChar))
                    );

                    // If the substr failed, skip the token as the content
                    // will now be blank.
                    if ($tokens[$stackPtr]['content'] !== false
                        && $tokens[$stackPtr]['content'] !== ''
                    ) {
                        // Revisit this token: it still holds the content
                        // that followed the end of the comment line.
                        $stackPtr--;
                    }

                    break;
                }//end if

                $stackPtr++;
                $newContent .= $tokenContent;
                if (isset($tokens[$stackPtr]) === false) {
                    break;
                }

                $tokenContent = $tokens[$stackPtr]['content'];
            }//end while

            if ($token['code'] === T_DOC_COMMENT) {
                $commentTokens = $commentTokenizer->tokenizeString($newContent.$tokenContent, $this->eolChar, $newStackPtr);
                foreach ($commentTokens as $commentToken) {
                    $finalTokens[$newStackPtr] = $commentToken;
                    $newStackPtr++;
                }

                continue;
            } else {
                // Save the new content in the current token so
                // the code below can chop it up on newlines.
                $token['content'] = $newContent.$tokenContent;
            }
        }//end if

        /*
            If this token has newlines in its content, split each line up
            and create a new token for each line. We do this so it's easier
            to ascertain where errors occur on a line.
            Note that $token['content'] is the token's content.
        */

        if (strpos($token['content'], $this->eolChar) !== false) {
            $tokenLines = explode($this->eolChar, $token['content']);
            $numLines   = count($tokenLines);

            for ($i = 0; $i < $numLines; $i++) {
                $newToken = ['content' => $tokenLines[$i]];
                if ($i === ($numLines - 1)) {
                    // The content ended with a newline, so there is
                    // no final partial line to add.
                    if ($tokenLines[$i] === '') {
                        break;
                    }
                } else {
                    $newToken['content'] .= $this->eolChar;
                }

                $newToken['type']          = $token['type'];
                $newToken['code']          = $token['code'];
                $finalTokens[$newStackPtr] = $newToken;
                $newStackPtr++;
            }
        } else {
            $finalTokens[$newStackPtr] = $token;
            $newStackPtr++;
        }//end if

        // Convert numbers, including decimals.
        if ($token['code'] === T_STRING
            || $token['code'] === T_OBJECT_OPERATOR
        ) {
            $newContent  = '';
            $oldStackPtr = $stackPtr;
            // The closing T_CLOSE_TAG token has empty content, so this
            // loop always stops before running off the end of $tokens.
            while (preg_match('|^[0-9\.]+$|', $tokens[$stackPtr]['content']) !== 0) {
                $newContent .= $tokens[$stackPtr]['content'];
                $stackPtr++;
            }

            if ($newContent !== '' && $newContent !== '.') {
                $finalTokens[($newStackPtr - 1)]['content'] = $newContent;
                if (ctype_digit($newContent) === true) {
                    $finalTokens[($newStackPtr - 1)]['code'] = constant('T_LNUMBER');
                    $finalTokens[($newStackPtr - 1)]['type'] = 'T_LNUMBER';
                } else {
                    $finalTokens[($newStackPtr - 1)]['code'] = constant('T_DNUMBER');
                    $finalTokens[($newStackPtr - 1)]['type'] = 'T_DNUMBER';
                }

                $stackPtr--;
                continue;
            } else {
                $stackPtr = $oldStackPtr;
            }
        }//end if

        // Convert the token after an object operator into a string, in most cases.
        if ($token['code'] === T_OBJECT_OPERATOR) {
            for ($i = ($stackPtr + 1); $i < $numTokens; $i++) {
                if (isset(Util\Tokens::$emptyTokens[$tokens[$i]['code']]) === true) {
                    continue;
                }

                if ($tokens[$i]['code'] !== T_PROTOTYPE
                    && $tokens[$i]['code'] !== T_LNUMBER
                    && $tokens[$i]['code'] !== T_DNUMBER
                ) {
                    $tokens[$i]['code'] = T_STRING;
                    $tokens[$i]['type'] = 'T_STRING';
                }

                break;
            }
        }
    }//end for

    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t*** END TOKENIZING ***".PHP_EOL;
    }

    return $finalTokens;

}//end tokenize()
Chris@4 902
Chris@4 903
/**
 * Tokenizes a regular expression if one is found.
 *
 * The character at $char is treated as the opening slash of a possible
 * regular expression literal. It is only accepted as one when the last
 * non-empty token before it can legally precede a regex, a closing
 * unescaped slash is found on the same line, and whatever follows the
 * literal (after optional modifiers and spaces) looks like valid code.
 *
 * If a regular expression is not found, NULL is returned.
 *
 * @param int    $char   The index of the possible regex start character.
 * @param string $string The complete content of the string being tokenized.
 * @param array  $chars  An array of characters being tokenized.
 * @param array  $tokens The current array of tokens found in the string.
 *
 * @return array<string, int|string>|null The 'start' and 'end' character
 *                                        indexes and the 'content' of the
 *                                        regular expression, or NULL.
 */
public function getRegexToken($char, $string, $chars, $tokens)
{
    // Tokens that may directly precede a regular expression literal.
    $beforeTokens = [
        T_EQUAL               => true,
        T_IS_NOT_EQUAL        => true,
        T_IS_IDENTICAL        => true,
        T_IS_NOT_IDENTICAL    => true,
        T_OPEN_PARENTHESIS    => true,
        T_OPEN_SQUARE_BRACKET => true,
        T_RETURN              => true,
        T_BOOLEAN_OR          => true,
        T_BOOLEAN_AND         => true,
        T_BOOLEAN_NOT         => true,
        T_BITWISE_OR          => true,
        T_BITWISE_AND         => true,
        T_COMMA               => true,
        T_COLON               => true,
        T_TYPEOF              => true,
        T_INLINE_THEN         => true,
        T_INLINE_ELSE         => true,
    ];

    // Single characters that may directly follow a regular expression
    // literal. The end of the line is checked separately below because
    // the EOL sequence can be longer than one character.
    $afterTokens = [
        ',' => true,
        ')' => true,
        ']' => true,
        ';' => true,
        ' ' => true,
        '.' => true,
        ':' => true,
    ];

    // Find the last non-whitespace token that was added
    // to the tokens array.
    $numTokens = count($tokens);
    for ($prev = ($numTokens - 1); $prev >= 0; $prev--) {
        if (isset(Util\Tokens::$emptyTokens[$tokens[$prev]['code']]) === false) {
            break;
        }
    }

    // If there is no preceding token at all, $prev is -1 and there is
    // nothing to inspect, so this cannot be confirmed as a regex.
    if ($prev < 0 || isset($beforeTokens[$tokens[$prev]['code']]) === false) {
        return null;
    }

    // This is probably a regular expression, so look for the end of it.
    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t* token possibly starts a regular expression *".PHP_EOL;
    }

    $numChars  = count($chars);
    $eolLength = strlen($this->eolChar);
    for ($next = ($char + 1); $next < $numChars; $next++) {
        if ($chars[$next] === '/') {
            // A slash only ends the regex when it is not escaped. It is
            // escaped when preceded by an odd number of backslashes
            // (e.g. \/ or \\\/), and unescaped when that number is even
            // (e.g. / or \\/). Never look back past the opening slash.
            $backslashes = 0;
            for ($x = ($next - 1); $x > $char && $chars[$x] === '\\'; $x--) {
                $backslashes++;
            }

            if (($backslashes % 2) === 0) {
                break;
            }
        } else if (substr($string, $next, $eolLength) === $this->eolChar) {
            // This is the last token on the line and regular
            // expressions need to be defined on a single line,
            // so this is not a regular expression.
            break;
        }
    }

    // Either the input ran out or the line ended before a closing slash.
    if ($next >= $numChars || $chars[$next] !== '/') {
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t* could not find end of regular expression *".PHP_EOL;
        }

        return null;
    }

    // The characters directly after the end of the regex can
    // be modifiers like global and case insensitive
    // (e.g., /pattern/gi). Stop at the end of the input.
    while (isset($chars[($next + 1)]) === true
        && preg_match('|[a-zA-Z]|', $chars[($next + 1)]) !== 0
    ) {
        $next++;
    }

    $regexEnd = $next;
    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t* found end of regular expression at token $regexEnd *".PHP_EOL;
    }

    // Skip any spaces between the regex and whatever follows it.
    for ($next = ($regexEnd + 1); $next < $numChars; $next++) {
        if ($chars[$next] !== ' ') {
            break;
        }
    }

    // The regex may be the last thing on the line (or in the input);
    // otherwise the next character must be one that can follow a regex.
    $atLineEnd = ($next >= $numChars || substr($string, $next, $eolLength) === $this->eolChar);
    if ($atLineEnd === false && isset($afterTokens[$chars[$next]]) === false) {
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t* tokens after regular expression do not look correct *".PHP_EOL;
        }

        return null;
    }

    // This is a regular expression, so join all the characters together.
    $content = implode('', array_slice($chars, $char, ($regexEnd - $char + 1)));

    return [
        'start'   => $char,
        'end'     => $regexEnd,
        'content' => $content,
    ];

}//end getRegexToken()
Chris@4 1044
Chris@4 1045
/**
 * Performs additional processing after main tokenizing.
 *
 * This additional processing looks for properties, closures, labels and objects.
 *
 * Walks every token once and:
 *  - retags T_FUNCTION tokens that are directly followed by an open
 *    parenthesis (i.e. have no name) as T_CLOSURE;
 *  - retags the name of an ES6 class method from T_STRING to T_FUNCTION
 *    and sets up its scope and parenthesis indexes;
 *  - retags remaining ownerless curly bracket pairs as
 *    T_OBJECT / T_CLOSE_OBJECT;
 *  - retags colons that follow an inline THEN on the same line as
 *    T_INLINE_ELSE;
 *  - retags the string before any other colon as T_PROPERTY (inside an
 *    object) or T_LABEL (outside of one).
 *
 * @return void
 */
public function processAdditional()
{
    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t*** START ADDITIONAL JS PROCESSING ***".PHP_EOL;
    }

    $numTokens = count($this->tokens);

    // Stack of T_OBJECT opener indexes that are currently open.
    $classStack = [];

    for ($i = 0; $i < $numTokens; $i++) {
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            $type    = $this->tokens[$i]['type'];
            $content = Util\Common::prepareForOutput($this->tokens[$i]['content']);

            echo str_repeat("\t", count($classStack));
            echo "\tProcess token $i: $type => $content".PHP_EOL;
        }

        // Looking for functions that are actually closures.
        if ($this->tokens[$i]['code'] === T_FUNCTION && isset($this->tokens[$i]['scope_opener']) === true) {
            for ($x = ($i + 1); $x < $numTokens; $x++) {
                if (isset(Util\Tokens::$emptyTokens[$this->tokens[$x]['code']]) === false) {
                    break;
                }
            }

            // No name between the keyword and the parenthesis.
            if ($this->tokens[$x]['code'] === T_OPEN_PARENTHESIS) {
                $this->tokens[$i]['code'] = T_CLOSURE;
                $this->tokens[$i]['type'] = 'T_CLOSURE';
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $line = $this->tokens[$i]['line'];
                    echo str_repeat("\t", count($classStack));
                    echo "\t* token $i on line $line changed from T_FUNCTION to T_CLOSURE *".PHP_EOL;
                }

                // Update the condition recorded on every token inside the scope.
                for ($x = ($this->tokens[$i]['scope_opener'] + 1); $x < $this->tokens[$i]['scope_closer']; $x++) {
                    if (isset($this->tokens[$x]['conditions'][$i]) === false) {
                        continue;
                    }

                    $this->tokens[$x]['conditions'][$i] = T_CLOSURE;
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $type = $this->tokens[$x]['type'];
                        echo str_repeat("\t", count($classStack));
                        echo "\t\t* cleaned $x ($type) *".PHP_EOL;
                    }
                }
            }//end if

            continue;
        } else if ($this->tokens[$i]['code'] === T_OPEN_CURLY_BRACKET
            && isset($this->tokens[$i]['scope_condition']) === false
            && isset($this->tokens[$i]['bracket_closer']) === true
        ) {
            // The innermost condition this bracket sits in (false if none).
            $condition = $this->tokens[$i]['conditions'];
            $condition = end($condition);
            if ($condition === T_CLASS) {
                // Possibly an ES6 method. To be classified as one, the previous
                // non-empty tokens need to be a set of parenthesis, and then a string
                // (the method name).
                for ($parenCloser = ($i - 1); $parenCloser > 0; $parenCloser--) {
                    if (isset(Util\Tokens::$emptyTokens[$this->tokens[$parenCloser]['code']]) === false) {
                        break;
                    }
                }

                if ($this->tokens[$parenCloser]['code'] === T_CLOSE_PARENTHESIS) {
                    $parenOpener = $this->tokens[$parenCloser]['parenthesis_opener'];
                    for ($name = ($parenOpener - 1); $name > 0; $name--) {
                        if (isset(Util\Tokens::$emptyTokens[$this->tokens[$name]['code']]) === false) {
                            break;
                        }
                    }

                    if ($this->tokens[$name]['code'] === T_STRING) {
                        // We found a method name.
                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            $line = $this->tokens[$name]['line'];
                            echo str_repeat("\t", count($classStack));
                            echo "\t* token $name on line $line changed from T_STRING to T_FUNCTION *".PHP_EOL;
                        }

                        $closer = $this->tokens[$i]['bracket_closer'];

                        $this->tokens[$name]['code'] = T_FUNCTION;
                        $this->tokens[$name]['type'] = 'T_FUNCTION';

                        // The name, the opening bracket and the closing bracket
                        // all share the same scope and parenthesis indexes.
                        foreach ([$name, $i, $closer] as $token) {
                            $this->tokens[$token]['scope_condition']    = $name;
                            $this->tokens[$token]['scope_opener']       = $i;
                            $this->tokens[$token]['scope_closer']       = $closer;
                            $this->tokens[$token]['parenthesis_opener'] = $parenOpener;
                            $this->tokens[$token]['parenthesis_closer'] = $parenCloser;
                            $this->tokens[$token]['parenthesis_owner']  = $name;
                        }

                        $this->tokens[$parenOpener]['parenthesis_owner'] = $name;
                        $this->tokens[$parenCloser]['parenthesis_owner'] = $name;

                        for ($x = ($i + 1); $x < $closer; $x++) {
                            $this->tokens[$x]['conditions'][$name] = T_FUNCTION;
                            // Keep the conditions ordered by token index.
                            ksort($this->tokens[$x]['conditions'], SORT_NUMERIC);
                            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                                $type = $this->tokens[$x]['type'];
                                echo str_repeat("\t", count($classStack));
                                echo "\t\t* added T_FUNCTION condition to $x ($type) *".PHP_EOL;
                            }
                        }

                        continue;
                    }//end if
                }//end if
            }//end if

            // Not a method body, so treat the bracket pair as an object.
            $classStack[] = $i;

            $closer = $this->tokens[$i]['bracket_closer'];
            $this->tokens[$i]['code']      = T_OBJECT;
            $this->tokens[$i]['type']      = 'T_OBJECT';
            $this->tokens[$closer]['code'] = T_CLOSE_OBJECT;
            $this->tokens[$closer]['type'] = 'T_CLOSE_OBJECT';

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                echo str_repeat("\t", count($classStack));
                echo "\t* token $i converted from T_OPEN_CURLY_BRACKET to T_OBJECT *".PHP_EOL;
                echo str_repeat("\t", count($classStack));
                echo "\t* token $closer converted from T_CLOSE_CURLY_BRACKET to T_CLOSE_OBJECT *".PHP_EOL;
            }

            for ($x = ($i + 1); $x < $closer; $x++) {
                $this->tokens[$x]['conditions'][$i] = T_OBJECT;
                // Keep the conditions ordered by token index.
                ksort($this->tokens[$x]['conditions'], SORT_NUMERIC);
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $type = $this->tokens[$x]['type'];
                    echo str_repeat("\t", count($classStack));
                    echo "\t\t* added T_OBJECT condition to $x ($type) *".PHP_EOL;
                }
            }
        } else if ($this->tokens[$i]['code'] === T_CLOSE_OBJECT) {
            // Leaving the innermost open object.
            array_pop($classStack);
        } else if ($this->tokens[$i]['code'] === T_COLON) {
            // If it is a scope opener, it belongs to a
            // DEFAULT or CASE statement.
            if (isset($this->tokens[$i]['scope_condition']) === true) {
                continue;
            }

            // Make sure this is not part of an inline IF statement.
            // Only tokens on the same line as the colon are considered.
            for ($x = ($i - 1); $x >= 0; $x--) {
                if ($this->tokens[$x]['code'] === T_INLINE_THEN) {
                    $this->tokens[$i]['code'] = T_INLINE_ELSE;
                    $this->tokens[$i]['type'] = 'T_INLINE_ELSE';

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo str_repeat("\t", count($classStack));
                        echo "\t* token $i converted from T_COLON to T_INLINE_ELSE *".PHP_EOL;
                    }

                    continue 2;
                } else if ($this->tokens[$x]['line'] < $this->tokens[$i]['line']) {
                    break;
                }
            }

            // The string to the left of the colon is either a property or label.
            for ($label = ($i - 1); $label >= 0; $label--) {
                if (isset(Util\Tokens::$emptyTokens[$this->tokens[$label]['code']]) === false) {
                    break;
                }
            }

            // Nothing to retag when no non-empty token precedes the colon
            // ($label ran down to -1) or when it is not a string.
            if ($label < 0
                || ($this->tokens[$label]['code'] !== T_STRING
                && $this->tokens[$label]['code'] !== T_CONSTANT_ENCAPSED_STRING)
            ) {
                continue;
            }

            if (empty($classStack) === false) {
                // Inside an object, so this is a property name.
                $this->tokens[$label]['code'] = T_PROPERTY;
                $this->tokens[$label]['type'] = 'T_PROPERTY';

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo str_repeat("\t", count($classStack));
                    echo "\t* token $label converted from T_STRING to T_PROPERTY *".PHP_EOL;
                }
            } else {
                $this->tokens[$label]['code'] = T_LABEL;
                $this->tokens[$label]['type'] = 'T_LABEL';

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo str_repeat("\t", count($classStack));
                    echo "\t* token $label converted from T_STRING to T_LABEL *".PHP_EOL;
                }
            }//end if
        }//end if
    }//end for

    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t*** END ADDITIONAL JS PROCESSING ***".PHP_EOL;
    }

}//end processAdditional()
Chris@4 1255
Chris@4 1256
Chris@4 1257 }//end class