annotate vendor/squizlabs/php_codesniffer/CodeSniffer/Tokenizers/JS.php @ 2:5311817fb629

Theme updates
author Chris Cannam
date Tue, 10 Jul 2018 13:19:18 +0000
parents c75dbcec494b
children
rev   line source
Chris@0 1 <?php
Chris@0 2 /**
Chris@0 3 * Tokenizes JS code.
Chris@0 4 *
Chris@0 5 * PHP version 5
Chris@0 6 *
Chris@0 7 * @category PHP
Chris@0 8 * @package PHP_CodeSniffer
Chris@0 9 * @author Greg Sherwood <gsherwood@squiz.net>
Chris@0 10 * @author Marc McIntyre <mmcintyre@squiz.net>
Chris@0 11 * @copyright 2006-2014 Squiz Pty Ltd (ABN 77 084 670 600)
Chris@0 12 * @license https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
Chris@0 13 * @link http://pear.php.net/package/PHP_CodeSniffer
Chris@0 14 */
Chris@0 15
Chris@0 16 /**
Chris@0 17 * Tokenizes JS code.
Chris@0 18 *
Chris@0 19 * @category PHP
Chris@0 20 * @package PHP_CodeSniffer
Chris@0 21 * @author Greg Sherwood <gsherwood@squiz.net>
Chris@0 22 * @copyright 2006-2014 Squiz Pty Ltd (ABN 77 084 670 600)
Chris@0 23 * @license https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
Chris@0 24 * @version Release: @package_version@
Chris@0 25 * @link http://pear.php.net/package/PHP_CodeSniffer
Chris@0 26 */
Chris@0 27 class PHP_CodeSniffer_Tokenizers_JS
Chris@0 28 {
Chris@0 29
/**
 * Whether files that look minified should be skipped instead of processed.
 *
 * Defaults to TRUE (skip them).
 *
 * @var boolean
 */
public $skipMinified = true;
Chris@0 36
/**
 * The tokens that are allowed to open a scope.
 *
 * Each entry is keyed by the scope-owning token and describes:
 *  - 'start'  : the token(s) that open the scope,
 *  - 'end'    : the token(s) that close the scope,
 *  - 'strict' : whether the token strictly requires an opener,
 *  - 'shared' : whether the token can share its scope closer,
 *  - 'with'   : the tokens it can share the closer with.
 *
 * CASE and DEFAULT are the entries here that share a scope closer.
 *
 * @var array
 */
public $scopeOpeners = array(
    T_IF       => array(
        'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
        'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
        'strict' => false,
        'shared' => false,
        'with'   => array(),
    ),
    T_TRY      => array(
        'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
        'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
        'strict' => true,
        'shared' => false,
        'with'   => array(),
    ),
    T_CATCH    => array(
        'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
        'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
        'strict' => true,
        'shared' => false,
        'with'   => array(),
    ),
    T_ELSE     => array(
        'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
        'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
        'strict' => false,
        'shared' => false,
        'with'   => array(),
    ),
    T_FOR      => array(
        'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
        'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
        'strict' => false,
        'shared' => false,
        'with'   => array(),
    ),
    T_FUNCTION => array(
        'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
        'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
        'strict' => false,
        'shared' => false,
        'with'   => array(),
    ),
    T_WHILE    => array(
        'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
        'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
        'strict' => false,
        'shared' => false,
        'with'   => array(),
    ),
    T_DO       => array(
        'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
        'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
        'strict' => true,
        'shared' => false,
        'with'   => array(),
    ),
    T_SWITCH   => array(
        'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
        'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
        'strict' => true,
        'shared' => false,
        'with'   => array(),
    ),
    T_CASE     => array(
        'start'  => array(T_COLON => T_COLON),
        'end'    => array(
            T_BREAK    => T_BREAK,
            T_RETURN   => T_RETURN,
            T_CONTINUE => T_CONTINUE,
            T_THROW    => T_THROW,
        ),
        'strict' => true,
        'shared' => true,
        'with'   => array(
            T_DEFAULT => T_DEFAULT,
            T_CASE    => T_CASE,
            T_SWITCH  => T_SWITCH,
        ),
    ),
    T_DEFAULT  => array(
        'start'  => array(T_COLON => T_COLON),
        'end'    => array(
            T_BREAK    => T_BREAK,
            T_RETURN   => T_RETURN,
            T_CONTINUE => T_CONTINUE,
            T_THROW    => T_THROW,
        ),
        'strict' => true,
        'shared' => true,
        'with'   => array(
            T_CASE   => T_CASE,
            T_SWITCH => T_SWITCH,
        ),
    ),
);
Chris@0 143
/**
 * Tokens that end a scope.
 *
 * The end tokens are duplicated here from $scopeOpeners so they can be
 * looked up quickly while a file is being parsed.
 *
 * NOTE(review): only T_CLOSE_CURLY_BRACKET and T_BREAK are listed, whereas
 * the CASE/DEFAULT entries in $scopeOpeners also name T_RETURN, T_CONTINUE
 * and T_THROW as end tokens. Confirm whether that omission is intentional.
 *
 * @var array
 */
public $endScopeTokens = array(
    T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET,
    T_BREAK               => T_BREAK,
);
Chris@0 157
/**
 * Map of special JS keywords/operators to the name of their token type.
 *
 * Keys are the literal source text (keywords are looked up lowercased by
 * tokenizeString()); values are token constant names that are resolved
 * with constant() when a token is created.
 *
 * @var array
 */
protected $tokenValues = array(
    'function'  => 'T_FUNCTION',
    'prototype' => 'T_PROTOTYPE',
    'try'       => 'T_TRY',
    'catch'     => 'T_CATCH',
    'return'    => 'T_RETURN',
    'throw'     => 'T_THROW',
    'break'     => 'T_BREAK',
    'switch'    => 'T_SWITCH',
    'continue'  => 'T_CONTINUE',
    'if'        => 'T_IF',
    'else'      => 'T_ELSE',
    'do'        => 'T_DO',
    'while'     => 'T_WHILE',
    'for'       => 'T_FOR',
    'var'       => 'T_VAR',
    'case'      => 'T_CASE',
    'default'   => 'T_DEFAULT',
    'true'      => 'T_TRUE',
    'false'     => 'T_FALSE',
    'null'      => 'T_NULL',
    'this'      => 'T_THIS',
    'typeof'    => 'T_TYPEOF',
    '('         => 'T_OPEN_PARENTHESIS',
    ')'         => 'T_CLOSE_PARENTHESIS',
    '{'         => 'T_OPEN_CURLY_BRACKET',
    '}'         => 'T_CLOSE_CURLY_BRACKET',
    '['         => 'T_OPEN_SQUARE_BRACKET',
    ']'         => 'T_CLOSE_SQUARE_BRACKET',
    '?'         => 'T_INLINE_THEN',
    '.'         => 'T_OBJECT_OPERATOR',
    '+'         => 'T_PLUS',
    '-'         => 'T_MINUS',
    '*'         => 'T_MULTIPLY',
    '%'         => 'T_MODULUS',
    '/'         => 'T_DIVIDE',
    '^'         => 'T_LOGICAL_XOR',
    ','         => 'T_COMMA',
    ';'         => 'T_SEMICOLON',
    ':'         => 'T_COLON',
    '<'         => 'T_LESS_THAN',
    '>'         => 'T_GREATER_THAN',
    '<<'        => 'T_SL',
    '>>'        => 'T_SR',
    '>>>'       => 'T_ZSR',
    '<<='       => 'T_SL_EQUAL',
    '>>='       => 'T_SR_EQUAL',
    '>>>='      => 'T_ZSR_EQUAL',
    '<='        => 'T_IS_SMALLER_OR_EQUAL',
    '>='        => 'T_IS_GREATER_OR_EQUAL',
    '=>'        => 'T_DOUBLE_ARROW',
    '!'         => 'T_BOOLEAN_NOT',
    '||'        => 'T_BOOLEAN_OR',
    '&&'        => 'T_BOOLEAN_AND',
    '|'         => 'T_BITWISE_OR',
    '&'         => 'T_BITWISE_AND',
    '!='        => 'T_IS_NOT_EQUAL',
    '!=='       => 'T_IS_NOT_IDENTICAL',
    '='         => 'T_EQUAL',
    '=='        => 'T_IS_EQUAL',
    '==='       => 'T_IS_IDENTICAL',
    '-='        => 'T_MINUS_EQUAL',
    '+='        => 'T_PLUS_EQUAL',
    '*='        => 'T_MUL_EQUAL',
    '/='        => 'T_DIV_EQUAL',
    '%='        => 'T_MOD_EQUAL',
    '++'        => 'T_INC',
    '--'        => 'T_DEC',
    '//'        => 'T_COMMENT',
    '/*'        => 'T_COMMENT',
    '/**'       => 'T_DOC_COMMENT',
    '*/'        => 'T_COMMENT',
);
Chris@0 236
/**
 * The characters that delimit a string literal.
 *
 * Each delimiter maps to itself; tokenizeString() only checks the keys.
 *
 * @var array
 */
protected $stringTokens = array(
    '\'' => '\'',
    '"'  => '"',
);
Chris@0 246
/**
 * The tokens that start a comment, mapped to the token that ends it.
 *
 * A NULL end token means the comment runs to the end of the line.
 *
 * @var array
 */
protected $commentTokens = array(
    '//'  => null,
    '/*'  => '*/',
    '/**' => '*/',
);
Chris@0 257
Chris@0 258
/**
 * Creates an array of tokens when given some JS code.
 *
 * Tokenizing happens in two passes:
 *  1. The input is walked one character at a time, accumulating a buffer
 *     and emitting raw tokens (whitespace, strings, regular expressions,
 *     the keywords/operators listed in $tokenValues, and T_STRING for
 *     everything else).
 *  2. The raw tokens are post-processed: comment pieces are joined (doc
 *     comments are handed to the comment tokenizer), multi-line tokens are
 *     split so there is one token per line, digit/dot runs are merged into
 *     T_LNUMBER/T_DNUMBER, and the token following an object operator is
 *     usually converted to T_STRING.
 *
 * Each returned token is an array with 'code', 'type' and 'content' keys.
 *
 * @param string $string  The string to tokenize.
 * @param string $eolChar The EOL character to use for splitting strings.
 *                        NOTE(review): the default is the single-quoted,
 *                        two-character literal '\n' rather than a newline.
 *                        It is left as-is for backward compatibility, so
 *                        callers should always pass the real EOL sequence.
 *
 * @return array
 */
public function tokenizeString($string, $eolChar='\n')
{
    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t*** START JS TOKENIZING ***".PHP_EOL;
    }

    // The longest known token bounds how far the look-ahead has to go.
    $maxTokenLength = 0;
    foreach (array_keys($this->tokenValues) as $token) {
        if (strlen($token) > $maxTokenLength) {
            $maxTokenLength = strlen($token);
        }
    }

    $tokens          = array();
    $inString        = '';
    $stringChar      = null;
    $inComment       = '';
    $buffer          = '';
    $preStringBuffer = '';
    $cleanBuffer     = false;

    $commentTokenizer = new PHP_CodeSniffer_Tokenizers_Comment();

    $tokens[] = array(
        'code'    => T_OPEN_TAG,
        'type'    => 'T_OPEN_TAG',
        'content' => '',
    );

    // Convert newlines to single characters for ease of
    // processing. We will change them back later.
    $string = str_replace($eolChar, "\n", $string);

    $chars    = str_split($string);
    $numChars = count($chars);
    for ($i = 0; $i < $numChars; $i++) {
        $char = $chars[$i];

        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            $content       = PHP_CodeSniffer::prepareForOutput($char);
            $bufferContent = PHP_CodeSniffer::prepareForOutput($buffer);

            if ($inString !== '') {
                echo "\t";
            }

            if ($inComment !== '') {
                echo "\t";
            }

            echo "\tProcess char $i => $content (buffer: $bufferContent)".PHP_EOL;
        }//end if

        if ($inString === '' && $inComment === '' && $buffer !== '') {
            // If the buffer only has whitespace and we are about to
            // add a character, store the whitespace first.
            if (trim($char) !== '' && trim($buffer) === '') {
                $tokens[] = array(
                    'code'    => T_WHITESPACE,
                    'type'    => 'T_WHITESPACE',
                    'content' => str_replace("\n", $eolChar, $buffer),
                );

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = PHP_CodeSniffer::prepareForOutput($buffer);
                    echo "\t=> Added token T_WHITESPACE ($content)".PHP_EOL;
                }

                $buffer = '';
            }

            // If the buffer is not whitespace and we are about to
            // add a whitespace character, store the content first.
            if ($inString === ''
                && $inComment === ''
                && trim($char) === ''
                && trim($buffer) !== ''
            ) {
                $tokens[] = array(
                    'code'    => T_STRING,
                    'type'    => 'T_STRING',
                    'content' => str_replace("\n", $eolChar, $buffer),
                );

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = PHP_CodeSniffer::prepareForOutput($buffer);
                    echo "\t=> Added token T_STRING ($content)".PHP_EOL;
                }

                $buffer = '';
            }
        }//end if

        // Process strings.
        if ($inComment === '' && isset($this->stringTokens[$char]) === true) {
            if ($inString === $char) {
                // This could be the end of the string, but make sure it
                // is not escaped first.
                $escapes = 0;
                for ($x = ($i - 1); $x >= 0; $x--) {
                    if ($chars[$x] !== '\\') {
                        break;
                    }

                    $escapes++;
                }

                if ($escapes === 0 || ($escapes % 2) === 0) {
                    // There is an even number escape chars,
                    // so this is not escaped, it is the end of the string.
                    $tokens[] = array(
                        'code'    => T_CONSTANT_ENCAPSED_STRING,
                        'type'    => 'T_CONSTANT_ENCAPSED_STRING',
                        'content' => str_replace("\n", $eolChar, $buffer).$char,
                    );

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo "\t\t* found end of string *".PHP_EOL;
                        $content = PHP_CodeSniffer::prepareForOutput($buffer.$char);
                        echo "\t=> Added token T_CONSTANT_ENCAPSED_STRING ($content)".PHP_EOL;
                    }

                    $buffer          = '';
                    $preStringBuffer = '';
                    $inString        = '';
                    $stringChar      = null;
                    continue;
                }//end if
            } else if ($inString === '') {
                // Remember where the string started so we can rewind to
                // this point if it turns out not to be a string.
                $inString        = $char;
                $stringChar      = $i;
                $preStringBuffer = $buffer;

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\t* looking for string closer *".PHP_EOL;
                }
            }//end if
        }//end if

        if ($inString !== '' && $char === "\n") {
            // Unless this newline character is escaped, the string did not
            // end before the end of the line, which means it probably
            // wasn't a string at all (maybe a regex).
            if ($chars[($i - 1)] !== '\\') {
                $i               = $stringChar;
                $buffer          = $preStringBuffer;
                $preStringBuffer = '';
                $inString        = '';
                $stringChar      = null;
                $char            = $chars[$i];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\t* found newline before end of string, bailing *".PHP_EOL;
                }
            }
        }

        $buffer .= $char;

        // We don't look for special tokens inside strings,
        // so if we are in a string, we can continue here now
        // that the current char is in the buffer.
        if ($inString !== '') {
            continue;
        }

        // Special case for T_DIVIDE which can actually be
        // the start of a regular expression. The next character is only
        // inspected when it exists, so a trailing "/" does not read past
        // the end of the input.
        if ($buffer === $char
            && $char === '/'
            && (isset($chars[($i + 1)]) === false || $chars[($i + 1)] !== '*')
        ) {
            $regex = $this->getRegexToken(
                $i,
                $string,
                $chars,
                $tokens,
                $eolChar
            );

            if ($regex !== null) {
                $tokens[] = array(
                    'code'    => T_REGULAR_EXPRESSION,
                    'type'    => 'T_REGULAR_EXPRESSION',
                    'content' => $regex['content'],
                );

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = PHP_CodeSniffer::prepareForOutput($regex['content']);
                    echo "\t=> Added token T_REGULAR_EXPRESSION ($content)".PHP_EOL;
                }

                $i           = $regex['end'];
                $buffer      = '';
                $cleanBuffer = false;
                continue;
            }//end if
        }//end if

        // Check for known tokens, but ignore tokens found that are not at
        // the end of a string, like FOR and this.FORmat.
        // The character class is [a-zA-Z0-9_]; an "A-z" range would also
        // match "[", "\", "]", "^" and "`", which stops keywords directly
        // followed by one of those (e.g. this[key]) from being recognised.
        if (isset($this->tokenValues[strtolower($buffer)]) === true
            && (preg_match('|[a-zA-Z0-9_]|', $char) === 0
            || isset($chars[($i + 1)]) === false
            || preg_match('|[a-zA-Z0-9_]|', $chars[($i + 1)]) === 0)
        ) {
            $matchedToken    = false;
            $lookAheadLength = ($maxTokenLength - strlen($buffer));

            if ($lookAheadLength > 0) {
                // The buffer contains a token type, but we need
                // to look ahead at the next chars to see if this is
                // actually part of a larger token. For example,
                // FOR and FOREACH.
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\t* buffer possibly contains token, looking ahead $lookAheadLength chars *".PHP_EOL;
                }

                $charBuffer = $buffer;
                for ($x = 1; $x <= $lookAheadLength; $x++) {
                    if (isset($chars[($i + $x)]) === false) {
                        break;
                    }

                    $charBuffer .= $chars[($i + $x)];

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $content = PHP_CodeSniffer::prepareForOutput($charBuffer);
                        echo "\t\t=> Looking ahead $x chars => $content".PHP_EOL;
                    }

                    if (isset($this->tokenValues[strtolower($charBuffer)]) === true) {
                        // We've found something larger that matches
                        // so we can ignore this char. Except for 1 very specific
                        // case where a comment like /**/ needs to tokenize as
                        // T_COMMENT and not T_DOC_COMMENT.
                        $oldType = $this->tokenValues[strtolower($buffer)];
                        $newType = $this->tokenValues[strtolower($charBuffer)];
                        if ($oldType === 'T_COMMENT'
                            && $newType === 'T_DOC_COMMENT'
                            && isset($chars[($i + $x + 1)]) === true
                            && $chars[($i + $x + 1)] === '/'
                        ) {
                            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                                echo "\t\t* look ahead ignored T_DOC_COMMENT, continuing *".PHP_EOL;
                            }
                        } else {
                            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                                echo "\t\t* look ahead found more specific token ($newType), ignoring $i *".PHP_EOL;
                            }

                            $matchedToken = true;
                            break;
                        }
                    }//end if
                }//end for
            }//end if

            if ($matchedToken === false) {
                if (PHP_CODESNIFFER_VERBOSITY > 1 && $lookAheadLength > 0) {
                    echo "\t\t* look ahead found nothing *".PHP_EOL;
                }

                $value = $this->tokenValues[strtolower($buffer)];

                if ($value === 'T_FUNCTION' && $buffer !== 'function') {
                    // The function keyword needs to be all lowercase or else
                    // it is just a function called "Function".
                    $value = 'T_STRING';
                }

                $tokens[] = array(
                    'code'    => constant($value),
                    'type'    => $value,
                    'content' => $buffer,
                );

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = PHP_CodeSniffer::prepareForOutput($buffer);
                    echo "\t=> Added token $value ($content)".PHP_EOL;
                }

                $cleanBuffer = true;
            }//end if
        } else if (isset($this->tokenValues[strtolower($char)]) === true) {
            // No matter what token we end up using, we don't
            // need the content in the buffer any more because we have
            // found a valid token.
            $newContent = substr(str_replace("\n", $eolChar, $buffer), 0, -1);
            if ($newContent !== '') {
                $tokens[] = array(
                    'code'    => T_STRING,
                    'type'    => 'T_STRING',
                    'content' => $newContent,
                );

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = PHP_CodeSniffer::prepareForOutput(substr($buffer, 0, -1));
                    echo "\t=> Added token T_STRING ($content)".PHP_EOL;
                }
            }

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                echo "\t\t* char is token, looking ahead ".($maxTokenLength - 1).' chars *'.PHP_EOL;
            }

            // The char is a token type, but we need to look ahead at the
            // next chars to see if this is actually part of a larger token.
            // For example, = and ===.
            $charBuffer   = $char;
            $matchedToken = false;
            for ($x = 1; $x <= $maxTokenLength; $x++) {
                if (isset($chars[($i + $x)]) === false) {
                    break;
                }

                $charBuffer .= $chars[($i + $x)];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = PHP_CodeSniffer::prepareForOutput($charBuffer);
                    echo "\t\t=> Looking ahead $x chars => $content".PHP_EOL;
                }

                if (isset($this->tokenValues[strtolower($charBuffer)]) === true) {
                    // We've found something larger that matches
                    // so we can ignore this char.
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $type = $this->tokenValues[strtolower($charBuffer)];
                        echo "\t\t* look ahead found more specific token ($type), ignoring $i *".PHP_EOL;
                    }

                    $matchedToken = true;
                    break;
                }
            }//end for

            if ($matchedToken === false) {
                $value    = $this->tokenValues[strtolower($char)];
                $tokens[] = array(
                    'code'    => constant($value),
                    'type'    => $value,
                    'content' => $char,
                );

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\t* look ahead found nothing *".PHP_EOL;
                    $content = PHP_CodeSniffer::prepareForOutput($char);
                    echo "\t=> Added token $value ($content)".PHP_EOL;
                }

                $cleanBuffer = true;
            } else {
                // Start the buffer again from this char so the larger
                // token can be matched as the following chars arrive.
                $buffer = $char;
            }//end if
        }//end if

        // Keep track of content inside comments.
        if ($inComment === ''
            && array_key_exists($buffer, $this->commentTokens) === true
        ) {
            // This is not really a comment if the content
            // looks like \// (i.e., it is escaped).
            if (isset($chars[($i - 2)]) === true && $chars[($i - 2)] === '\\') {
                $lastToken   = array_pop($tokens);
                $lastContent = $lastToken['content'];
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $value   = $this->tokenValues[strtolower($lastContent)];
                    $content = PHP_CodeSniffer::prepareForOutput($lastContent);
                    echo "\t=> Removed token $value ($content)".PHP_EOL;
                }

                // Re-add the removed comment opener as one token per char.
                $lastChars    = str_split($lastContent);
                $lastNumChars = count($lastChars);
                for ($x = 0; $x < $lastNumChars; $x++) {
                    $lastChar = $lastChars[$x];
                    $value    = $this->tokenValues[strtolower($lastChar)];
                    $tokens[] = array(
                        'code'    => constant($value),
                        'type'    => $value,
                        'content' => $lastChar,
                    );

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $content = PHP_CodeSniffer::prepareForOutput($lastChar);
                        echo "\t=> Added token $value ($content)".PHP_EOL;
                    }
                }
            } else {
                // We have started a comment.
                $inComment = $buffer;

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\t* looking for end of comment *".PHP_EOL;
                }
            }//end if
        } else if ($inComment !== '') {
            if ($this->commentTokens[$inComment] === null) {
                // Comment ends at the next newline.
                if (strpos($buffer, "\n") !== false) {
                    $inComment = '';
                }
            } else {
                if ($this->commentTokens[$inComment] === $buffer) {
                    $inComment = '';
                }
            }

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                if ($inComment === '') {
                    echo "\t\t* found end of comment *".PHP_EOL;
                }
            }

            if ($inComment === '' && $cleanBuffer === false) {
                $tokens[] = array(
                    'code'    => T_STRING,
                    'type'    => 'T_STRING',
                    'content' => str_replace("\n", $eolChar, $buffer),
                );

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = PHP_CodeSniffer::prepareForOutput($buffer);
                    echo "\t=> Added token T_STRING ($content)".PHP_EOL;
                }

                $buffer = '';
            }
        }//end if

        if ($cleanBuffer === true) {
            $buffer      = '';
            $cleanBuffer = false;
        }
    }//end for

    // Flush whatever is left in the buffer. A strict comparison is used
    // because empty() treats the string "0" as empty, which would silently
    // drop the last character of input ending in "0".
    if ($buffer !== '') {
        if (trim($buffer) === '') {
            // Buffer contains whitespace from the end of the file.
            $trailingCode = T_WHITESPACE;
            $trailingType = 'T_WHITESPACE';
        } else {
            // The input ended without a terminator, so the buffer still
            // holds real content; label it like any other unknown content.
            $trailingCode = T_STRING;
            $trailingType = 'T_STRING';
        }

        $tokens[] = array(
            'code'    => $trailingCode,
            'type'    => $trailingType,
            'content' => str_replace("\n", $eolChar, $buffer),
        );

        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            $content = PHP_CodeSniffer::prepareForOutput($buffer);
            echo "\t=> Added token $trailingType ($content)".PHP_EOL;
        }
    }

    $tokens[] = array(
        'code'    => T_CLOSE_TAG,
        'type'    => 'T_CLOSE_TAG',
        'content' => '',
    );

    /*
        Now that we have done some basic tokenizing, we need to
        modify the tokens to join some together and split some apart
        so they match what the PHP tokenizer does.
    */

    $finalTokens = array();
    $newStackPtr = 0;
    $numTokens   = count($tokens);
    for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
        $token = $tokens[$stackPtr];

        /*
            Look for comments and join the tokens together.
        */

        if ($token['code'] === T_COMMENT || $token['code'] === T_DOC_COMMENT) {
            $newContent   = '';
            $tokenContent = $token['content'];

            $endContent = null;
            if (isset($this->commentTokens[$tokenContent]) === true) {
                $endContent = $this->commentTokens[$tokenContent];
            }

            while ($tokenContent !== $endContent) {
                if ($endContent === null
                    && strpos($tokenContent, $eolChar) !== false
                ) {
                    // A null end token means the comment ends at the end of
                    // the line so we look for newlines and split the token.
                    $tokens[$stackPtr]['content'] = substr(
                        $tokenContent,
                        (strpos($tokenContent, $eolChar) + strlen($eolChar))
                    );

                    $tokenContent = substr(
                        $tokenContent,
                        0,
                        (strpos($tokenContent, $eolChar) + strlen($eolChar))
                    );

                    // If the substr failed, skip the token as the content
                    // will now be blank.
                    if ($tokens[$stackPtr]['content'] !== false
                        && $tokens[$stackPtr]['content'] !== ''
                    ) {
                        $stackPtr--;
                    }

                    break;
                }//end if

                $stackPtr++;
                $newContent .= $tokenContent;
                if (isset($tokens[$stackPtr]) === false) {
                    break;
                }

                $tokenContent = $tokens[$stackPtr]['content'];
            }//end while

            if ($token['code'] === T_DOC_COMMENT) {
                $commentTokens = $commentTokenizer->tokenizeString($newContent.$tokenContent, $eolChar, $newStackPtr);
                foreach ($commentTokens as $commentToken) {
                    $finalTokens[$newStackPtr] = $commentToken;
                    $newStackPtr++;
                }

                continue;
            } else {
                // Save the new content in the current token so
                // the code below can chop it up on newlines.
                $token['content'] = $newContent.$tokenContent;
            }
        }//end if

        /*
            If this token has newlines in its content, split each line up
            and create a new token for each line. We do this so it's easier
            to ascertain where errors occur on a line.
        */

        if (strpos($token['content'], $eolChar) !== false) {
            $tokenLines = explode($eolChar, $token['content']);
            $numLines   = count($tokenLines);
            $newToken   = array();

            for ($i = 0; $i < $numLines; $i++) {
                $newToken['content'] = $tokenLines[$i];
                if ($i === ($numLines - 1)) {
                    // Content ended with an EOL, so there is no final line.
                    if ($tokenLines[$i] === '') {
                        break;
                    }
                } else {
                    $newToken['content'] .= $eolChar;
                }

                $newToken['type']          = $token['type'];
                $newToken['code']          = $token['code'];
                $finalTokens[$newStackPtr] = $newToken;
                $newStackPtr++;
            }
        } else {
            $finalTokens[$newStackPtr] = $token;
            $newStackPtr++;
        }//end if

        // Convert numbers, including decimals.
        if ($token['code'] === T_STRING
            || $token['code'] === T_OBJECT_OPERATOR
        ) {
            $newContent  = '';
            $oldStackPtr = $stackPtr;
            // The trailing T_CLOSE_TAG has empty content, so this loop
            // always stops before running off the end of $tokens.
            while (preg_match('|^[0-9\.]+$|', $tokens[$stackPtr]['content']) !== 0) {
                $newContent .= $tokens[$stackPtr]['content'];
                $stackPtr++;
            }

            if ($newContent !== '' && $newContent !== '.') {
                $finalTokens[($newStackPtr - 1)]['content'] = $newContent;
                if (ctype_digit($newContent) === true) {
                    $finalTokens[($newStackPtr - 1)]['code'] = constant('T_LNUMBER');
                    $finalTokens[($newStackPtr - 1)]['type'] = 'T_LNUMBER';
                } else {
                    $finalTokens[($newStackPtr - 1)]['code'] = constant('T_DNUMBER');
                    $finalTokens[($newStackPtr - 1)]['type'] = 'T_DNUMBER';
                }

                $stackPtr--;
                continue;
            } else {
                $stackPtr = $oldStackPtr;
            }
        }//end if

        // Convert the token after an object operator into a string, in most cases.
        if ($token['code'] === T_OBJECT_OPERATOR) {
            for ($i = ($stackPtr + 1); $i < $numTokens; $i++) {
                if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$i]['code']]) === true) {
                    continue;
                }

                if ($tokens[$i]['code'] !== T_PROTOTYPE
                    && $tokens[$i]['code'] !== T_LNUMBER
                    && $tokens[$i]['code'] !== T_DNUMBER
                ) {
                    $tokens[$i]['code'] = T_STRING;
                    $tokens[$i]['type'] = 'T_STRING';
                }

                break;
            }
        }
    }//end for

    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t*** END TOKENIZING ***".PHP_EOL;
    }

    return $finalTokens;

}//end tokenizeString()
Chris@0 881
Chris@0 882
/**
 * Tokenizes a regular expression if one is found.
 *
 * The character at $char is only treated as the start of a regular
 * expression when all of the following hold:
 *  - the last token in $tokens that is not listed in
 *    PHP_CodeSniffer_Tokens::$emptyTokens is one of the tokens that can
 *    come directly before an expression (assignment, comparison, opening
 *    bracket, comma, return, etc.);
 *  - an unescaped closing "/" is found before the end of the line;
 *  - the first character after the regex, its flags and any spaces is one
 *    of: , ) ] ; space . : or the EOL character.
 *
 * If a regular expression is not found, NULL is returned.
 *
 * @param int    $char    The index of the possible regex start character.
 * @param string $string  The complete content of the string being tokenized.
 * @param array  $chars   An array of characters being tokenized.
 * @param array  $tokens  The current array of tokens found in the string.
 * @param string $eolChar The EOL character to use for splitting strings.
 *
 * @return array|null An array with the indexes "start" and "end" (positions
 *                    in $chars) and "content" (the regex text including
 *                    delimiters and flags), or NULL if no regex was found.
 */
public function getRegexToken($char, $string, $chars, $tokens, $eolChar)
{
    $beforeTokens = array(
                     T_EQUAL               => true,
                     T_IS_NOT_EQUAL        => true,
                     T_IS_IDENTICAL        => true,
                     T_IS_NOT_IDENTICAL    => true,
                     T_OPEN_PARENTHESIS    => true,
                     T_OPEN_SQUARE_BRACKET => true,
                     T_RETURN              => true,
                     T_BOOLEAN_OR          => true,
                     T_BOOLEAN_AND         => true,
                     T_BITWISE_OR          => true,
                     T_BITWISE_AND         => true,
                     T_COMMA               => true,
                     T_COLON               => true,
                     T_TYPEOF              => true,
                     T_INLINE_THEN         => true,
                     T_INLINE_ELSE         => true,
                    );

    $afterTokens = array(
                    ','      => true,
                    ')'      => true,
                    ']'      => true,
                    ';'      => true,
                    ' '      => true,
                    '.'      => true,
                    ':'      => true,
                    $eolChar => true,
                   );

    // Find the last non-whitespace token that was added
    // to the tokens array.
    $numTokens = count($tokens);
    for ($prev = ($numTokens - 1); $prev >= 0; $prev--) {
        if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$prev]['code']]) === false) {
            break;
        }
    }

    // With no previous token at all ($prev is -1) there is nothing that
    // marks this as the start of an expression, so it is not a regex.
    if ($prev < 0 || isset($beforeTokens[$tokens[$prev]['code']]) === false) {
        return null;
    }

    // This is probably a regular expression, so look for the end of it.
    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t* token possibly starts a regular expression *".PHP_EOL;
    }

    $numChars  = count($chars);
    $eolLength = strlen($eolChar);
    for ($next = ($char + 1); $next < $numChars; $next++) {
        if ($chars[$next] === '/') {
            // The slash is escaped only if it is preceded by an odd number
            // of backslashes: /...\/ is escaped, /...\\/ is the end of the
            // regex, /...\\\/ is escaped again, and so on.
            $backslashes = 0;
            for ($b = ($next - 1); $b > $char && $chars[$b] === '\\'; $b--) {
                $backslashes++;
            }

            if (($backslashes % 2) === 0) {
                break;
            }
        } else if (substr($string, $next, $eolLength) === $eolChar) {
            // This is the last token on the line and regular
            // expressions need to be defined on a single line,
            // so this is not a regular expression.
            break;
        }
    }

    // Either the input ran out or the line ended before a closing slash.
    if ($next >= $numChars || $chars[$next] !== '/') {
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t* could not find end of regular expression *".PHP_EOL;
        }

        return null;
    }

    // The token directly after the end of the regex can
    // be modifiers like global and case insensitive
    // (.e.g, /pattern/gi).
    while (($next + 1) < $numChars
        && preg_match('|[a-zA-Z]|', $chars[($next + 1)]) === 1
    ) {
        $next++;
    }

    $regexEnd = $next;
    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t* found end of regular expression at token $regexEnd *".PHP_EOL;
    }

    // Skip any spaces after the regex to find the next significant character.
    for ($next = ($regexEnd + 1); $next < $numChars; $next++) {
        if ($chars[$next] !== ' ') {
            break;
        }
    }

    // A regex that runs to the very end of the input has no following
    // character to validate and is, as before, not accepted.
    if ($next >= $numChars || isset($afterTokens[$chars[$next]]) === false) {
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t* tokens after regular expression do not look correct *".PHP_EOL;
        }

        return null;
    }

    // This is a regular expression, so join all the tokens together.
    $content = implode('', array_slice($chars, $char, ($regexEnd - $char + 1)));

    return array(
            'start'   => $char,
            'end'     => $regexEnd,
            'content' => $content,
           );

}//end getRegexToken()
Chris@0 1023
Chris@0 1024
/**
 * Performs additional processing after main tokenizing.
 *
 * This additional processing looks for properties, closures, labels and objects:
 *  - a T_FUNCTION with a scope opener whose next non-empty token is an
 *    opening parenthesis becomes a T_CLOSURE;
 *  - an opening curly bracket that has a bracket closer but no scope
 *    condition becomes T_OBJECT (and its closer T_CLOSE_OBJECT);
 *  - a T_COLON that follows a T_INLINE_THEN on the same line becomes
 *    T_INLINE_ELSE;
 *  - the string before any other non-scope-opening colon becomes a
 *    T_PROPERTY when inside an object, or a T_LABEL otherwise.
 *
 * @param array  $tokens  The array of tokens to process. Passed by reference
 *                        and modified in place.
 * @param string $eolChar The EOL character to use for splitting strings.
 *                        Not read by this method.
 *
 * @return void
 */
public function processAdditional(&$tokens, $eolChar)
{
    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t*** START ADDITIONAL JS PROCESSING ***".PHP_EOL;
    }

    $numTokens = count($tokens);

    // Stack of the T_OBJECT openers we are currently inside of.
    $classStack = array();

    for ($i = 0; $i < $numTokens; $i++) {
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            $type    = $tokens[$i]['type'];
            $content = PHP_CodeSniffer::prepareForOutput($tokens[$i]['content']);

            echo str_repeat("\t", count($classStack));
            echo "\tProcess token $i: $type => $content".PHP_EOL;
        }

        // Looking for functions that are actually closures.
        if ($tokens[$i]['code'] === T_FUNCTION && isset($tokens[$i]['scope_opener']) === true) {
            for ($x = ($i + 1); $x < $numTokens; $x++) {
                if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false) {
                    break;
                }
            }

            // $x equals $numTokens if only empty tokens followed the keyword.
            if (isset($tokens[$x]) === true && $tokens[$x]['code'] === T_OPEN_PARENTHESIS) {
                $tokens[$i]['code'] = T_CLOSURE;
                $tokens[$i]['type'] = 'T_CLOSURE';
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $line = $tokens[$i]['line'];
                    echo str_repeat("\t", count($classStack));
                    echo "\t* token $i on line $line changed from T_FUNCTION to T_CLOSURE".PHP_EOL;
                }

                // Tokens inside the scope recorded this token as a condition
                // under its old code, so update those entries as well.
                for ($x = ($tokens[$i]['scope_opener'] + 1); $x < $tokens[$i]['scope_closer']; $x++) {
                    if (isset($tokens[$x]['conditions'][$i]) === false) {
                        continue;
                    }

                    $tokens[$x]['conditions'][$i] = T_CLOSURE;
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $type = $tokens[$x]['type'];
                        echo str_repeat("\t", count($classStack));
                        echo "\t\t* cleaned $x ($type) *".PHP_EOL;
                    }
                }
            }//end if

            continue;
        } else if ($tokens[$i]['code'] === T_OPEN_CURLY_BRACKET
            && isset($tokens[$i]['scope_condition']) === false
            && isset($tokens[$i]['bracket_closer']) === true
        ) {
            $classStack[] = $i;

            $closer = $tokens[$i]['bracket_closer'];
            $tokens[$i]['code']      = T_OBJECT;
            $tokens[$i]['type']      = 'T_OBJECT';
            $tokens[$closer]['code'] = T_CLOSE_OBJECT;
            $tokens[$closer]['type'] = 'T_CLOSE_OBJECT';

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                echo str_repeat("\t", count($classStack));
                echo "\t* token $i converted from T_OPEN_CURLY_BRACKET to T_OBJECT *".PHP_EOL;
                echo str_repeat("\t", count($classStack));
                echo "\t* token $closer converted from T_CLOSE_CURLY_BRACKET to T_CLOSE_OBJECT *".PHP_EOL;
            }

            // Register the object as a condition of every token it contains,
            // keeping each conditions array ordered by token index.
            for ($x = ($i + 1); $x < $closer; $x++) {
                $tokens[$x]['conditions'][$i] = T_OBJECT;
                ksort($tokens[$x]['conditions'], SORT_NUMERIC);
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $type = $tokens[$x]['type'];
                    echo str_repeat("\t", count($classStack));
                    echo "\t\t* added T_OBJECT condition to $x ($type) *".PHP_EOL;
                }
            }
        } else if ($tokens[$i]['code'] === T_CLOSE_OBJECT) {
            // Leaving the innermost object.
            array_pop($classStack);
        } else if ($tokens[$i]['code'] === T_COLON) {
            // If it is a scope opener, it belongs to a
            // DEFAULT or CASE statement.
            if (isset($tokens[$i]['scope_condition']) === true) {
                continue;
            }

            // Make sure this is not part of an inline IF statement.
            // Only tokens on the same line as the colon are examined.
            for ($x = ($i - 1); $x >= 0; $x--) {
                if ($tokens[$x]['code'] === T_INLINE_THEN) {
                    $tokens[$i]['code'] = T_INLINE_ELSE;
                    $tokens[$i]['type'] = 'T_INLINE_ELSE';

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo str_repeat("\t", count($classStack));
                        echo "\t* token $i converted from T_COLON to T_INLINE_ELSE *".PHP_EOL;
                    }

                    continue 2;
                } else if ($tokens[$x]['line'] < $tokens[$i]['line']) {
                    break;
                }
            }

            // The string to the left of the colon is either a property or label.
            for ($label = ($i - 1); $label >= 0; $label--) {
                if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$label]['code']]) === false) {
                    break;
                }
            }

            // $label is -1 when nothing but empty tokens precede the colon.
            if ($label < 0
                || ($tokens[$label]['code'] !== T_STRING
                && $tokens[$label]['code'] !== T_CONSTANT_ENCAPSED_STRING)
            ) {
                continue;
            }

            if (empty($classStack) === false) {
                $tokens[$label]['code'] = T_PROPERTY;
                $tokens[$label]['type'] = 'T_PROPERTY';

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo str_repeat("\t", count($classStack));
                    echo "\t* token $label converted from T_STRING to T_PROPERTY *".PHP_EOL;
                }
            } else {
                $tokens[$label]['code'] = T_LABEL;
                $tokens[$label]['type'] = 'T_LABEL';

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo str_repeat("\t", count($classStack));
                    echo "\t* token $label converted from T_STRING to T_LABEL *".PHP_EOL;
                }
            }//end if
        }//end if
    }//end for

    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t*** END ADDITIONAL JS PROCESSING ***".PHP_EOL;
    }

}//end processAdditional()
Chris@0 1177
Chris@0 1178
Chris@0 1179 }//end class