cmmr2012-drupal-site: vendor/squizlabs/php_codesniffer/src/Tokenizers/JS.php annotate

annotate vendor/squizlabs/php_codesniffer/src/Tokenizers/JS.php @ 5:12f9dff5fda9 tip

Update to Drupal core 8.7.1

author	Chris Cannam
date	Thu, 09 May 2019 15:34:47 +0100
parents	a9cd425dd02b
children

rev	line source
Chris@4	1 <?php
Chris@4	2 /**
Chris@4	3 * Tokenizes JS code.
Chris@4	4 *
Chris@4	5 * @author Greg Sherwood <gsherwood@squiz.net>
Chris@4	6 * @copyright 2006-2015 Squiz Pty Ltd (ABN 77 084 670 600)
Chris@4	7 * @license https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
Chris@4	8 */
Chris@4	9
Chris@4	10 namespace PHP_CodeSniffer\Tokenizers;
Chris@4	11
Chris@4	12 use PHP_CodeSniffer\Util;
Chris@4	13 use PHP_CodeSniffer\Exceptions\TokenizerException;
Chris@4	14 use PHP_CodeSniffer\Config;
Chris@4	15
Chris@4	16 class JS extends Tokenizer
Chris@4	17 {
Chris@4	18
Chris@4	19
/**
 * Tokens that may open a scope, keyed by the opening token.
 *
 * Each entry describes how the scope for that token is delimited:
 *  - 'start'  : tokens that open the scope.
 *  - 'end'    : tokens that close the scope.
 *  - 'strict' : whether the token strictly requires an opener.
 *  - 'shared' : whether the scope closer can be shared with other tokens.
 *  - 'with'   : the tokens the closer can be shared with.
 *
 * CASE and DEFAULT are the entries here that share a scope closer.
 *
 * @var array
 */
public $scopeOpeners = [
    T_IF       => [
        'start'  => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
        'end'    => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
        'strict' => false,
        'shared' => false,
        'with'   => [],
    ],
    T_TRY      => [
        'start'  => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
        'end'    => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
        'strict' => true,
        'shared' => false,
        'with'   => [],
    ],
    T_CATCH    => [
        'start'  => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
        'end'    => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
        'strict' => true,
        'shared' => false,
        'with'   => [],
    ],
    T_ELSE     => [
        'start'  => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
        'end'    => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
        'strict' => false,
        'shared' => false,
        'with'   => [],
    ],
    T_FOR      => [
        'start'  => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
        'end'    => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
        'strict' => false,
        'shared' => false,
        'with'   => [],
    ],
    T_CLASS    => [
        'start'  => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
        'end'    => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
        'strict' => true,
        'shared' => false,
        'with'   => [],
    ],
    T_FUNCTION => [
        'start'  => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
        'end'    => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
        'strict' => false,
        'shared' => false,
        'with'   => [],
    ],
    T_WHILE    => [
        'start'  => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
        'end'    => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
        'strict' => false,
        'shared' => false,
        'with'   => [],
    ],
    T_DO       => [
        'start'  => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
        'end'    => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
        'strict' => true,
        'shared' => false,
        'with'   => [],
    ],
    T_SWITCH   => [
        'start'  => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
        'end'    => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
        'strict' => true,
        'shared' => false,
        'with'   => [],
    ],
    T_CASE     => [
        'start'  => [T_COLON => T_COLON],
        'end'    => [
            T_BREAK    => T_BREAK,
            T_RETURN   => T_RETURN,
            T_CONTINUE => T_CONTINUE,
            T_THROW    => T_THROW,
        ],
        'strict' => true,
        'shared' => true,
        'with'   => [
            T_DEFAULT => T_DEFAULT,
            T_CASE    => T_CASE,
            T_SWITCH  => T_SWITCH,
        ],
    ],
    T_DEFAULT  => [
        'start'  => [T_COLON => T_COLON],
        'end'    => [
            T_BREAK    => T_BREAK,
            T_RETURN   => T_RETURN,
            T_CONTINUE => T_CONTINUE,
            T_THROW    => T_THROW,
        ],
        'strict' => true,
        'shared' => true,
        'with'   => [
            T_CASE   => T_CASE,
            T_SWITCH => T_SWITCH,
        ],
    ],
];
Chris@4	133
/**
 * Tokens that end a scope, keyed and valued by the token itself.
 *
 * Kept as a separate property, duplicating end tokens that also appear
 * in $scopeOpeners, to save time during parsing of the file.
 *
 * NOTE(review): only the closing curly bracket and BREAK are listed here,
 * while $scopeOpeners also names RETURN, CONTINUE and THROW as closers for
 * CASE/DEFAULT - confirm the omission is intentional.
 *
 * @var array
 */
public $endScopeTokens = [
    T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET,
    T_BREAK               => T_BREAK,
];
Chris@4	147
/**
 * A list of special JS tokens and their types.
 *
 * Keys are the literal source text (keywords, punctuation and operators);
 * values are the names of the token constants they map to. tokenize()
 * looks keys up with strtolower() and resolves the value via constant().
 *
 * The boolean-or and bitwise-or keys must be the bare "||" and "|"
 * sequences: inside a single-quoted PHP string a backslash before "|" is
 * kept literally, so an escaped key would never match JS source.
 *
 * @var array
 */
protected $tokenValues = [
    'class'     => 'T_CLASS',
    'function'  => 'T_FUNCTION',
    'prototype' => 'T_PROTOTYPE',
    'try'       => 'T_TRY',
    'catch'     => 'T_CATCH',
    'return'    => 'T_RETURN',
    'throw'     => 'T_THROW',
    'break'     => 'T_BREAK',
    'switch'    => 'T_SWITCH',
    'continue'  => 'T_CONTINUE',
    'if'        => 'T_IF',
    'else'      => 'T_ELSE',
    'do'        => 'T_DO',
    'while'     => 'T_WHILE',
    'for'       => 'T_FOR',
    'var'       => 'T_VAR',
    'case'      => 'T_CASE',
    'default'   => 'T_DEFAULT',
    'true'      => 'T_TRUE',
    'false'     => 'T_FALSE',
    'null'      => 'T_NULL',
    'this'      => 'T_THIS',
    'typeof'    => 'T_TYPEOF',
    '('         => 'T_OPEN_PARENTHESIS',
    ')'         => 'T_CLOSE_PARENTHESIS',
    '{'         => 'T_OPEN_CURLY_BRACKET',
    '}'         => 'T_CLOSE_CURLY_BRACKET',
    '['         => 'T_OPEN_SQUARE_BRACKET',
    ']'         => 'T_CLOSE_SQUARE_BRACKET',
    '?'         => 'T_INLINE_THEN',
    '.'         => 'T_OBJECT_OPERATOR',
    '+'         => 'T_PLUS',
    '-'         => 'T_MINUS',
    '*'         => 'T_MULTIPLY',
    '%'         => 'T_MODULUS',
    '/'         => 'T_DIVIDE',
    '^'         => 'T_LOGICAL_XOR',
    ','         => 'T_COMMA',
    ';'         => 'T_SEMICOLON',
    ':'         => 'T_COLON',
    '<'         => 'T_LESS_THAN',
    '>'         => 'T_GREATER_THAN',
    '<<'        => 'T_SL',
    '>>'        => 'T_SR',
    '>>>'       => 'T_ZSR',
    '<<='       => 'T_SL_EQUAL',
    '>>='       => 'T_SR_EQUAL',
    '>>>='      => 'T_ZSR_EQUAL',
    '<='        => 'T_IS_SMALLER_OR_EQUAL',
    '>='        => 'T_IS_GREATER_OR_EQUAL',
    '=>'        => 'T_DOUBLE_ARROW',
    '!'         => 'T_BOOLEAN_NOT',
    '||'        => 'T_BOOLEAN_OR',
    '&&'        => 'T_BOOLEAN_AND',
    '|'         => 'T_BITWISE_OR',
    '&'         => 'T_BITWISE_AND',
    '!='        => 'T_IS_NOT_EQUAL',
    '!=='       => 'T_IS_NOT_IDENTICAL',
    '='         => 'T_EQUAL',
    '=='        => 'T_IS_EQUAL',
    '==='       => 'T_IS_IDENTICAL',
    '-='        => 'T_MINUS_EQUAL',
    '+='        => 'T_PLUS_EQUAL',
    '*='        => 'T_MUL_EQUAL',
    '/='        => 'T_DIV_EQUAL',
    '%='        => 'T_MOD_EQUAL',
    '++'        => 'T_INC',
    '--'        => 'T_DEC',
    '//'        => 'T_COMMENT',
    '/*'        => 'T_COMMENT',
    '/**'       => 'T_DOC_COMMENT',
    '*/'        => 'T_COMMENT',
];
Chris@4	227
/**
 * A list of string delimiters.
 *
 * Each delimiter maps to itself; tokenize() only tests the keys with
 * isset() to decide whether a character opens or closes a string.
 *
 * @var array
 */
protected $stringTokens = [
    "'" => "'",
    '"' => '"',
];
Chris@4	237
/**
 * A list of tokens that start comments, mapped to the token that ends them.
 *
 * A NULL value means the comment has no closing token; tokenize() ends
 * such a comment at the next newline. The keys must match the comment
 * opener entries of $tokenValues ("//", "/*" and "/**") because
 * tokenize() indexes this array with the opener that was found, and a
 * lone "/" must not be a key or every division would start a comment.
 *
 * @var array
 */
protected $commentTokens = [
    '//'  => null,
    '/*'  => '*/',
    '/**' => '*/',
];
Chris@4	248
Chris@4	249
/**
 * Initialise the tokenizer.
 *
 * Pre-checks the content to see if it looks minified and refuses to
 * continue if it does; otherwise construction is delegated to the parent.
 *
 * The default EOL value is a real newline character. A single-quoted
 * '\n' would be the two characters backslash and "n", which never occurs
 * as a line ending in the content being checked.
 *
 * @param string                  $content The content to tokenize,
 * @param \PHP_CodeSniffer\Config $config  The config data for the run.
 * @param string                  $eolChar The EOL char used in the content.
 *
 * @return void
 * @throws \PHP_CodeSniffer\Exceptions\TokenizerException If the file appears to be minified.
 */
public function __construct($content, Config $config, $eolChar="\n")
{
    if ($this->isMinifiedContent($content, $eolChar) === true) {
        throw new TokenizerException('File appears to be minified and cannot be processed');
    }

    parent::__construct($content, $config, $eolChar);

}//end __construct()
Chris@4	271
Chris@4	272
/**
 * Creates an array of tokens when given some JS code.
 *
 * Works in two passes. The first pass walks the content one character at
 * a time, collecting characters in a buffer and emitting whitespace,
 * string, regular expression and known-token entries (looked up in
 * $this->tokenValues) as they are recognised. The second pass joins the
 * pieces of each comment together, splits tokens that span several lines
 * into one token per line, merges numeric pieces into T_LNUMBER/T_DNUMBER
 * tokens and retypes the token following an object operator as T_STRING.
 *
 * The returned list always starts with an empty T_OPEN_TAG token and the
 * first pass always appends an empty T_CLOSE_TAG token.
 *
 * @param string $string The string to tokenize.
 *
 * @return array
 */
public function tokenize($string)
{
    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t* START JS TOKENIZING *".PHP_EOL;
    }

    // The longest known token bounds how far ahead we ever need to look.
    $maxTokenLength = 0;
    foreach ($this->tokenValues as $token => $values) {
        if (strlen($token) > $maxTokenLength) {
            $maxTokenLength = strlen($token);
        }
    }

    $tokens          = [];
    $inString        = '';
    $stringChar      = null;
    $inComment       = '';
    $buffer          = '';
    $preStringBuffer = '';
    $cleanBuffer     = false;

    $commentTokenizer = new Comment();

    $tokens[] = [
        'code'    => T_OPEN_TAG,
        'type'    => 'T_OPEN_TAG',
        'content' => '',
    ];

    // Convert newlines to single characters for ease of
    // processing. We will change them back later.
    $string = str_replace($this->eolChar, "\n", $string);

    $chars    = str_split($string);
    $numChars = count($chars);
    for ($i = 0; $i < $numChars; $i++) {
        $char = $chars[$i];

        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            $content       = Util\Common::prepareForOutput($char);
            $bufferContent = Util\Common::prepareForOutput($buffer);

            if ($inString !== '') {
                echo "\t";
            }

            if ($inComment !== '') {
                echo "\t";
            }

            echo "\tProcess char $i => $content (buffer: $bufferContent)".PHP_EOL;
        }//end if

        if ($inString === '' && $inComment === '' && $buffer !== '') {
            // If the buffer only has whitespace and we are about to
            // add a character, store the whitespace first.
            if (trim($char) !== '' && trim($buffer) === '') {
                $tokens[] = [
                    'code'    => T_WHITESPACE,
                    'type'    => 'T_WHITESPACE',
                    'content' => str_replace("\n", $this->eolChar, $buffer),
                ];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput($buffer);
                    echo "\t=> Added token T_WHITESPACE ($content)".PHP_EOL;
                }

                $buffer = '';
            }

            // If the buffer is not whitespace and we are about to
            // add a whitespace character, store the content first.
            if ($inString === ''
                && $inComment === ''
                && trim($char) === ''
                && trim($buffer) !== ''
            ) {
                $tokens[] = [
                    'code'    => T_STRING,
                    'type'    => 'T_STRING',
                    'content' => str_replace("\n", $this->eolChar, $buffer),
                ];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput($buffer);
                    echo "\t=> Added token T_STRING ($content)".PHP_EOL;
                }

                $buffer = '';
            }
        }//end if

        // Process strings.
        if ($inComment === '' && isset($this->stringTokens[$char]) === true) {
            if ($inString === $char) {
                // This could be the end of the string, but make sure it
                // is not escaped first.
                $escapes = 0;
                for ($x = ($i - 1); $x >= 0; $x--) {
                    if ($chars[$x] !== '\\') {
                        break;
                    }

                    $escapes++;
                }

                if ($escapes === 0 || ($escapes % 2) === 0) {
                    // There is an even number escape chars,
                    // so this is not escaped, it is the end of the string.
                    $tokens[] = [
                        'code'    => T_CONSTANT_ENCAPSED_STRING,
                        'type'    => 'T_CONSTANT_ENCAPSED_STRING',
                        'content' => str_replace("\n", $this->eolChar, $buffer).$char,
                    ];

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo "\t\t* found end of string *".PHP_EOL;
                        $content = Util\Common::prepareForOutput($buffer.$char);
                        echo "\t=> Added token T_CONSTANT_ENCAPSED_STRING ($content)".PHP_EOL;
                    }

                    $buffer          = '';
                    $preStringBuffer = '';
                    $inString        = '';
                    $stringChar      = null;
                    continue;
                }//end if
            } else if ($inString === '') {
                // Remember where the string started and what was buffered
                // before it, so we can rewind if this is not a string.
                $inString        = $char;
                $stringChar      = $i;
                $preStringBuffer = $buffer;

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\t* looking for string closer *".PHP_EOL;
                }
            }//end if
        }//end if

        if ($inString !== '' && $char === "\n") {
            // Unless this newline character is escaped, the string did not
            // end before the end of the line, which means it probably
            // wasn't a string at all (maybe a regex).
            if ($chars[($i - 1)] !== '\\') {
                $i               = $stringChar;
                $buffer          = $preStringBuffer;
                $preStringBuffer = '';
                $inString        = '';
                $stringChar      = null;
                $char            = $chars[$i];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\t* found newline before end of string, bailing *".PHP_EOL;
                }
            }
        }

        $buffer .= $char;

        // We don't look for special tokens inside strings,
        // so if we are in a string, we can continue here now
        // that the current char is in the buffer.
        if ($inString !== '') {
            continue;
        }

        // Special case for T_DIVIDE which can actually be
        // the start of a regular expression. The isset() guard avoids
        // reading past the end of the content when "/" is the last char.
        if ($buffer === $char
            && $char === '/'
            && (isset($chars[($i + 1)]) === false || $chars[($i + 1)] !== '*')
        ) {
            $regex = $this->getRegexToken($i, $string, $chars, $tokens);
            if ($regex !== null) {
                $tokens[] = [
                    'code'    => T_REGULAR_EXPRESSION,
                    'type'    => 'T_REGULAR_EXPRESSION',
                    'content' => $regex['content'],
                ];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput($regex['content']);
                    echo "\t=> Added token T_REGULAR_EXPRESSION ($content)".PHP_EOL;
                }

                $i           = $regex['end'];
                $buffer      = '';
                $cleanBuffer = false;
                continue;
            }//end if
        }//end if

        // Check for known tokens, but ignore tokens found that are not at
        // the end of a string, like FOR and this.FORmat.
        // The character class is A-Z (not A-z): the wider range also
        // matches [ \ ] ^ and `, which would stop keywords directly
        // followed by a bracket, as in "[true]", from being recognised.
        if (isset($this->tokenValues[strtolower($buffer)]) === true
            && (preg_match('|[a-zA-Z0-9_]|', $char) === 0
            || isset($chars[($i + 1)]) === false
            || preg_match('|[a-zA-Z0-9_]|', $chars[($i + 1)]) === 0)
        ) {
            $matchedToken    = false;
            $lookAheadLength = ($maxTokenLength - strlen($buffer));

            if ($lookAheadLength > 0) {
                // The buffer contains a token type, but we need
                // to look ahead at the next chars to see if this is
                // actually part of a larger token. For example,
                // FOR and FOREACH.
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\t* buffer possibly contains token, looking ahead $lookAheadLength chars *".PHP_EOL;
                }

                $charBuffer = $buffer;
                for ($x = 1; $x <= $lookAheadLength; $x++) {
                    if (isset($chars[($i + $x)]) === false) {
                        break;
                    }

                    $charBuffer .= $chars[($i + $x)];

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $content = Util\Common::prepareForOutput($charBuffer);
                        echo "\t\t=> Looking ahead $x chars => $content".PHP_EOL;
                    }

                    if (isset($this->tokenValues[strtolower($charBuffer)]) === true) {
                        // We've found something larger that matches
                        // so we can ignore this char. Except for 1 very specific
                        // case where a comment like /**/ needs to tokenize as
                        // T_COMMENT and not T_DOC_COMMENT.
                        $oldType = $this->tokenValues[strtolower($buffer)];
                        $newType = $this->tokenValues[strtolower($charBuffer)];
                        if ($oldType === 'T_COMMENT'
                            && $newType === 'T_DOC_COMMENT'
                            && isset($chars[($i + $x + 1)]) === true
                            && $chars[($i + $x + 1)] === '/'
                        ) {
                            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                                echo "\t\t* look ahead ignored T_DOC_COMMENT, continuing *".PHP_EOL;
                            }
                        } else {
                            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                                echo "\t\t* look ahead found more specific token ($newType), ignoring $i *".PHP_EOL;
                            }

                            $matchedToken = true;
                            break;
                        }
                    }//end if
                }//end for
            }//end if

            if ($matchedToken === false) {
                if (PHP_CODESNIFFER_VERBOSITY > 1 && $lookAheadLength > 0) {
                    echo "\t\t* look ahead found nothing *".PHP_EOL;
                }

                $value = $this->tokenValues[strtolower($buffer)];

                if ($value === 'T_FUNCTION' && $buffer !== 'function') {
                    // The function keyword needs to be all lowercase or else
                    // it is just a function called "Function".
                    $value = 'T_STRING';
                }

                $tokens[] = [
                    'code'    => constant($value),
                    'type'    => $value,
                    'content' => $buffer,
                ];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput($buffer);
                    echo "\t=> Added token $value ($content)".PHP_EOL;
                }

                $cleanBuffer = true;
            }//end if
        } else if (isset($this->tokenValues[strtolower($char)]) === true) {
            // No matter what token we end up using, we don't
            // need the content in the buffer any more because we have
            // found a valid token.
            $newContent = substr(str_replace("\n", $this->eolChar, $buffer), 0, -1);
            if ($newContent !== '') {
                $tokens[] = [
                    'code'    => T_STRING,
                    'type'    => 'T_STRING',
                    'content' => $newContent,
                ];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput(substr($buffer, 0, -1));
                    echo "\t=> Added token T_STRING ($content)".PHP_EOL;
                }
            }

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                echo "\t\t* char is token, looking ahead ".($maxTokenLength - 1).' chars *'.PHP_EOL;
            }

            // The char is a token type, but we need to look ahead at the
            // next chars to see if this is actually part of a larger token.
            // For example, = and ===.
            $charBuffer   = $char;
            $matchedToken = false;
            for ($x = 1; $x <= $maxTokenLength; $x++) {
                if (isset($chars[($i + $x)]) === false) {
                    break;
                }

                $charBuffer .= $chars[($i + $x)];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput($charBuffer);
                    echo "\t\t=> Looking ahead $x chars => $content".PHP_EOL;
                }

                if (isset($this->tokenValues[strtolower($charBuffer)]) === true) {
                    // We've found something larger that matches
                    // so we can ignore this char.
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $type = $this->tokenValues[strtolower($charBuffer)];
                        echo "\t\t* look ahead found more specific token ($type), ignoring $i *".PHP_EOL;
                    }

                    $matchedToken = true;
                    break;
                }
            }//end for

            if ($matchedToken === false) {
                $value    = $this->tokenValues[strtolower($char)];
                $tokens[] = [
                    'code'    => constant($value),
                    'type'    => $value,
                    'content' => $char,
                ];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\t* look ahead found nothing *".PHP_EOL;
                    $content = Util\Common::prepareForOutput($char);
                    echo "\t=> Added token $value ($content)".PHP_EOL;
                }

                $cleanBuffer = true;
            } else {
                // Restart the buffer at this char so the larger token
                // can be assembled on the following iterations.
                $buffer = $char;
            }//end if
        }//end if

        // Keep track of content inside comments.
        if ($inComment === ''
            && array_key_exists($buffer, $this->commentTokens) === true
        ) {
            // This is not really a comment if the content
            // looks like \// (i.e., it is escaped).
            if (isset($chars[($i - 2)]) === true && $chars[($i - 2)] === '\\') {
                $lastToken   = array_pop($tokens);
                $lastContent = $lastToken['content'];
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $value   = $this->tokenValues[strtolower($lastContent)];
                    $content = Util\Common::prepareForOutput($lastContent);
                    echo "\t=> Removed token $value ($content)".PHP_EOL;
                }

                // Re-add the removed token one character at a time.
                $lastChars    = str_split($lastContent);
                $lastNumChars = count($lastChars);
                for ($x = 0; $x < $lastNumChars; $x++) {
                    $lastChar = $lastChars[$x];
                    $value    = $this->tokenValues[strtolower($lastChar)];
                    $tokens[] = [
                        'code'    => constant($value),
                        'type'    => $value,
                        'content' => $lastChar,
                    ];

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $content = Util\Common::prepareForOutput($lastChar);
                        echo "\t=> Added token $value ($content)".PHP_EOL;
                    }
                }
            } else {
                // We have started a comment.
                $inComment = $buffer;

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\t* looking for end of comment *".PHP_EOL;
                }
            }//end if
        } else if ($inComment !== '') {
            if ($this->commentTokens[$inComment] === null) {
                // Comment ends at the next newline.
                if (strpos($buffer, "\n") !== false) {
                    $inComment = '';
                }
            } else {
                if ($this->commentTokens[$inComment] === $buffer) {
                    $inComment = '';
                }
            }

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                if ($inComment === '') {
                    echo "\t\t* found end of comment *".PHP_EOL;
                }
            }

            if ($inComment === '' && $cleanBuffer === false) {
                $tokens[] = [
                    'code'    => T_STRING,
                    'type'    => 'T_STRING',
                    'content' => str_replace("\n", $this->eolChar, $buffer),
                ];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput($buffer);
                    echo "\t=> Added token T_STRING ($content)".PHP_EOL;
                }

                $buffer = '';
            }
        }//end if

        if ($cleanBuffer === true) {
            $buffer      = '';
            $cleanBuffer = false;
        }
    }//end for

    if (empty($buffer) === false) {
        if ($inString !== '') {
            // The string did not end before the end of the file,
            // which means there was probably a syntax error somewhere.
            $tokens[] = [
                'code'    => T_STRING,
                'type'    => 'T_STRING',
                'content' => str_replace("\n", $this->eolChar, $buffer),
            ];

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                $content = Util\Common::prepareForOutput($buffer);
                echo "\t=> Added token T_STRING ($content)".PHP_EOL;
            }
        } else {
            // Buffer contains whitespace from the end of the file.
            $tokens[] = [
                'code'    => T_WHITESPACE,
                'type'    => 'T_WHITESPACE',
                'content' => str_replace("\n", $this->eolChar, $buffer),
            ];

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                $content = Util\Common::prepareForOutput($buffer);
                echo "\t=> Added token T_WHITESPACE ($content)".PHP_EOL;
            }
        }//end if
    }//end if

    $tokens[] = [
        'code'    => T_CLOSE_TAG,
        'type'    => 'T_CLOSE_TAG',
        'content' => '',
    ];

    /*
        Now that we have done some basic tokenizing, we need to
        modify the tokens to join some together and split some apart
        so they match what the PHP tokenizer does.
    */

    $finalTokens = [];
    $newStackPtr = 0;
    $numTokens   = count($tokens);
    for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
        $token = $tokens[$stackPtr];

        /*
            Look for comments and join the tokens together.
        */

        if ($token['code'] === T_COMMENT || $token['code'] === T_DOC_COMMENT) {
            $newContent   = '';
            $tokenContent = $token['content'];

            $endContent = null;
            if (isset($this->commentTokens[$tokenContent]) === true) {
                $endContent = $this->commentTokens[$tokenContent];
            }

            while ($tokenContent !== $endContent) {
                if ($endContent === null
                    && strpos($tokenContent, $this->eolChar) !== false
                ) {
                    // A null end token means the comment ends at the end of
                    // the line so we look for newlines and split the token.
                    $tokens[$stackPtr]['content'] = substr(
                        $tokenContent,
                        (strpos($tokenContent, $this->eolChar) + strlen($this->eolChar))
                    );

                    $tokenContent = substr(
                        $tokenContent,
                        0,
                        (strpos($tokenContent, $this->eolChar) + strlen($this->eolChar))
                    );

                    // If the substr failed, skip the token as the content
                    // will now be blank.
                    if ($tokens[$stackPtr]['content'] !== false
                        && $tokens[$stackPtr]['content'] !== ''
                    ) {
                        $stackPtr--;
                    }

                    break;
                }//end if

                $stackPtr++;
                $newContent .= $tokenContent;
                if (isset($tokens[$stackPtr]) === false) {
                    break;
                }

                $tokenContent = $tokens[$stackPtr]['content'];
            }//end while

            if ($token['code'] === T_DOC_COMMENT) {
                // Doc comments are handed to the comment tokenizer and its
                // tokens are appended as-is.
                $commentTokens = $commentTokenizer->tokenizeString($newContent.$tokenContent, $this->eolChar, $newStackPtr);
                foreach ($commentTokens as $commentToken) {
                    $finalTokens[$newStackPtr] = $commentToken;
                    $newStackPtr++;
                }

                continue;
            } else {
                // Save the new content in the current token so
                // the code below can chop it up on newlines.
                $token['content'] = $newContent.$tokenContent;
            }
        }//end if

        /*
            If this token has newlines in its content, split each line up
            and create a new token for each line. We do this so it's easier
            to ascertain where errors occur on a line.
            Note that $token['content'] is the token's content.
        */

        if (strpos($token['content'], $this->eolChar) !== false) {
            $tokenLines = explode($this->eolChar, $token['content']);
            $numLines   = count($tokenLines);

            for ($i = 0; $i < $numLines; $i++) {
                $newToken = ['content' => $tokenLines[$i]];
                if ($i === ($numLines - 1)) {
                    // A trailing empty piece means the content ended with
                    // an EOL, so there is no final partial line to add.
                    if ($tokenLines[$i] === '') {
                        break;
                    }
                } else {
                    $newToken['content'] .= $this->eolChar;
                }

                $newToken['type']          = $token['type'];
                $newToken['code']          = $token['code'];
                $finalTokens[$newStackPtr] = $newToken;
                $newStackPtr++;
            }
        } else {
            $finalTokens[$newStackPtr] = $token;
            $newStackPtr++;
        }//end if

        // Convert numbers, including decimals.
        if ($token['code'] === T_STRING
            || $token['code'] === T_OBJECT_OPERATOR
        ) {
            $newContent  = '';
            $oldStackPtr = $stackPtr;
            while (preg_match('|^[0-9\.]+$|', $tokens[$stackPtr]['content']) !== 0) {
                $newContent .= $tokens[$stackPtr]['content'];
                $stackPtr++;
            }

            if ($newContent !== '' && $newContent !== '.') {
                $finalTokens[($newStackPtr - 1)]['content'] = $newContent;
                if (ctype_digit($newContent) === true) {
                    $finalTokens[($newStackPtr - 1)]['code'] = constant('T_LNUMBER');
                    $finalTokens[($newStackPtr - 1)]['type'] = 'T_LNUMBER';
                } else {
                    $finalTokens[($newStackPtr - 1)]['code'] = constant('T_DNUMBER');
                    $finalTokens[($newStackPtr - 1)]['type'] = 'T_DNUMBER';
                }

                // Step back one so the loop increment lands on the first
                // token that was not part of the number.
                $stackPtr--;
                continue;
            } else {
                $stackPtr = $oldStackPtr;
            }
        }//end if

        // Convert the token after an object operator into a string, in most cases.
        if ($token['code'] === T_OBJECT_OPERATOR) {
            for ($i = ($stackPtr + 1); $i < $numTokens; $i++) {
                if (isset(Util\Tokens::$emptyTokens[$tokens[$i]['code']]) === true) {
                    continue;
                }

                if ($tokens[$i]['code'] !== T_PROTOTYPE
                    && $tokens[$i]['code'] !== T_LNUMBER
                    && $tokens[$i]['code'] !== T_DNUMBER
                ) {
                    $tokens[$i]['code'] = T_STRING;
                    $tokens[$i]['type'] = 'T_STRING';
                }

                break;
            }
        }
    }//end for

    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t* END TOKENIZING *".PHP_EOL;
    }

    return $finalTokens;

}//end tokenize()
Chris@4	902
Chris@4	903
/**
 * Tokenizes a regular expression if one is found.
 *
 * The "/" at $char is only treated as the start of a regular expression when:
 *  - the last non-empty token already collected is one that can precede a regex;
 *  - an unescaped closing "/" is found before the end of the line; and
 *  - the first non-space character after the regex (and any modifier letters)
 *    is one that can follow a regex, or is the start of the EOL sequence.
 *
 * If a regular expression is not found, NULL is returned.
 *
 * @param int    $char   The index of the possible regex start character.
 * @param string $string The complete content of the string being tokenized.
 *                       Assumed to be index-aligned with $chars, as the EOL
 *                       checks read both at the same offset.
 * @param array  $chars  An array of characters being tokenized.
 * @param array  $tokens The current array of tokens found in the string.
 *
 * @return array<string, int|string>|null Array with the keys 'start' and 'end'
 *                                        (indexes into $chars) and 'content'
 *                                        (the regex text), or NULL.
 */
public function getRegexToken($char, $string, $chars, $tokens)
{
    // Tokens that are allowed to directly precede a regular expression.
    $beforeTokens = [
        T_EQUAL               => true,
        T_IS_NOT_EQUAL        => true,
        T_IS_IDENTICAL        => true,
        T_IS_NOT_IDENTICAL    => true,
        T_OPEN_PARENTHESIS    => true,
        T_OPEN_SQUARE_BRACKET => true,
        T_RETURN              => true,
        T_BOOLEAN_OR          => true,
        T_BOOLEAN_AND         => true,
        T_BOOLEAN_NOT         => true,
        T_BITWISE_OR          => true,
        T_BITWISE_AND         => true,
        T_COMMA               => true,
        T_COLON               => true,
        T_TYPEOF              => true,
        T_INLINE_THEN         => true,
        T_INLINE_ELSE         => true,
    ];

    // Single characters that are allowed to follow a regular expression.
    // The EOL sequence is checked separately because it may be longer than
    // one character and so can never equal a single entry of $chars.
    $afterTokens = [
        ',' => true,
        ')' => true,
        ']' => true,
        ';' => true,
        ' ' => true,
        '.' => true,
        ':' => true,
    ];

    $eolLength = strlen($this->eolChar);

    // Find the last non-whitespace token that was added
    // to the tokens array.
    $numTokens = count($tokens);
    for ($prev = ($numTokens - 1); $prev >= 0; $prev--) {
        if (isset(Util\Tokens::$emptyTokens[$tokens[$prev]['code']]) === false) {
            break;
        }
    }

    // $prev is -1 when there is no non-empty token at all; that cannot be
    // looked up in $tokens, so it is treated as "not a regex".
    if ($prev < 0 || isset($beforeTokens[$tokens[$prev]['code']]) === false) {
        return null;
    }

    // This is probably a regular expression, so look for the end of it.
    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t* token possibly starts a regular expression *".PHP_EOL;
    }

    $numChars = count($chars);
    for ($next = ($char + 1); $next < $numChars; $next++) {
        if ($chars[$next] === '/') {
            // A slash ends the regex unless it is escaped. It is escaped
            // only when preceded by an odd number of backslashes, so count
            // them (never looking back past the opening slash).
            $backslashes = 0;
            for ($back = ($next - 1); $back > $char && $chars[$back] === '\\'; $back--) {
                $backslashes++;
            }

            if (($backslashes % 2) === 0) {
                break;
            }
        } else {
            $possibleEolChar = substr($string, $next, $eolLength);
            if ($possibleEolChar === $this->eolChar) {
                // This is the last token on the line and regular
                // expressions need to be defined on a single line,
                // so this is not a regular expression.
                break;
            }
        }
    }

    // Either the input ran out or the line ended before a closing slash.
    if ($next >= $numChars || $chars[$next] !== '/') {
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t* could not find end of regular expression *".PHP_EOL;
        }

        return null;
    }

    while (isset($chars[($next + 1)]) === true
        && preg_match('|[a-zA-Z]|', $chars[($next + 1)]) !== 0
    ) {
        // The token directly after the end of the regex can
        // be modifiers like global and case insensitive
        // (e.g., /pattern/gi).
        $next++;
    }

    $regexEnd = $next;
    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t* found end of regular expression at token $regexEnd *".PHP_EOL;
    }

    // Skip any spaces that directly follow the regex.
    for ($next += 1; $next < $numChars; $next++) {
        if ($chars[$next] !== ' ') {
            break;
        }
    }

    // Running off the end of the input leaves nothing to validate against,
    // so (as before) the candidate is rejected, but without reading past
    // the end of $chars.
    $looksValid = false;
    if ($next < $numChars) {
        if (isset($afterTokens[$chars[$next]]) === true) {
            $looksValid = true;
        } else if ($eolLength > 0 && substr($string, $next, $eolLength) === $this->eolChar) {
            $looksValid = true;
        }
    }

    if ($looksValid === false) {
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t* tokens after regular expression do not look correct *".PHP_EOL;
        }

        return null;
    }

    // This is a regular expression, so join all the tokens together.
    $content = '';
    for ($x = $char; $x <= $regexEnd; $x++) {
        $content .= $chars[$x];
    }

    $token = [
        'start'   => $char,
        'end'     => $regexEnd,
        'content' => $content,
    ];

    return $token;

}//end getRegexToken()
Chris@4	1044
Chris@4	1045
/**
 * Performs additional processing after main tokenizing.
 *
 * This additional processing looks for properties, closures, labels and objects.
 * It walks $this->tokens once and, in place:
 *  - changes T_FUNCTION tokens that are directly followed by an open
 *    parenthesis into T_CLOSURE (and updates the matching conditions);
 *  - changes a T_STRING followed by parentheses and a curly bracket directly
 *    inside a class into a T_FUNCTION (ES6 method) and sets its scope and
 *    parenthesis indexes;
 *  - changes remaining curly brackets that have a bracket closer but no scope
 *    condition into T_OBJECT / T_CLOSE_OBJECT;
 *  - changes T_COLON into T_INLINE_ELSE when a T_INLINE_THEN precedes it on
 *    the same line; and
 *  - changes the string before any other colon into T_PROPERTY (inside an
 *    object) or T_LABEL (outside of one).
 *
 * @return void
 */
public function processAdditional()
{
    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t* START ADDITIONAL JS PROCESSING *".PHP_EOL;
    }

    $numTokens = count($this->tokens);

    // Stack of T_OBJECT opener indexes that are currently open. Its depth
    // is also used to indent the verbose output.
    $classStack = [];

    for ($i = 0; $i < $numTokens; $i++) {
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            $type    = $this->tokens[$i]['type'];
            $content = Util\Common::prepareForOutput($this->tokens[$i]['content']);

            echo str_repeat("\t", count($classStack));
            echo "\tProcess token $i: $type => $content".PHP_EOL;
        }

        // Looking for functions that are actually closures.
        if ($this->tokens[$i]['code'] === T_FUNCTION && isset($this->tokens[$i]['scope_opener']) === true) {
            // Find the first non-empty token after the function keyword.
            for ($x = ($i + 1); $x < $numTokens; $x++) {
                if (isset(Util\Tokens::$emptyTokens[$this->tokens[$x]['code']]) === false) {
                    break;
                }
            }

            // No name between "function" and "(" means this is a closure.
            if ($this->tokens[$x]['code'] === T_OPEN_PARENTHESIS) {
                $this->tokens[$i]['code'] = T_CLOSURE;
                $this->tokens[$i]['type'] = 'T_CLOSURE';
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $line = $this->tokens[$i]['line'];
                    echo str_repeat("\t", count($classStack));
                    echo "\t* token $i on line $line changed from T_FUNCTION to T_CLOSURE *".PHP_EOL;
                }

                // Update the condition recorded for this function on every
                // token inside its scope.
                for ($x = ($this->tokens[$i]['scope_opener'] + 1); $x < $this->tokens[$i]['scope_closer']; $x++) {
                    if (isset($this->tokens[$x]['conditions'][$i]) === false) {
                        continue;
                    }

                    $this->tokens[$x]['conditions'][$i] = T_CLOSURE;
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $type = $this->tokens[$x]['type'];
                        echo str_repeat("\t", count($classStack));
                        echo "\t\t* cleaned $x ($type) *".PHP_EOL;
                    }
                }
            }//end if

            continue;
        } else if ($this->tokens[$i]['code'] === T_OPEN_CURLY_BRACKET
            && isset($this->tokens[$i]['scope_condition']) === false
            && isset($this->tokens[$i]['bracket_closer']) === true
        ) {
            // The innermost condition this bracket sits in (false if none).
            $condition = $this->tokens[$i]['conditions'];
            $condition = end($condition);
            if ($condition === T_CLASS) {
                // Possibly an ES6 method. To be classified as one, the previous
                // non-empty tokens need to be a set of parenthesis, and then a string
                // (the method name).
                for ($parenCloser = ($i - 1); $parenCloser > 0; $parenCloser--) {
                    if (isset(Util\Tokens::$emptyTokens[$this->tokens[$parenCloser]['code']]) === false) {
                        break;
                    }
                }

                // Only continue when the closer knows its opener; otherwise
                // fall through and treat the bracket as an object.
                if ($this->tokens[$parenCloser]['code'] === T_CLOSE_PARENTHESIS
                    && isset($this->tokens[$parenCloser]['parenthesis_opener']) === true
                ) {
                    $parenOpener = $this->tokens[$parenCloser]['parenthesis_opener'];
                    for ($name = ($parenOpener - 1); $name > 0; $name--) {
                        if (isset(Util\Tokens::$emptyTokens[$this->tokens[$name]['code']]) === false) {
                            break;
                        }
                    }

                    if ($this->tokens[$name]['code'] === T_STRING) {
                        // We found a method name.
                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            $line = $this->tokens[$name]['line'];
                            echo str_repeat("\t", count($classStack));
                            echo "\t* token $name on line $line changed from T_STRING to T_FUNCTION *".PHP_EOL;
                        }

                        $closer = $this->tokens[$i]['bracket_closer'];

                        $this->tokens[$name]['code'] = T_FUNCTION;
                        $this->tokens[$name]['type'] = 'T_FUNCTION';

                        // The name, the opening bracket and the closing bracket
                        // all get the same scope and parenthesis indexes.
                        foreach ([$name, $i, $closer] as $token) {
                            $this->tokens[$token]['scope_condition']    = $name;
                            $this->tokens[$token]['scope_opener']       = $i;
                            $this->tokens[$token]['scope_closer']       = $closer;
                            $this->tokens[$token]['parenthesis_opener'] = $parenOpener;
                            $this->tokens[$token]['parenthesis_closer'] = $parenCloser;
                            $this->tokens[$token]['parenthesis_owner']  = $name;
                        }

                        $this->tokens[$parenOpener]['parenthesis_owner'] = $name;
                        $this->tokens[$parenCloser]['parenthesis_owner'] = $name;

                        // Add the new function as a condition of every token
                        // in the method body, keeping conditions in index order.
                        for ($x = ($i + 1); $x < $closer; $x++) {
                            $this->tokens[$x]['conditions'][$name] = T_FUNCTION;
                            ksort($this->tokens[$x]['conditions'], SORT_NUMERIC);
                            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                                $type = $this->tokens[$x]['type'];
                                echo str_repeat("\t", count($classStack));
                                echo "\t\t* added T_FUNCTION condition to $x ($type) *".PHP_EOL;
                            }
                        }

                        continue;
                    }//end if
                }//end if
            }//end if

            // Not a method body, so this bracket pair is an object.
            $classStack[] = $i;

            $closer = $this->tokens[$i]['bracket_closer'];
            $this->tokens[$i]['code']      = T_OBJECT;
            $this->tokens[$i]['type']      = 'T_OBJECT';
            $this->tokens[$closer]['code'] = T_CLOSE_OBJECT;
            $this->tokens[$closer]['type'] = 'T_CLOSE_OBJECT';

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                echo str_repeat("\t", count($classStack));
                echo "\t* token $i converted from T_OPEN_CURLY_BRACKET to T_OBJECT *".PHP_EOL;
                echo str_repeat("\t", count($classStack));
                echo "\t* token $closer converted from T_CLOSE_CURLY_BRACKET to T_CLOSE_OBJECT *".PHP_EOL;
            }

            for ($x = ($i + 1); $x < $closer; $x++) {
                $this->tokens[$x]['conditions'][$i] = T_OBJECT;
                ksort($this->tokens[$x]['conditions'], SORT_NUMERIC);
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $type = $this->tokens[$x]['type'];
                    echo str_repeat("\t", count($classStack));
                    echo "\t\t* added T_OBJECT condition to $x ($type) *".PHP_EOL;
                }
            }
        } else if ($this->tokens[$i]['code'] === T_CLOSE_OBJECT) {
            // Leaving the innermost open object.
            array_pop($classStack);
        } else if ($this->tokens[$i]['code'] === T_COLON) {
            // If it is a scope opener, it belongs to a
            // DEFAULT or CASE statement.
            if (isset($this->tokens[$i]['scope_condition']) === true) {
                continue;
            }

            // Make sure this is not part of an inline IF statement.
            // Only tokens on the same line as the colon are considered.
            for ($x = ($i - 1); $x >= 0; $x--) {
                if ($this->tokens[$x]['code'] === T_INLINE_THEN) {
                    $this->tokens[$i]['code'] = T_INLINE_ELSE;
                    $this->tokens[$i]['type'] = 'T_INLINE_ELSE';

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo str_repeat("\t", count($classStack));
                        echo "\t* token $i converted from T_COLON to T_INLINE_ELSE *".PHP_EOL;
                    }

                    continue 2;
                } else if ($this->tokens[$x]['line'] < $this->tokens[$i]['line']) {
                    break;
                }
            }

            // The string to the left of the colon is either a property or label.
            for ($label = ($i - 1); $label >= 0; $label--) {
                if (isset(Util\Tokens::$emptyTokens[$this->tokens[$label]['code']]) === false) {
                    break;
                }
            }

            // $label is -1 when only empty tokens precede the colon; there
            // is nothing to convert in that case.
            if ($label < 0
                || ($this->tokens[$label]['code'] !== T_STRING
                && $this->tokens[$label]['code'] !== T_CONSTANT_ENCAPSED_STRING)
            ) {
                continue;
            }

            if (empty($classStack) === false) {
                // Inside an object, so this is a property name.
                $this->tokens[$label]['code'] = T_PROPERTY;
                $this->tokens[$label]['type'] = 'T_PROPERTY';

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo str_repeat("\t", count($classStack));
                    echo "\t* token $label converted from T_STRING to T_PROPERTY *".PHP_EOL;
                }
            } else {
                // Not inside any object, so this is a statement label.
                $this->tokens[$label]['code'] = T_LABEL;
                $this->tokens[$label]['type'] = 'T_LABEL';

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo str_repeat("\t", count($classStack));
                    echo "\t* token $label converted from T_STRING to T_LABEL *".PHP_EOL;
                }
            }//end if
        }//end if
    }//end for

    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t* END ADDITIONAL JS PROCESSING *".PHP_EOL;
    }

}//end processAdditional()
Chris@4	1255
Chris@4	1256
Chris@4	1257 }//end class

Mercurial > hg > cmmr2012-drupal-site

annotate vendor/squizlabs/php_codesniffer/src/Tokenizers/JS.php @ 5:12f9dff5fda9 tip