isophonics-drupal-site: vendor/squizlabs/php_codesniffer/src/Tokenizers/JS.php annotate

annotate vendor/squizlabs/php_codesniffer/src/Tokenizers/JS.php @ 19:fa3358dc1485 tip

Add ndrum files

author	Chris Cannam
date	Wed, 28 Aug 2019 13:14:47 +0100
parents	af1871eacc83
children

rev	line source
Chris@17	1 <?php
Chris@17	2 /**
Chris@17	3 * Tokenizes JS code.
Chris@17	4 *
Chris@17	5 * @author Greg Sherwood <gsherwood@squiz.net>
Chris@17	6 * @copyright 2006-2015 Squiz Pty Ltd (ABN 77 084 670 600)
Chris@17	7 * @license https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
Chris@17	8 */
Chris@17	9
Chris@17	10 namespace PHP_CodeSniffer\Tokenizers;
Chris@17	11
Chris@17	12 use PHP_CodeSniffer\Util;
Chris@17	13 use PHP_CodeSniffer\Exceptions\TokenizerException;
Chris@17	14 use PHP_CodeSniffer\Config;
Chris@17	15
Chris@17	16 class JS extends Tokenizer
Chris@17	17 {
Chris@17	18
Chris@17	19
Chris@17	20 /**
Chris@17	21 * A list of tokens that are allowed to open a scope.
Chris@17	22 *
Chris@17	23 * This array also records which tokens open and close each scope, whether
Chris@17	24 * the token strictly requires an opener, whether the token can share a scope
Chris@17	25 * closer, and which tokens it can share that closer with. An example of a
Chris@17	26 * token that shares a scope closer is a CASE scope.
Chris@17	27 *
Chris@17	28 * @var array
Chris@17	29 */
Chris@17	30 public $scopeOpeners = [
Chris@17	31 T_IF => [
Chris@17	32 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
Chris@17	33 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
Chris@17	34 'strict' => false,
Chris@17	35 'shared' => false,
Chris@17	36 'with' => [],
Chris@17	37 ],
Chris@17	38 T_TRY => [
Chris@17	39 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
Chris@17	40 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
Chris@17	41 'strict' => true,
Chris@17	42 'shared' => false,
Chris@17	43 'with' => [],
Chris@17	44 ],
Chris@17	45 T_CATCH => [
Chris@17	46 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
Chris@17	47 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
Chris@17	48 'strict' => true,
Chris@17	49 'shared' => false,
Chris@17	50 'with' => [],
Chris@17	51 ],
Chris@17	52 T_ELSE => [
Chris@17	53 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
Chris@17	54 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
Chris@17	55 'strict' => false,
Chris@17	56 'shared' => false,
Chris@17	57 'with' => [],
Chris@17	58 ],
Chris@17	59 T_FOR => [
Chris@17	60 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
Chris@17	61 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
Chris@17	62 'strict' => false,
Chris@17	63 'shared' => false,
Chris@17	64 'with' => [],
Chris@17	65 ],
Chris@17	66 T_CLASS => [
Chris@17	67 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
Chris@17	68 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
Chris@17	69 'strict' => true,
Chris@17	70 'shared' => false,
Chris@17	71 'with' => [],
Chris@17	72 ],
Chris@17	73 T_FUNCTION => [
Chris@17	74 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
Chris@17	75 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
Chris@17	76 'strict' => false,
Chris@17	77 'shared' => false,
Chris@17	78 'with' => [],
Chris@17	79 ],
Chris@17	80 T_WHILE => [
Chris@17	81 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
Chris@17	82 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
Chris@17	83 'strict' => false,
Chris@17	84 'shared' => false,
Chris@17	85 'with' => [],
Chris@17	86 ],
Chris@17	87 T_DO => [
Chris@17	88 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
Chris@17	89 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
Chris@17	90 'strict' => true,
Chris@17	91 'shared' => false,
Chris@17	92 'with' => [],
Chris@17	93 ],
Chris@17	94 T_SWITCH => [
Chris@17	95 'start' => [T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET],
Chris@17	96 'end' => [T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET],
Chris@17	97 'strict' => true,
Chris@17	98 'shared' => false,
Chris@17	99 'with' => [],
Chris@17	100 ],
Chris@17	101 T_CASE => [
Chris@17	102 'start' => [T_COLON => T_COLON],
Chris@17	103 'end' => [
Chris@17	104 T_BREAK => T_BREAK,
Chris@17	105 T_RETURN => T_RETURN,
Chris@17	106 T_CONTINUE => T_CONTINUE,
Chris@17	107 T_THROW => T_THROW,
Chris@17	108 ],
Chris@17	109 'strict' => true,
Chris@17	110 'shared' => true,
Chris@17	111 'with' => [
Chris@17	112 T_DEFAULT => T_DEFAULT,
Chris@17	113 T_CASE => T_CASE,
Chris@17	114 T_SWITCH => T_SWITCH,
Chris@17	115 ],
Chris@17	116 ],
Chris@17	117 T_DEFAULT => [
Chris@17	118 'start' => [T_COLON => T_COLON],
Chris@17	119 'end' => [
Chris@17	120 T_BREAK => T_BREAK,
Chris@17	121 T_RETURN => T_RETURN,
Chris@17	122 T_CONTINUE => T_CONTINUE,
Chris@17	123 T_THROW => T_THROW,
Chris@17	124 ],
Chris@17	125 'strict' => true,
Chris@17	126 'shared' => true,
Chris@17	127 'with' => [
Chris@17	128 T_CASE => T_CASE,
Chris@17	129 T_SWITCH => T_SWITCH,
Chris@17	130 ],
Chris@17	131 ],
Chris@17	132 ];
Chris@17	133
Chris@17	134 /**
Chris@17	135 * A list of tokens that end the scope.
Chris@17	136 *
Chris@17	137 * This array is just a unique collection of the end tokens
Chris@17	138 * from the $scopeOpeners array. The data is duplicated here to
Chris@17	139 * save time during parsing of the file.
Chris@17	140 *
Chris@17	141 * @var array
Chris@17	142 */
Chris@17	143 public $endScopeTokens = [
Chris@17	144 T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET,
Chris@17	145 T_BREAK => T_BREAK,
Chris@17	146 ];
Chris@17	147
/**
 * A list of special JS tokens and their types.
 *
 * Keys are the literal source text (keywords are stored lowercase because
 * the tokenizer lowercases the buffer before looking it up); values are the
 * names of the token constants, which are resolved with constant().
 *
 * @var array
 */
protected $tokenValues = [
    'class'     => 'T_CLASS',
    'function'  => 'T_FUNCTION',
    'prototype' => 'T_PROTOTYPE',
    'try'       => 'T_TRY',
    'catch'     => 'T_CATCH',
    'return'    => 'T_RETURN',
    'throw'     => 'T_THROW',
    'break'     => 'T_BREAK',
    'switch'    => 'T_SWITCH',
    'continue'  => 'T_CONTINUE',
    'if'        => 'T_IF',
    'else'      => 'T_ELSE',
    'do'        => 'T_DO',
    'while'     => 'T_WHILE',
    'for'       => 'T_FOR',
    'var'       => 'T_VAR',
    'case'      => 'T_CASE',
    'default'   => 'T_DEFAULT',
    'true'      => 'T_TRUE',
    'false'     => 'T_FALSE',
    'null'      => 'T_NULL',
    'this'      => 'T_THIS',
    'typeof'    => 'T_TYPEOF',
    '('         => 'T_OPEN_PARENTHESIS',
    ')'         => 'T_CLOSE_PARENTHESIS',
    '{'         => 'T_OPEN_CURLY_BRACKET',
    '}'         => 'T_CLOSE_CURLY_BRACKET',
    '['         => 'T_OPEN_SQUARE_BRACKET',
    ']'         => 'T_CLOSE_SQUARE_BRACKET',
    '?'         => 'T_INLINE_THEN',
    '.'         => 'T_OBJECT_OPERATOR',
    '+'         => 'T_PLUS',
    '-'         => 'T_MINUS',
    '*'         => 'T_MULTIPLY',
    '%'         => 'T_MODULUS',
    '/'         => 'T_DIVIDE',
    '^'         => 'T_LOGICAL_XOR',
    ','         => 'T_COMMA',
    ';'         => 'T_SEMICOLON',
    ':'         => 'T_COLON',
    '<'         => 'T_LESS_THAN',
    '>'         => 'T_GREATER_THAN',
    '<<'        => 'T_SL',
    '>>'        => 'T_SR',
    '>>>'       => 'T_ZSR',
    '<<='       => 'T_SL_EQUAL',
    '>>='       => 'T_SR_EQUAL',
    '>>>='      => 'T_ZSR_EQUAL',
    '<='        => 'T_IS_SMALLER_OR_EQUAL',
    '>='        => 'T_IS_GREATER_OR_EQUAL',
    '=>'        => 'T_DOUBLE_ARROW',
    '!'         => 'T_BOOLEAN_NOT',
    '||'        => 'T_BOOLEAN_OR',
    '&&'        => 'T_BOOLEAN_AND',
    '|'         => 'T_BITWISE_OR',
    '&'         => 'T_BITWISE_AND',
    '!='        => 'T_IS_NOT_EQUAL',
    '!=='       => 'T_IS_NOT_IDENTICAL',
    '='         => 'T_EQUAL',
    '=='        => 'T_IS_EQUAL',
    '==='       => 'T_IS_IDENTICAL',
    '-='        => 'T_MINUS_EQUAL',
    '+='        => 'T_PLUS_EQUAL',
    '*='        => 'T_MUL_EQUAL',
    '/='        => 'T_DIV_EQUAL',
    '%='        => 'T_MOD_EQUAL',
    '++'        => 'T_INC',
    '--'        => 'T_DEC',
    '//'        => 'T_COMMENT',
    '/*'        => 'T_COMMENT',
    '/**'       => 'T_DOC_COMMENT',
    '*/'        => 'T_COMMENT',
];
Chris@17	227
Chris@17	228 /**
Chris@17	229 * A list of string delimiters.
Chris@17	230 *
Chris@17	231 * @var array
Chris@17	232 */
Chris@17	233 protected $stringTokens = [
Chris@17	234 '\'' => '\'',
Chris@17	235 '"' => '"',
Chris@17	236 ];
Chris@17	237
/**
 * A list of tokens that start comments, mapped to the token that ends them.
 *
 * A NULL end token means the comment has no closing token and instead
 * ends at the next newline.
 *
 * @var array
 */
protected $commentTokens = [
    '//'  => null,
    '/*'  => '*/',
    '/**' => '*/',
];
Chris@17	248
Chris@17	249
/**
 * Initialise the tokenizer.
 *
 * Pre-checks the content to see if it looks minified and refuses to
 * process it if so; otherwise hands off to the parent constructor.
 *
 * @param string                  $content The content to tokenize.
 * @param \PHP_CodeSniffer\Config $config  The config data for the run.
 * @param string                  $eolChar The EOL char used in the content.
 *
 * @return void
 * @throws \PHP_CodeSniffer\Exceptions\TokenizerException If the file appears to be minified.
 */
public function __construct($content, Config $config, $eolChar="\n")
{
    // The default must be double-quoted: a single-quoted '\n' is the two
    // characters backslash + n, not a newline.
    if ($this->isMinifiedContent($content, $eolChar) === true) {
        throw new TokenizerException('File appears to be minified and cannot be processed');
    }

    parent::__construct($content, $config, $eolChar);

}//end __construct()
Chris@17	271
Chris@17	272
/**
 * Creates an array of tokens when given some JS code.
 *
 * Works in two passes. The first pass walks the string one character at a
 * time, accumulating a buffer and emitting raw tokens for whitespace,
 * strings, regular expressions, known keywords/operators and everything
 * else (T_STRING). The second pass joins comment tokens together, splits
 * multi-line tokens into one token per line, converts numeric strings into
 * T_LNUMBER/T_DNUMBER and retags the token following an object operator.
 *
 * Each returned token is an array with 'code', 'type' and 'content' keys.
 *
 * @param string $string The string to tokenize.
 *
 * @return array
 */
public function tokenize($string)
{
    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t* START JS TOKENIZING *".PHP_EOL;
    }

    // The longest known token determines how far we need to look ahead.
    $maxTokenLength = 0;
    foreach ($this->tokenValues as $token => $values) {
        if (strlen($token) > $maxTokenLength) {
            $maxTokenLength = strlen($token);
        }
    }

    $tokens          = [];
    $inString        = '';
    $stringChar      = null;
    $inComment       = '';
    $buffer          = '';
    $preStringBuffer = '';
    $cleanBuffer     = false;

    $commentTokenizer = new Comment();

    $tokens[] = [
        'code'    => T_OPEN_TAG,
        'type'    => 'T_OPEN_TAG',
        'content' => '',
    ];

    // Convert newlines to single characters for ease of
    // processing. We will change them back later.
    $string = str_replace($this->eolChar, "\n", $string);

    $chars    = str_split($string);
    $numChars = count($chars);
    for ($i = 0; $i < $numChars; $i++) {
        $char = $chars[$i];

        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            $content       = Util\Common::prepareForOutput($char);
            $bufferContent = Util\Common::prepareForOutput($buffer);

            if ($inString !== '') {
                echo "\t";
            }

            if ($inComment !== '') {
                echo "\t";
            }

            echo "\tProcess char $i => $content (buffer: $bufferContent)".PHP_EOL;
        }//end if

        if ($inString === '' && $inComment === '' && $buffer !== '') {
            // If the buffer only has whitespace and we are about to
            // add a character, store the whitespace first.
            if (trim($char) !== '' && trim($buffer) === '') {
                $tokens[] = [
                    'code'    => T_WHITESPACE,
                    'type'    => 'T_WHITESPACE',
                    'content' => str_replace("\n", $this->eolChar, $buffer),
                ];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput($buffer);
                    echo "\t=> Added token T_WHITESPACE ($content)".PHP_EOL;
                }

                $buffer = '';
            }

            // If the buffer is not whitespace and we are about to
            // add a whitespace character, store the content first.
            if ($inString === ''
                && $inComment === ''
                && trim($char) === ''
                && trim($buffer) !== ''
            ) {
                $tokens[] = [
                    'code'    => T_STRING,
                    'type'    => 'T_STRING',
                    'content' => str_replace("\n", $this->eolChar, $buffer),
                ];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput($buffer);
                    echo "\t=> Added token T_STRING ($content)".PHP_EOL;
                }

                $buffer = '';
            }
        }//end if

        // Process strings.
        if ($inComment === '' && isset($this->stringTokens[$char]) === true) {
            if ($inString === $char) {
                // This could be the end of the string, but make sure it
                // is not escaped first.
                $escapes = 0;
                for ($x = ($i - 1); $x >= 0; $x--) {
                    if ($chars[$x] !== '\\') {
                        break;
                    }

                    $escapes++;
                }

                if ($escapes === 0 || ($escapes % 2) === 0) {
                    // There is an even number escape chars,
                    // so this is not escaped, it is the end of the string.
                    $tokens[] = [
                        'code'    => T_CONSTANT_ENCAPSED_STRING,
                        'type'    => 'T_CONSTANT_ENCAPSED_STRING',
                        'content' => str_replace("\n", $this->eolChar, $buffer).$char,
                    ];

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo "\t\t* found end of string *".PHP_EOL;
                        $content = Util\Common::prepareForOutput($buffer.$char);
                        echo "\t=> Added token T_CONSTANT_ENCAPSED_STRING ($content)".PHP_EOL;
                    }

                    $buffer          = '';
                    $preStringBuffer = '';
                    $inString        = '';
                    $stringChar      = null;
                    continue;
                }//end if
            } else if ($inString === '') {
                // Remember where the string started and what was buffered
                // before it, so we can rewind if it turns out not to be one.
                $inString        = $char;
                $stringChar      = $i;
                $preStringBuffer = $buffer;

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\t* looking for string closer *".PHP_EOL;
                }
            }//end if
        }//end if

        if ($inString !== '' && $char === "\n") {
            // Unless this newline character is escaped, the string did not
            // end before the end of the line, which means it probably
            // wasn't a string at all (maybe a regex).
            if ($chars[($i - 1)] !== '\\') {
                $i               = $stringChar;
                $buffer          = $preStringBuffer;
                $preStringBuffer = '';
                $inString        = '';
                $stringChar      = null;
                $char            = $chars[$i];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\t* found newline before end of string, bailing *".PHP_EOL;
                }
            }
        }

        $buffer .= $char;

        // We don't look for special tokens inside strings,
        // so if we are in a string, we can continue here now
        // that the current char is in the buffer.
        if ($inString !== '') {
            continue;
        }

        // Special case for T_DIVIDE which can actually be
        // the start of a regular expression. A slash that is the very
        // last character cannot start a regex, so it is skipped here
        // rather than reading past the end of the character array.
        if ($buffer === $char
            && $char === '/'
            && isset($chars[($i + 1)]) === true
            && $chars[($i + 1)] !== '*'
        ) {
            $regex = $this->getRegexToken($i, $string, $chars, $tokens);
            if ($regex !== null) {
                $tokens[] = [
                    'code'    => T_REGULAR_EXPRESSION,
                    'type'    => 'T_REGULAR_EXPRESSION',
                    'content' => $regex['content'],
                ];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput($regex['content']);
                    echo "\t=> Added token T_REGULAR_EXPRESSION ($content)".PHP_EOL;
                }

                $i           = $regex['end'];
                $buffer      = '';
                $cleanBuffer = false;
                continue;
            }//end if
        }//end if

        // Check for known tokens, but ignore tokens found that are not at
        // the end of a string, like FOR and this.FORmat.
        // Note the character class is A-Z, not A-z: the latter also matches
        // [ \ ] ^ and backtick, which would stop keywords being recognised
        // when directly followed by one of those characters (e.g. this[0]).
        if (isset($this->tokenValues[strtolower($buffer)]) === true
            && (preg_match('|[a-zA-Z0-9_]|', $char) === 0
            || isset($chars[($i + 1)]) === false
            || preg_match('|[a-zA-Z0-9_]|', $chars[($i + 1)]) === 0)
        ) {
            $matchedToken    = false;
            $lookAheadLength = ($maxTokenLength - strlen($buffer));

            if ($lookAheadLength > 0) {
                // The buffer contains a token type, but we need
                // to look ahead at the next chars to see if this is
                // actually part of a larger token. For example,
                // FOR and FOREACH.
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\t* buffer possibly contains token, looking ahead $lookAheadLength chars *".PHP_EOL;
                }

                $charBuffer = $buffer;
                for ($x = 1; $x <= $lookAheadLength; $x++) {
                    if (isset($chars[($i + $x)]) === false) {
                        break;
                    }

                    $charBuffer .= $chars[($i + $x)];

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $content = Util\Common::prepareForOutput($charBuffer);
                        echo "\t\t=> Looking ahead $x chars => $content".PHP_EOL;
                    }

                    if (isset($this->tokenValues[strtolower($charBuffer)]) === true) {
                        // We've found something larger that matches
                        // so we can ignore this char. Except for 1 very specific
                        // case where a comment like /**/ needs to tokenize as
                        // T_COMMENT and not T_DOC_COMMENT.
                        $oldType = $this->tokenValues[strtolower($buffer)];
                        $newType = $this->tokenValues[strtolower($charBuffer)];
                        if ($oldType === 'T_COMMENT'
                            && $newType === 'T_DOC_COMMENT'
                            && isset($chars[($i + $x + 1)]) === true
                            && $chars[($i + $x + 1)] === '/'
                        ) {
                            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                                echo "\t\t* look ahead ignored T_DOC_COMMENT, continuing *".PHP_EOL;
                            }
                        } else {
                            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                                echo "\t\t* look ahead found more specific token ($newType), ignoring $i *".PHP_EOL;
                            }

                            $matchedToken = true;
                            break;
                        }
                    }//end if
                }//end for
            }//end if

            if ($matchedToken === false) {
                if (PHP_CODESNIFFER_VERBOSITY > 1 && $lookAheadLength > 0) {
                    echo "\t\t* look ahead found nothing *".PHP_EOL;
                }

                $value = $this->tokenValues[strtolower($buffer)];

                if ($value === 'T_FUNCTION' && $buffer !== 'function') {
                    // The function keyword needs to be all lowercase or else
                    // it is just a function called "Function".
                    $value = 'T_STRING';
                }

                $tokens[] = [
                    'code'    => constant($value),
                    'type'    => $value,
                    'content' => $buffer,
                ];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput($buffer);
                    echo "\t=> Added token $value ($content)".PHP_EOL;
                }

                $cleanBuffer = true;
            }//end if
        } else if (isset($this->tokenValues[strtolower($char)]) === true) {
            // No matter what token we end up using, we don't
            // need the content in the buffer any more because we have
            // found a valid token.
            $newContent = substr(str_replace("\n", $this->eolChar, $buffer), 0, -1);
            if ($newContent !== '') {
                $tokens[] = [
                    'code'    => T_STRING,
                    'type'    => 'T_STRING',
                    'content' => $newContent,
                ];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput(substr($buffer, 0, -1));
                    echo "\t=> Added token T_STRING ($content)".PHP_EOL;
                }
            }

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                echo "\t\t* char is token, looking ahead ".($maxTokenLength - 1).' chars *'.PHP_EOL;
            }

            // The char is a token type, but we need to look ahead at the
            // next chars to see if this is actually part of a larger token.
            // For example, = and ===.
            $charBuffer   = $char;
            $matchedToken = false;
            for ($x = 1; $x <= $maxTokenLength; $x++) {
                if (isset($chars[($i + $x)]) === false) {
                    break;
                }

                $charBuffer .= $chars[($i + $x)];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput($charBuffer);
                    echo "\t\t=> Looking ahead $x chars => $content".PHP_EOL;
                }

                if (isset($this->tokenValues[strtolower($charBuffer)]) === true) {
                    // We've found something larger that matches
                    // so we can ignore this char.
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $type = $this->tokenValues[strtolower($charBuffer)];
                        echo "\t\t* look ahead found more specific token ($type), ignoring $i *".PHP_EOL;
                    }

                    $matchedToken = true;
                    break;
                }
            }//end for

            if ($matchedToken === false) {
                $value    = $this->tokenValues[strtolower($char)];
                $tokens[] = [
                    'code'    => constant($value),
                    'type'    => $value,
                    'content' => $char,
                ];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\t* look ahead found nothing *".PHP_EOL;
                    $content = Util\Common::prepareForOutput($char);
                    echo "\t=> Added token $value ($content)".PHP_EOL;
                }

                $cleanBuffer = true;
            } else {
                // Start the buffer again from this char so the larger
                // token can be built up over the next iterations.
                $buffer = $char;
            }//end if
        }//end if

        // Keep track of content inside comments.
        if ($inComment === ''
            && array_key_exists($buffer, $this->commentTokens) === true
        ) {
            // This is not really a comment if the content
            // looks like \// (i.e., it is escaped).
            if (isset($chars[($i - 2)]) === true && $chars[($i - 2)] === '\\') {
                $lastToken   = array_pop($tokens);
                $lastContent = $lastToken['content'];
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $value   = $this->tokenValues[strtolower($lastContent)];
                    $content = Util\Common::prepareForOutput($lastContent);
                    echo "\t=> Removed token $value ($content)".PHP_EOL;
                }

                // Re-add the removed comment opener as individual
                // single-character tokens.
                $lastChars    = str_split($lastContent);
                $lastNumChars = count($lastChars);
                for ($x = 0; $x < $lastNumChars; $x++) {
                    $lastChar = $lastChars[$x];
                    $value    = $this->tokenValues[strtolower($lastChar)];
                    $tokens[] = [
                        'code'    => constant($value),
                        'type'    => $value,
                        'content' => $lastChar,
                    ];

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $content = Util\Common::prepareForOutput($lastChar);
                        echo "\t=> Added token $value ($content)".PHP_EOL;
                    }
                }
            } else {
                // We have started a comment.
                $inComment = $buffer;

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\t* looking for end of comment *".PHP_EOL;
                }
            }//end if
        } else if ($inComment !== '') {
            if ($this->commentTokens[$inComment] === null) {
                // Comment ends at the next newline.
                if (strpos($buffer, "\n") !== false) {
                    $inComment = '';
                }
            } else {
                if ($this->commentTokens[$inComment] === $buffer) {
                    $inComment = '';
                }
            }

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                if ($inComment === '') {
                    echo "\t\t* found end of comment *".PHP_EOL;
                }
            }

            if ($inComment === '' && $cleanBuffer === false) {
                $tokens[] = [
                    'code'    => T_STRING,
                    'type'    => 'T_STRING',
                    'content' => str_replace("\n", $this->eolChar, $buffer),
                ];

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput($buffer);
                    echo "\t=> Added token T_STRING ($content)".PHP_EOL;
                }

                $buffer = '';
            }
        }//end if

        if ($cleanBuffer === true) {
            $buffer      = '';
            $cleanBuffer = false;
        }
    }//end for

    // Flush whatever is left in the buffer. Compare against the empty
    // string rather than using empty(), which would also discard a
    // trailing "0".
    if ($buffer !== '') {
        if ($inString !== '') {
            // The string did not end before the end of the file,
            // which means there was probably a syntax error somewhere.
            $tokens[] = [
                'code'    => T_STRING,
                'type'    => 'T_STRING',
                'content' => str_replace("\n", $this->eolChar, $buffer),
            ];

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                $content = Util\Common::prepareForOutput($buffer);
                echo "\t=> Added token T_STRING ($content)".PHP_EOL;
            }
        } else if (trim($buffer) === '') {
            // Buffer contains whitespace from the end of the file.
            $tokens[] = [
                'code'    => T_WHITESPACE,
                'type'    => 'T_WHITESPACE',
                'content' => str_replace("\n", $this->eolChar, $buffer),
            ];

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                $content = Util\Common::prepareForOutput($buffer);
                echo "\t=> Added token T_WHITESPACE ($content)".PHP_EOL;
            }
        } else {
            // The file ended without trailing whitespace, so the buffer
            // still holds real content that was never flushed.
            $tokens[] = [
                'code'    => T_STRING,
                'type'    => 'T_STRING',
                'content' => str_replace("\n", $this->eolChar, $buffer),
            ];

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                $content = Util\Common::prepareForOutput($buffer);
                echo "\t=> Added token T_STRING ($content)".PHP_EOL;
            }
        }//end if
    }//end if

    $tokens[] = [
        'code'    => T_CLOSE_TAG,
        'type'    => 'T_CLOSE_TAG',
        'content' => '',
    ];

    /*
        Now that we have done some basic tokenizing, we need to
        modify the tokens to join some together and split some apart
        so they match what the PHP tokenizer does.
    */

    $finalTokens = [];
    $newStackPtr = 0;
    $numTokens   = count($tokens);
    for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
        $token = $tokens[$stackPtr];

        /*
            Look for comments and join the tokens together.
        */

        if ($token['code'] === T_COMMENT || $token['code'] === T_DOC_COMMENT) {
            $newContent   = '';
            $tokenContent = $token['content'];

            // A NULL end token (also what we get when the comment opener
            // is not a known key) means the comment ends at a newline.
            $endContent = null;
            if (isset($this->commentTokens[$tokenContent]) === true) {
                $endContent = $this->commentTokens[$tokenContent];
            }

            while ($tokenContent !== $endContent) {
                if ($endContent === null
                    && strpos($tokenContent, $this->eolChar) !== false
                ) {
                    // A null end token means the comment ends at the end of
                    // the line so we look for newlines and split the token.
                    $tokens[$stackPtr]['content'] = substr(
                        $tokenContent,
                        (strpos($tokenContent, $this->eolChar) + strlen($this->eolChar))
                    );

                    $tokenContent = substr(
                        $tokenContent,
                        0,
                        (strpos($tokenContent, $this->eolChar) + strlen($this->eolChar))
                    );

                    // If the substr failed, skip the token as the content
                    // will now be blank.
                    if ($tokens[$stackPtr]['content'] !== false
                        && $tokens[$stackPtr]['content'] !== ''
                    ) {
                        $stackPtr--;
                    }

                    break;
                }//end if

                $stackPtr++;
                $newContent .= $tokenContent;
                if (isset($tokens[$stackPtr]) === false) {
                    break;
                }

                $tokenContent = $tokens[$stackPtr]['content'];
            }//end while

            if ($token['code'] === T_DOC_COMMENT) {
                $commentTokens = $commentTokenizer->tokenizeString($newContent.$tokenContent, $this->eolChar, $newStackPtr);
                foreach ($commentTokens as $commentToken) {
                    $finalTokens[$newStackPtr] = $commentToken;
                    $newStackPtr++;
                }

                continue;
            } else {
                // Save the new content in the current token so
                // the code below can chop it up on newlines.
                $token['content'] = $newContent.$tokenContent;
            }
        }//end if

        /*
            If this token has newlines in its content, split each line up
            and create a new token for each line. We do this so it's easier
            to ascertain where errors occur on a line.
        */

        if (strpos($token['content'], $this->eolChar) !== false) {
            $tokenLines = explode($this->eolChar, $token['content']);
            $numLines   = count($tokenLines);

            for ($i = 0; $i < $numLines; $i++) {
                $newToken = ['content' => $tokenLines[$i]];
                if ($i === ($numLines - 1)) {
                    // An empty last piece just means the content ended
                    // with a newline, so there is nothing more to add.
                    if ($tokenLines[$i] === '') {
                        break;
                    }
                } else {
                    $newToken['content'] .= $this->eolChar;
                }

                $newToken['type']          = $token['type'];
                $newToken['code']          = $token['code'];
                $finalTokens[$newStackPtr] = $newToken;
                $newStackPtr++;
            }
        } else {
            $finalTokens[$newStackPtr] = $token;
            $newStackPtr++;
        }//end if

        // Convert numbers, including decimals.
        if ($token['code'] === T_STRING
            || $token['code'] === T_OBJECT_OPERATOR
        ) {
            // Gather the run of consecutive digit/dot tokens starting here.
            $newContent  = '';
            $oldStackPtr = $stackPtr;
            while (preg_match('|^[0-9\.]+$|', $tokens[$stackPtr]['content']) !== 0) {
                $newContent .= $tokens[$stackPtr]['content'];
                $stackPtr++;
            }

            if ($newContent !== '' && $newContent !== '.') {
                $finalTokens[($newStackPtr - 1)]['content'] = $newContent;
                if (ctype_digit($newContent) === true) {
                    $finalTokens[($newStackPtr - 1)]['code'] = constant('T_LNUMBER');
                    $finalTokens[($newStackPtr - 1)]['type'] = 'T_LNUMBER';
                } else {
                    $finalTokens[($newStackPtr - 1)]['code'] = constant('T_DNUMBER');
                    $finalTokens[($newStackPtr - 1)]['type'] = 'T_DNUMBER';
                }

                $stackPtr--;
                continue;
            } else {
                $stackPtr = $oldStackPtr;
            }
        }//end if

        // Convert the token after an object operator into a string, in most cases.
        if ($token['code'] === T_OBJECT_OPERATOR) {
            for ($i = ($stackPtr + 1); $i < $numTokens; $i++) {
                if (isset(Util\Tokens::$emptyTokens[$tokens[$i]['code']]) === true) {
                    continue;
                }

                if ($tokens[$i]['code'] !== T_PROTOTYPE
                    && $tokens[$i]['code'] !== T_LNUMBER
                    && $tokens[$i]['code'] !== T_DNUMBER
                ) {
                    $tokens[$i]['code'] = T_STRING;
                    $tokens[$i]['type'] = 'T_STRING';
                }

                break;
            }
        }
    }//end for

    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t* END TOKENIZING *".PHP_EOL;
    }

    return $finalTokens;

}//end tokenize()
Chris@17	902
Chris@17	903
/**
 * Tokenizes a regular expression if one is found.
 *
 * Starting from the slash at index $char, this decides whether the slash
 * opens a JS regular expression literal. It does so by checking the last
 * non-empty token already produced, scanning forward on the same line for
 * an unescaped closing slash, consuming any trailing modifier letters and
 * finally checking the first character after the literal.
 *
 * If a regular expression is not found, NULL is returned.
 *
 * @param int    $char   The index of the possible regex start character.
 * @param string $string The complete content of the string being tokenized.
 * @param array  $chars  An array of characters being tokenized.
 * @param array  $tokens The current array of tokens found in the string.
 *
 * @return array<string, int|string>|null An array with the keys 'start' and
 *                                        'end' (indexes into $chars) and
 *                                        'content' (the joined characters),
 *                                        or NULL if no regex was found.
 */
public function getRegexToken($char, $string, $chars, $tokens)
{
    // Tokens that may directly precede a regular expression literal.
    $beforeTokens = [
        T_EQUAL               => true,
        T_IS_NOT_EQUAL        => true,
        T_IS_IDENTICAL        => true,
        T_IS_NOT_IDENTICAL    => true,
        T_OPEN_PARENTHESIS    => true,
        T_OPEN_SQUARE_BRACKET => true,
        T_RETURN              => true,
        T_BOOLEAN_OR          => true,
        T_BOOLEAN_AND         => true,
        T_BOOLEAN_NOT         => true,
        T_BITWISE_OR          => true,
        T_BITWISE_AND         => true,
        T_COMMA               => true,
        T_COLON               => true,
        T_TYPEOF              => true,
        T_INLINE_THEN         => true,
        T_INLINE_ELSE         => true,
    ];

    // Characters that may directly follow a regular expression literal.
    $afterTokens = [
        ','            => true,
        ')'            => true,
        ']'            => true,
        ';'            => true,
        ' '            => true,
        '.'            => true,
        ':'            => true,
        $this->eolChar => true,
    ];

    // Find the last non-whitespace token that was added
    // to the tokens array.
    $numTokens = count($tokens);
    for ($prev = ($numTokens - 1); $prev >= 0; $prev--) {
        if (isset(Util\Tokens::$emptyTokens[$tokens[$prev]['code']]) === false) {
            break;
        }
    }

    // No previous non-empty token at all ($prev ran off the front of the
    // array), or one that cannot precede a regular expression.
    if ($prev < 0 || isset($beforeTokens[$tokens[$prev]['code']]) === false) {
        return null;
    }

    // This is probably a regular expression, so look for the end of it.
    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t* token possibly starts a regular expression *".PHP_EOL;
    }

    $numChars  = count($chars);
    $eolLength = strlen($this->eolChar);

    for ($next = ($char + 1); $next < $numChars; $next++) {
        if ($chars[$next] === '/') {
            // Just make sure this is not escaped first.
            if ($chars[($next - 1)] !== '\\') {
                // In the simple form: /.../ so we found the end.
                break;
            } else if ($chars[($next - 2)] === '\\') {
                // In the form: /...\\/ so we found the end.
                break;
            }
        } else {
            $possibleEolChar = substr($string, $next, $eolLength);
            if ($possibleEolChar === $this->eolChar) {
                // This is the last token on the line and regular
                // expressions need to be defined on a single line,
                // so this is not a regular expression.
                break;
            }
        }
    }

    // The scan either ran off the end of the input or stopped at a newline.
    if ($next >= $numChars || $chars[$next] !== '/') {
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t* could not find end of regular expression *".PHP_EOL;
        }

        return null;
    }

    // The token directly after the end of the regex can
    // be modifiers like global and case insensitive
    // (e.g., /pattern/gi). Stop at the end of the input and only
    // continue on a positive match so a PCRE failure cannot loop forever.
    while (isset($chars[($next + 1)]) === true
        && preg_match('|[a-zA-Z]|', $chars[($next + 1)]) === 1
    ) {
        $next++;
    }

    $regexEnd = $next;
    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t* found end of regular expression at token $regexEnd *".PHP_EOL;
    }

    // Skip any spaces that follow the regular expression.
    for ($next += 1; $next < $numChars; $next++) {
        if ($chars[$next] !== ' ') {
            break;
        } else {
            $possibleEolChar = substr($string, $next, $eolLength);
            if ($possibleEolChar === $this->eolChar) {
                // This is the last token on the line.
                break;
            }
        }
    }

    // Nothing follows the candidate (end of input), or what follows
    // is not something that can come after a regular expression.
    if ($next >= $numChars || isset($afterTokens[$chars[$next]]) === false) {
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t* tokens after regular expression do not look correct *".PHP_EOL;
        }

        return null;
    }

    // This is a regular expression, so join all the tokens together.
    $content = '';
    for ($x = $char; $x <= $regexEnd; $x++) {
        $content .= $chars[$x];
    }

    $token = [
        'start'   => $char,
        'end'     => $regexEnd,
        'content' => $content,
    ];

    return $token;

}//end getRegexToken()
Chris@17	1044
Chris@17	1045
/**
 * Performs additional processing after main tokenizing.
 *
 * This additional processing looks for properties, closures, labels and objects.
 *
 * Walking over $this->tokens, it:
 * - changes a T_FUNCTION that has a scope opener and is directly followed
 *   by an open parenthesis into T_CLOSURE, updating the conditions of the
 *   tokens inside its scope;
 * - for an open curly bracket that has a bracket closer but no scope
 *   condition, either marks the preceding "name (...)" inside a class as
 *   a T_FUNCTION (ES6 method) or converts the bracket pair into
 *   T_OBJECT / T_CLOSE_OBJECT;
 * - changes a T_COLON into T_INLINE_ELSE when a T_INLINE_THEN precedes it
 *   on the same line, and otherwise converts the string before the colon
 *   into T_PROPERTY (inside an object) or T_LABEL (outside of one).
 *
 * @return void
 */
public function processAdditional()
{
    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t* START ADDITIONAL JS PROCESSING *".PHP_EOL;
    }

    $numTokens  = count($this->tokens);
    $classStack = [];

    for ($i = 0; $i < $numTokens; $i++) {
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            $type    = $this->tokens[$i]['type'];
            $content = Util\Common::prepareForOutput($this->tokens[$i]['content']);

            echo str_repeat("\t", count($classStack));
            echo "\tProcess token $i: $type => $content".PHP_EOL;
        }

        // Looking for functions that are actually closures.
        if ($this->tokens[$i]['code'] === T_FUNCTION && isset($this->tokens[$i]['scope_opener']) === true) {
            for ($x = ($i + 1); $x < $numTokens; $x++) {
                if (isset(Util\Tokens::$emptyTokens[$this->tokens[$x]['code']]) === false) {
                    break;
                }
            }

            // Guard against the look-ahead running off the end of the token array.
            if ($x < $numTokens && $this->tokens[$x]['code'] === T_OPEN_PARENTHESIS) {
                $this->tokens[$i]['code'] = T_CLOSURE;
                $this->tokens[$i]['type'] = 'T_CLOSURE';
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $line = $this->tokens[$i]['line'];
                    echo str_repeat("\t", count($classStack));
                    echo "\t* token $i on line $line changed from T_FUNCTION to T_CLOSURE *".PHP_EOL;
                }

                // Tokens inside the scope that list this function as a
                // condition need that condition changed to a closure too.
                for ($x = ($this->tokens[$i]['scope_opener'] + 1); $x < $this->tokens[$i]['scope_closer']; $x++) {
                    if (isset($this->tokens[$x]['conditions'][$i]) === false) {
                        continue;
                    }

                    $this->tokens[$x]['conditions'][$i] = T_CLOSURE;
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $type = $this->tokens[$x]['type'];
                        echo str_repeat("\t", count($classStack));
                        echo "\t\t* cleaned $x ($type) *".PHP_EOL;
                    }
                }
            }//end if

            continue;
        } else if ($this->tokens[$i]['code'] === T_OPEN_CURLY_BRACKET
            && isset($this->tokens[$i]['scope_condition']) === false
            && isset($this->tokens[$i]['bracket_closer']) === true
        ) {
            $condition = $this->tokens[$i]['conditions'];
            $condition = end($condition);
            if ($condition === T_CLASS) {
                // Possibly an ES6 method. To be classified as one, the previous
                // non-empty tokens need to be a set of parenthesis, and then a string
                // (the method name).
                for ($parenCloser = ($i - 1); $parenCloser > 0; $parenCloser--) {
                    if (isset(Util\Tokens::$emptyTokens[$this->tokens[$parenCloser]['code']]) === false) {
                        break;
                    }
                }

                // An unmatched close parenthesis has no opener to look behind.
                if ($this->tokens[$parenCloser]['code'] === T_CLOSE_PARENTHESIS
                    && isset($this->tokens[$parenCloser]['parenthesis_opener']) === true
                ) {
                    $parenOpener = $this->tokens[$parenCloser]['parenthesis_opener'];
                    for ($name = ($parenOpener - 1); $name > 0; $name--) {
                        if (isset(Util\Tokens::$emptyTokens[$this->tokens[$name]['code']]) === false) {
                            break;
                        }
                    }

                    if ($this->tokens[$name]['code'] === T_STRING) {
                        // We found a method name.
                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            $line = $this->tokens[$name]['line'];
                            echo str_repeat("\t", count($classStack));
                            echo "\t* token $name on line $line changed from T_STRING to T_FUNCTION *".PHP_EOL;
                        }

                        $closer = $this->tokens[$i]['bracket_closer'];

                        $this->tokens[$name]['code'] = T_FUNCTION;
                        $this->tokens[$name]['type'] = 'T_FUNCTION';

                        // The method name, the opening bracket and the closing
                        // bracket all get the same scope and parenthesis info.
                        foreach ([$name, $i, $closer] as $token) {
                            $this->tokens[$token]['scope_condition']    = $name;
                            $this->tokens[$token]['scope_opener']       = $i;
                            $this->tokens[$token]['scope_closer']       = $closer;
                            $this->tokens[$token]['parenthesis_opener'] = $parenOpener;
                            $this->tokens[$token]['parenthesis_closer'] = $parenCloser;
                            $this->tokens[$token]['parenthesis_owner']  = $name;
                        }

                        $this->tokens[$parenOpener]['parenthesis_owner'] = $name;
                        $this->tokens[$parenCloser]['parenthesis_owner'] = $name;

                        for ($x = ($i + 1); $x < $closer; $x++) {
                            $this->tokens[$x]['conditions'][$name] = T_FUNCTION;
                            ksort($this->tokens[$x]['conditions'], SORT_NUMERIC);
                            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                                $type = $this->tokens[$x]['type'];
                                echo str_repeat("\t", count($classStack));
                                echo "\t\t* added T_FUNCTION condition to $x ($type) *".PHP_EOL;
                            }
                        }

                        continue;
                    }//end if
                }//end if
            }//end if

            // Not an ES6 method, so treat the bracket pair as an object.
            $classStack[] = $i;

            $closer = $this->tokens[$i]['bracket_closer'];
            $this->tokens[$i]['code']      = T_OBJECT;
            $this->tokens[$i]['type']      = 'T_OBJECT';
            $this->tokens[$closer]['code'] = T_CLOSE_OBJECT;
            $this->tokens[$closer]['type'] = 'T_CLOSE_OBJECT';

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                echo str_repeat("\t", count($classStack));
                echo "\t* token $i converted from T_OPEN_CURLY_BRACKET to T_OBJECT *".PHP_EOL;
                echo str_repeat("\t", count($classStack));
                echo "\t* token $closer converted from T_CLOSE_CURLY_BRACKET to T_CLOSE_OBJECT *".PHP_EOL;
            }

            for ($x = ($i + 1); $x < $closer; $x++) {
                $this->tokens[$x]['conditions'][$i] = T_OBJECT;
                ksort($this->tokens[$x]['conditions'], SORT_NUMERIC);
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $type = $this->tokens[$x]['type'];
                    echo str_repeat("\t", count($classStack));
                    echo "\t\t* added T_OBJECT condition to $x ($type) *".PHP_EOL;
                }
            }
        } else if ($this->tokens[$i]['code'] === T_CLOSE_OBJECT) {
            // Leaving an object, so drop it from the stack.
            array_pop($classStack);
        } else if ($this->tokens[$i]['code'] === T_COLON) {
            // If it is a scope opener, it belongs to a
            // DEFAULT or CASE statement.
            if (isset($this->tokens[$i]['scope_condition']) === true) {
                continue;
            }

            // Make sure this is not part of an inline IF statement.
            // Only tokens on the same line as the colon are checked.
            for ($x = ($i - 1); $x >= 0; $x--) {
                if ($this->tokens[$x]['code'] === T_INLINE_THEN) {
                    $this->tokens[$i]['code'] = T_INLINE_ELSE;
                    $this->tokens[$i]['type'] = 'T_INLINE_ELSE';

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo str_repeat("\t", count($classStack));
                        echo "\t* token $i converted from T_COLON to T_INLINE_ELSE *".PHP_EOL;
                    }

                    continue 2;
                } else if ($this->tokens[$x]['line'] < $this->tokens[$i]['line']) {
                    break;
                }
            }

            // The string to the left of the colon is either a property or label.
            for ($label = ($i - 1); $label >= 0; $label--) {
                if (isset(Util\Tokens::$emptyTokens[$this->tokens[$label]['code']]) === false) {
                    break;
                }
            }

            // Nothing but empty tokens before the colon, or the token
            // before it is not a string.
            if ($label < 0
                || ($this->tokens[$label]['code'] !== T_STRING
                && $this->tokens[$label]['code'] !== T_CONSTANT_ENCAPSED_STRING)
            ) {
                continue;
            }

            if (empty($classStack) === false) {
                $this->tokens[$label]['code'] = T_PROPERTY;
                $this->tokens[$label]['type'] = 'T_PROPERTY';

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo str_repeat("\t", count($classStack));
                    echo "\t* token $label converted from T_STRING to T_PROPERTY *".PHP_EOL;
                }
            } else {
                $this->tokens[$label]['code'] = T_LABEL;
                $this->tokens[$label]['type'] = 'T_LABEL';

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo str_repeat("\t", count($classStack));
                    echo "\t* token $label converted from T_STRING to T_LABEL *".PHP_EOL;
                }
            }//end if
        }//end if
    }//end for

    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t* END ADDITIONAL JS PROCESSING *".PHP_EOL;
    }

}//end processAdditional()
Chris@17	1255
Chris@17	1256
Chris@17	1257 }//end class

Mercurial > hg > isophonics-drupal-site

annotate vendor/squizlabs/php_codesniffer/src/Tokenizers/JS.php @ 19:fa3358dc1485 tip