comparison vendor/squizlabs/php_codesniffer/CodeSniffer/Tokenizers/JS.php @ 0:4c8ae668cc8c

Initial import (non-working)
author Chris Cannam
date Wed, 29 Nov 2017 16:09:58 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4c8ae668cc8c
1 <?php
2 /**
3 * Tokenizes JS code.
4 *
5 * PHP version 5
6 *
7 * @category PHP
8 * @package PHP_CodeSniffer
9 * @author Greg Sherwood <gsherwood@squiz.net>
10 * @author Marc McIntyre <mmcintyre@squiz.net>
11 * @copyright 2006-2014 Squiz Pty Ltd (ABN 77 084 670 600)
12 * @license https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
13 * @link http://pear.php.net/package/PHP_CodeSniffer
14 */
15
16 /**
17 * Tokenizes JS code.
18 *
19 * @category PHP
20 * @package PHP_CodeSniffer
21 * @author Greg Sherwood <gsherwood@squiz.net>
22 * @copyright 2006-2014 Squiz Pty Ltd (ABN 77 084 670 600)
23 * @license https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
24 * @version Release: @package_version@
25 * @link http://pear.php.net/package/PHP_CodeSniffer
26 */
class PHP_CodeSniffer_Tokenizers_JS
{

    /**
     * If TRUE, files that appear to be minified will not be processed.
     *
     * @var boolean
     */
    public $skipMinified = true;

    /**
     * A list of tokens that are allowed to open a scope.
     *
     * This array also contains information about what kind of token the scope
     * opener uses to open and close the scope, if the token strictly requires
     * an opener, if the token can share a scope closer, and who it can be shared
     * with. An example of a token that shares a scope closer is a CASE scope.
     *
     * @var array
     */
    public $scopeOpeners = array(
        T_IF       => array(
            'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
            'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
            'strict' => false,
            'shared' => false,
            'with'   => array(),
        ),
        T_TRY      => array(
            'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
            'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
            'strict' => true,
            'shared' => false,
            'with'   => array(),
        ),
        T_CATCH    => array(
            'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
            'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
            'strict' => true,
            'shared' => false,
            'with'   => array(),
        ),
        T_ELSE     => array(
            'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
            'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
            'strict' => false,
            'shared' => false,
            'with'   => array(),
        ),
        T_FOR      => array(
            'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
            'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
            'strict' => false,
            'shared' => false,
            'with'   => array(),
        ),
        T_FUNCTION => array(
            'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
            'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
            'strict' => false,
            'shared' => false,
            'with'   => array(),
        ),
        T_WHILE    => array(
            'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
            'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
            'strict' => false,
            'shared' => false,
            'with'   => array(),
        ),
        T_DO       => array(
            'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
            'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
            'strict' => true,
            'shared' => false,
            'with'   => array(),
        ),
        T_SWITCH   => array(
            'start'  => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
            'end'    => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
            'strict' => true,
            'shared' => false,
            'with'   => array(),
        ),
        T_CASE     => array(
            'start'  => array(T_COLON => T_COLON),
            'end'    => array(
                T_BREAK    => T_BREAK,
                T_RETURN   => T_RETURN,
                T_CONTINUE => T_CONTINUE,
                T_THROW    => T_THROW,
            ),
            'strict' => true,
            'shared' => true,
            'with'   => array(
                T_DEFAULT => T_DEFAULT,
                T_CASE    => T_CASE,
                T_SWITCH  => T_SWITCH,
            ),
        ),
        T_DEFAULT  => array(
            'start'  => array(T_COLON => T_COLON),
            'end'    => array(
                T_BREAK    => T_BREAK,
                T_RETURN   => T_RETURN,
                T_CONTINUE => T_CONTINUE,
                T_THROW    => T_THROW,
            ),
            'strict' => true,
            'shared' => true,
            'with'   => array(
                T_CASE   => T_CASE,
                T_SWITCH => T_SWITCH,
            ),
        ),
    );

    /**
     * A list of tokens that end the scope.
     *
     * The data is kept here, separately from the $scopeOpeners array, to
     * save time during parsing of the file.
     *
     * NOTE(review): only T_CLOSE_CURLY_BRACKET and T_BREAK are listed, while
     * the CASE and DEFAULT entries of $scopeOpeners also end on T_RETURN,
     * T_CONTINUE and T_THROW - confirm this difference is intentional.
     *
     * @var array
     */
    public $endScopeTokens = array(
        T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET,
        T_BREAK               => T_BREAK,
    );

    /**
     * A list of special JS tokens and their types.
     *
     * Keys are the lowercase source text; values are the names of the token
     * constants, which are resolved with constant() when a token is created.
     *
     * @var array
     */
    protected $tokenValues = array(
        'function'  => 'T_FUNCTION',
        'prototype' => 'T_PROTOTYPE',
        'try'       => 'T_TRY',
        'catch'     => 'T_CATCH',
        'return'    => 'T_RETURN',
        'throw'     => 'T_THROW',
        'break'     => 'T_BREAK',
        'switch'    => 'T_SWITCH',
        'continue'  => 'T_CONTINUE',
        'if'        => 'T_IF',
        'else'      => 'T_ELSE',
        'do'        => 'T_DO',
        'while'     => 'T_WHILE',
        'for'       => 'T_FOR',
        'var'       => 'T_VAR',
        'case'      => 'T_CASE',
        'default'   => 'T_DEFAULT',
        'true'      => 'T_TRUE',
        'false'     => 'T_FALSE',
        'null'      => 'T_NULL',
        'this'      => 'T_THIS',
        'typeof'    => 'T_TYPEOF',
        '('         => 'T_OPEN_PARENTHESIS',
        ')'         => 'T_CLOSE_PARENTHESIS',
        '{'         => 'T_OPEN_CURLY_BRACKET',
        '}'         => 'T_CLOSE_CURLY_BRACKET',
        '['         => 'T_OPEN_SQUARE_BRACKET',
        ']'         => 'T_CLOSE_SQUARE_BRACKET',
        '?'         => 'T_INLINE_THEN',
        '.'         => 'T_OBJECT_OPERATOR',
        '+'         => 'T_PLUS',
        '-'         => 'T_MINUS',
        '*'         => 'T_MULTIPLY',
        '%'         => 'T_MODULUS',
        '/'         => 'T_DIVIDE',
        '^'         => 'T_LOGICAL_XOR',
        ','         => 'T_COMMA',
        ';'         => 'T_SEMICOLON',
        ':'         => 'T_COLON',
        '<'         => 'T_LESS_THAN',
        '>'         => 'T_GREATER_THAN',
        '<<'        => 'T_SL',
        '>>'        => 'T_SR',
        '>>>'       => 'T_ZSR',
        '<<='       => 'T_SL_EQUAL',
        '>>='       => 'T_SR_EQUAL',
        '>>>='      => 'T_ZSR_EQUAL',
        '<='        => 'T_IS_SMALLER_OR_EQUAL',
        '>='        => 'T_IS_GREATER_OR_EQUAL',
        '=>'        => 'T_DOUBLE_ARROW',
        '!'         => 'T_BOOLEAN_NOT',
        '||'        => 'T_BOOLEAN_OR',
        '&&'        => 'T_BOOLEAN_AND',
        '|'         => 'T_BITWISE_OR',
        '&'         => 'T_BITWISE_AND',
        '!='        => 'T_IS_NOT_EQUAL',
        '!=='       => 'T_IS_NOT_IDENTICAL',
        '='         => 'T_EQUAL',
        '=='        => 'T_IS_EQUAL',
        '==='       => 'T_IS_IDENTICAL',
        '-='        => 'T_MINUS_EQUAL',
        '+='        => 'T_PLUS_EQUAL',
        '*='        => 'T_MUL_EQUAL',
        '/='        => 'T_DIV_EQUAL',
        '%='        => 'T_MOD_EQUAL',
        '++'        => 'T_INC',
        '--'        => 'T_DEC',
        '//'        => 'T_COMMENT',
        '/*'        => 'T_COMMENT',
        '/**'       => 'T_DOC_COMMENT',
        '*/'        => 'T_COMMENT',
    );

    /**
     * A list of string delimiters.
     *
     * @var array
     */
    protected $stringTokens = array(
        '\'' => '\'',
        '"'  => '"',
    );

    /**
     * A list of tokens that start comments, mapped to the token that ends them.
     *
     * A NULL end token means the comment runs to the end of the line.
     *
     * @var array
     */
    protected $commentTokens = array(
        '//'  => null,
        '/*'  => '*/',
        '/**' => '*/',
    );


    /**
     * Creates an array of tokens when given some JS code.
     *
     * The string is first split character by character into raw tokens, and
     * the raw tokens are then joined and split (comments, multi-line content,
     * numbers, names after an object operator) so that they match what the
     * PHP tokenizer produces.
     *
     * @param string $string  The string to tokenize.
     * @param string $eolChar The EOL character to use for splitting strings.
     *                        Defaults to a real newline character.
     *
     * @return array
     */
    public function tokenizeString($string, $eolChar="\n")
    {
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t*** START JS TOKENIZING ***".PHP_EOL;
        }

        $maxTokenLength = 0;
        foreach ($this->tokenValues as $token => $values) {
            if (strlen($token) > $maxTokenLength) {
                $maxTokenLength = strlen($token);
            }
        }

        $tokens          = array();
        $inString        = '';
        $stringChar      = null;
        $inComment       = '';
        $buffer          = '';
        $preStringBuffer = '';
        $cleanBuffer     = false;

        $commentTokenizer = new PHP_CodeSniffer_Tokenizers_Comment();

        $tokens[] = array(
            'code'    => T_OPEN_TAG,
            'type'    => 'T_OPEN_TAG',
            'content' => '',
        );

        // Convert newlines to single characters for ease of
        // processing. We will change them back later.
        $string = str_replace($eolChar, "\n", $string);

        $chars    = str_split($string);
        $numChars = count($chars);
        for ($i = 0; $i < $numChars; $i++) {
            $char = $chars[$i];

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                $content       = PHP_CodeSniffer::prepareForOutput($char);
                $bufferContent = PHP_CodeSniffer::prepareForOutput($buffer);

                if ($inString !== '') {
                    echo "\t";
                }

                if ($inComment !== '') {
                    echo "\t";
                }

                echo "\tProcess char $i => $content (buffer: $bufferContent)".PHP_EOL;
            }//end if

            if ($inString === '' && $inComment === '' && $buffer !== '') {
                // If the buffer only has whitespace and we are about to
                // add a character, store the whitespace first.
                if (trim($char) !== '' && trim($buffer) === '') {
                    $tokens[] = array(
                        'code'    => T_WHITESPACE,
                        'type'    => 'T_WHITESPACE',
                        'content' => str_replace("\n", $eolChar, $buffer),
                    );

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $content = PHP_CodeSniffer::prepareForOutput($buffer);
                        echo "\t=> Added token T_WHITESPACE ($content)".PHP_EOL;
                    }

                    $buffer = '';
                }

                // If the buffer is not whitespace and we are about to
                // add a whitespace character, store the content first.
                if ($inString === ''
                    && $inComment === ''
                    && trim($char) === ''
                    && trim($buffer) !== ''
                ) {
                    $tokens[] = array(
                        'code'    => T_STRING,
                        'type'    => 'T_STRING',
                        'content' => str_replace("\n", $eolChar, $buffer),
                    );

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $content = PHP_CodeSniffer::prepareForOutput($buffer);
                        echo "\t=> Added token T_STRING ($content)".PHP_EOL;
                    }

                    $buffer = '';
                }
            }//end if

            // Process strings.
            if ($inComment === '' && isset($this->stringTokens[$char]) === true) {
                if ($inString === $char) {
                    // This could be the end of the string, but make sure it
                    // is not escaped first.
                    $escapes = 0;
                    for ($x = ($i - 1); $x >= 0; $x--) {
                        if ($chars[$x] !== '\\') {
                            break;
                        }

                        $escapes++;
                    }

                    if ($escapes === 0 || ($escapes % 2) === 0) {
                        // There is an even number escape chars,
                        // so this is not escaped, it is the end of the string.
                        $tokens[] = array(
                            'code'    => T_CONSTANT_ENCAPSED_STRING,
                            'type'    => 'T_CONSTANT_ENCAPSED_STRING',
                            'content' => str_replace("\n", $eolChar, $buffer).$char,
                        );

                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            echo "\t\t* found end of string *".PHP_EOL;
                            $content = PHP_CodeSniffer::prepareForOutput($buffer.$char);
                            echo "\t=> Added token T_CONSTANT_ENCAPSED_STRING ($content)".PHP_EOL;
                        }

                        $buffer          = '';
                        $preStringBuffer = '';
                        $inString        = '';
                        $stringChar      = null;
                        continue;
                    }//end if
                } else if ($inString === '') {
                    // Remember where the string started so we can rewind
                    // if it turns out not to be a string after all.
                    $inString        = $char;
                    $stringChar      = $i;
                    $preStringBuffer = $buffer;

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo "\t\t* looking for string closer *".PHP_EOL;
                    }
                }//end if
            }//end if

            if ($inString !== '' && $char === "\n") {
                // Unless this newline character is escaped, the string did not
                // end before the end of the line, which means it probably
                // wasn't a string at all (maybe a regex).
                if ($chars[($i - 1)] !== '\\') {
                    $i               = $stringChar;
                    $buffer          = $preStringBuffer;
                    $preStringBuffer = '';
                    $inString        = '';
                    $stringChar      = null;
                    $char            = $chars[$i];

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo "\t\t* found newline before end of string, bailing *".PHP_EOL;
                    }
                }
            }

            $buffer .= $char;

            // We don't look for special tokens inside strings,
            // so if we are in a string, we can continue here now
            // that the current char is in the buffer.
            if ($inString !== '') {
                continue;
            }

            // Special case for T_DIVIDE which can actually be
            // the start of a regular expression. The isset() guard keeps
            // a slash that is the very last character from reading past
            // the end of the input.
            if ($buffer === $char
                && $char === '/'
                && (isset($chars[($i + 1)]) === false || $chars[($i + 1)] !== '*')
            ) {
                $regex = $this->getRegexToken(
                    $i,
                    $string,
                    $chars,
                    $tokens,
                    $eolChar
                );

                if ($regex !== null) {
                    $tokens[] = array(
                        'code'    => T_REGULAR_EXPRESSION,
                        'type'    => 'T_REGULAR_EXPRESSION',
                        'content' => $regex['content'],
                    );

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $content = PHP_CodeSniffer::prepareForOutput($regex['content']);
                        echo "\t=> Added token T_REGULAR_EXPRESSION ($content)".PHP_EOL;
                    }

                    $i           = $regex['end'];
                    $buffer      = '';
                    $cleanBuffer = false;
                    continue;
                }//end if
            }//end if

            // Check for known tokens, but ignore tokens found that are not at
            // the end of a string, like FOR and this.FORmat.
            // NOTE(review): the range "A-z" also matches [ \ ] ^ _ and the
            // backtick; it is left unchanged here to preserve tokenization.
            if (isset($this->tokenValues[strtolower($buffer)]) === true
                && (preg_match('|[a-zA-z0-9_]|', $char) === 0
                || isset($chars[($i + 1)]) === false
                || preg_match('|[a-zA-z0-9_]|', $chars[($i + 1)]) === 0)
            ) {
                $matchedToken    = false;
                $lookAheadLength = ($maxTokenLength - strlen($buffer));

                if ($lookAheadLength > 0) {
                    // The buffer contains a token type, but we need
                    // to look ahead at the next chars to see if this is
                    // actually part of a larger token. For example,
                    // FOR and FOREACH.
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo "\t\t* buffer possibly contains token, looking ahead $lookAheadLength chars *".PHP_EOL;
                    }

                    $charBuffer = $buffer;
                    for ($x = 1; $x <= $lookAheadLength; $x++) {
                        if (isset($chars[($i + $x)]) === false) {
                            break;
                        }

                        $charBuffer .= $chars[($i + $x)];

                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            $content = PHP_CodeSniffer::prepareForOutput($charBuffer);
                            echo "\t\t=> Looking ahead $x chars => $content".PHP_EOL;
                        }

                        if (isset($this->tokenValues[strtolower($charBuffer)]) === true) {
                            // We've found something larger that matches
                            // so we can ignore this char. Except for 1 very specific
                            // case where a comment like /**/ needs to tokenize as
                            // T_COMMENT and not T_DOC_COMMENT.
                            $oldType = $this->tokenValues[strtolower($buffer)];
                            $newType = $this->tokenValues[strtolower($charBuffer)];
                            if ($oldType === 'T_COMMENT'
                                && $newType === 'T_DOC_COMMENT'
                                && isset($chars[($i + $x + 1)]) === true
                                && $chars[($i + $x + 1)] === '/'
                            ) {
                                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                                    echo "\t\t* look ahead ignored T_DOC_COMMENT, continuing *".PHP_EOL;
                                }
                            } else {
                                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                                    echo "\t\t* look ahead found more specific token ($newType), ignoring $i *".PHP_EOL;
                                }

                                $matchedToken = true;
                                break;
                            }
                        }//end if
                    }//end for
                }//end if

                if ($matchedToken === false) {
                    if (PHP_CODESNIFFER_VERBOSITY > 1 && $lookAheadLength > 0) {
                        echo "\t\t* look ahead found nothing *".PHP_EOL;
                    }

                    $value = $this->tokenValues[strtolower($buffer)];

                    if ($value === 'T_FUNCTION' && $buffer !== 'function') {
                        // The function keyword needs to be all lowercase or else
                        // it is just a function called "Function".
                        $value = 'T_STRING';
                    }

                    $tokens[] = array(
                        'code'    => constant($value),
                        'type'    => $value,
                        'content' => $buffer,
                    );

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $content = PHP_CodeSniffer::prepareForOutput($buffer);
                        echo "\t=> Added token $value ($content)".PHP_EOL;
                    }

                    $cleanBuffer = true;
                }//end if
            } else if (isset($this->tokenValues[strtolower($char)]) === true) {
                // No matter what token we end up using, we don't
                // need the content in the buffer any more because we have
                // found a valid token.
                $newContent = substr(str_replace("\n", $eolChar, $buffer), 0, -1);
                if ($newContent !== '') {
                    $tokens[] = array(
                        'code'    => T_STRING,
                        'type'    => 'T_STRING',
                        'content' => $newContent,
                    );

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $content = PHP_CodeSniffer::prepareForOutput(substr($buffer, 0, -1));
                        echo "\t=> Added token T_STRING ($content)".PHP_EOL;
                    }
                }

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\t* char is token, looking ahead ".($maxTokenLength - 1).' chars *'.PHP_EOL;
                }

                // The char is a token type, but we need to look ahead at the
                // next chars to see if this is actually part of a larger token.
                // For example, = and ===.
                $charBuffer   = $char;
                $matchedToken = false;
                for ($x = 1; $x <= $maxTokenLength; $x++) {
                    if (isset($chars[($i + $x)]) === false) {
                        break;
                    }

                    $charBuffer .= $chars[($i + $x)];

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $content = PHP_CodeSniffer::prepareForOutput($charBuffer);
                        echo "\t\t=> Looking ahead $x chars => $content".PHP_EOL;
                    }

                    if (isset($this->tokenValues[strtolower($charBuffer)]) === true) {
                        // We've found something larger that matches
                        // so we can ignore this char.
                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            $type = $this->tokenValues[strtolower($charBuffer)];
                            echo "\t\t* look ahead found more specific token ($type), ignoring $i *".PHP_EOL;
                        }

                        $matchedToken = true;
                        break;
                    }
                }//end for

                if ($matchedToken === false) {
                    $value    = $this->tokenValues[strtolower($char)];
                    $tokens[] = array(
                        'code'    => constant($value),
                        'type'    => $value,
                        'content' => $char,
                    );

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo "\t\t* look ahead found nothing *".PHP_EOL;
                        $content = PHP_CodeSniffer::prepareForOutput($char);
                        echo "\t=> Added token $value ($content)".PHP_EOL;
                    }

                    $cleanBuffer = true;
                } else {
                    $buffer = $char;
                }//end if
            }//end if

            // Keep track of content inside comments.
            if ($inComment === ''
                && array_key_exists($buffer, $this->commentTokens) === true
            ) {
                // This is not really a comment if the content
                // looks like \// (i.e., it is escaped).
                if (isset($chars[($i - 2)]) === true && $chars[($i - 2)] === '\\') {
                    $lastToken   = array_pop($tokens);
                    $lastContent = $lastToken['content'];
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $value   = $this->tokenValues[strtolower($lastContent)];
                        $content = PHP_CodeSniffer::prepareForOutput($lastContent);
                        echo "\t=> Removed token $value ($content)".PHP_EOL;
                    }

                    // Re-add the removed token one character at a time.
                    $lastChars    = str_split($lastContent);
                    $lastNumChars = count($lastChars);
                    for ($x = 0; $x < $lastNumChars; $x++) {
                        $lastChar = $lastChars[$x];
                        $value    = $this->tokenValues[strtolower($lastChar)];
                        $tokens[] = array(
                            'code'    => constant($value),
                            'type'    => $value,
                            'content' => $lastChar,
                        );

                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            $content = PHP_CodeSniffer::prepareForOutput($lastChar);
                            echo "\t=> Added token $value ($content)".PHP_EOL;
                        }
                    }
                } else {
                    // We have started a comment.
                    $inComment = $buffer;

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo "\t\t* looking for end of comment *".PHP_EOL;
                    }
                }//end if
            } else if ($inComment !== '') {
                if ($this->commentTokens[$inComment] === null) {
                    // Comment ends at the next newline.
                    if (strpos($buffer, "\n") !== false) {
                        $inComment = '';
                    }
                } else {
                    if ($this->commentTokens[$inComment] === $buffer) {
                        $inComment = '';
                    }
                }

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    if ($inComment === '') {
                        echo "\t\t* found end of comment *".PHP_EOL;
                    }
                }

                if ($inComment === '' && $cleanBuffer === false) {
                    $tokens[] = array(
                        'code'    => T_STRING,
                        'type'    => 'T_STRING',
                        'content' => str_replace("\n", $eolChar, $buffer),
                    );

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $content = PHP_CodeSniffer::prepareForOutput($buffer);
                        echo "\t=> Added token T_STRING ($content)".PHP_EOL;
                    }

                    $buffer = '';
                }
            }//end if

            if ($cleanBuffer === true) {
                $buffer      = '';
                $cleanBuffer = false;
            }
        }//end for

        // Flush whatever is left in the buffer. This is compared against
        // the empty string rather than using empty() because empty('0') is
        // TRUE, which would silently drop a file's final "0" character.
        if ($buffer !== '') {
            if (trim($buffer) === '') {
                // Buffer contains whitespace from the end of the file.
                $leftoverType = 'T_WHITESPACE';
            } else {
                // Buffer contains real content that was not terminated
                // by whitespace or another token before the file ended.
                $leftoverType = 'T_STRING';
            }

            $tokens[] = array(
                'code'    => constant($leftoverType),
                'type'    => $leftoverType,
                'content' => str_replace("\n", $eolChar, $buffer),
            );

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                $content = PHP_CodeSniffer::prepareForOutput($buffer);
                echo "\t=> Added token $leftoverType ($content)".PHP_EOL;
            }
        }

        $tokens[] = array(
            'code'    => T_CLOSE_TAG,
            'type'    => 'T_CLOSE_TAG',
            'content' => '',
        );

        /*
            Now that we have done some basic tokenizing, we need to
            modify the tokens to join some together and split some apart
            so they match what the PHP tokenizer does.
        */

        $finalTokens = array();
        $newStackPtr = 0;
        $numTokens   = count($tokens);
        for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
            $token = $tokens[$stackPtr];

            /*
                Look for comments and join the tokens together.
            */

            if ($token['code'] === T_COMMENT || $token['code'] === T_DOC_COMMENT) {
                $newContent   = '';
                $tokenContent = $token['content'];

                $endContent = null;
                if (isset($this->commentTokens[$tokenContent]) === true) {
                    $endContent = $this->commentTokens[$tokenContent];
                }

                while ($tokenContent !== $endContent) {
                    if ($endContent === null
                        && strpos($tokenContent, $eolChar) !== false
                    ) {
                        // A null end token means the comment ends at the end of
                        // the line so we look for newlines and split the token.
                        $tokens[$stackPtr]['content'] = substr(
                            $tokenContent,
                            (strpos($tokenContent, $eolChar) + strlen($eolChar))
                        );

                        $tokenContent = substr(
                            $tokenContent,
                            0,
                            (strpos($tokenContent, $eolChar) + strlen($eolChar))
                        );

                        // If the substr failed, skip the token as the content
                        // will now be blank.
                        if ($tokens[$stackPtr]['content'] !== false
                            && $tokens[$stackPtr]['content'] !== ''
                        ) {
                            $stackPtr--;
                        }

                        break;
                    }//end if

                    $stackPtr++;
                    $newContent .= $tokenContent;
                    if (isset($tokens[$stackPtr]) === false) {
                        break;
                    }

                    $tokenContent = $tokens[$stackPtr]['content'];
                }//end while

                if ($token['code'] === T_DOC_COMMENT) {
                    // Doc comments are handed to the comment tokenizer and
                    // its tokens are appended in place of the raw comment.
                    $commentTokens = $commentTokenizer->tokenizeString($newContent.$tokenContent, $eolChar, $newStackPtr);
                    foreach ($commentTokens as $commentToken) {
                        $finalTokens[$newStackPtr] = $commentToken;
                        $newStackPtr++;
                    }

                    continue;
                } else {
                    // Save the new content in the current token so
                    // the code below can chop it up on newlines.
                    $token['content'] = $newContent.$tokenContent;
                }
            }//end if

            /*
                If this token has newlines in its content, split each line up
                and create a new token for each line. We do this so it's easier
                to ascertain where errors occur on a line.
            */

            if (strpos($token['content'], $eolChar) !== false) {
                $tokenLines = explode($eolChar, $token['content']);
                $numLines   = count($tokenLines);
                $newToken   = array();

                for ($i = 0; $i < $numLines; $i++) {
                    $newToken['content'] = $tokenLines[$i];
                    if ($i === ($numLines - 1)) {
                        if ($tokenLines[$i] === '') {
                            break;
                        }
                    } else {
                        $newToken['content'] .= $eolChar;
                    }

                    $newToken['type']          = $token['type'];
                    $newToken['code']          = $token['code'];
                    $finalTokens[$newStackPtr] = $newToken;
                    $newStackPtr++;
                }
            } else {
                $finalTokens[$newStackPtr] = $token;
                $newStackPtr++;
            }//end if

            // Convert numbers, including decimals.
            if ($token['code'] === T_STRING
                || $token['code'] === T_OBJECT_OPERATOR
            ) {
                $newContent  = '';
                $oldStackPtr = $stackPtr;
                while (preg_match('|^[0-9\.]+$|', $tokens[$stackPtr]['content']) !== 0) {
                    $newContent .= $tokens[$stackPtr]['content'];
                    $stackPtr++;
                }

                if ($newContent !== '' && $newContent !== '.') {
                    $finalTokens[($newStackPtr - 1)]['content'] = $newContent;
                    if (ctype_digit($newContent) === true) {
                        $finalTokens[($newStackPtr - 1)]['code'] = constant('T_LNUMBER');
                        $finalTokens[($newStackPtr - 1)]['type'] = 'T_LNUMBER';
                    } else {
                        $finalTokens[($newStackPtr - 1)]['code'] = constant('T_DNUMBER');
                        $finalTokens[($newStackPtr - 1)]['type'] = 'T_DNUMBER';
                    }

                    $stackPtr--;
                    continue;
                } else {
                    $stackPtr = $oldStackPtr;
                }
            }//end if

            // Convert the token after an object operator into a string, in most cases.
            if ($token['code'] === T_OBJECT_OPERATOR) {
                for ($i = ($stackPtr + 1); $i < $numTokens; $i++) {
                    if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$i]['code']]) === true) {
                        continue;
                    }

                    if ($tokens[$i]['code'] !== T_PROTOTYPE
                        && $tokens[$i]['code'] !== T_LNUMBER
                        && $tokens[$i]['code'] !== T_DNUMBER
                    ) {
                        $tokens[$i]['code'] = T_STRING;
                        $tokens[$i]['type'] = 'T_STRING';
                    }

                    break;
                }
            }
        }//end for

        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t*** END TOKENIZING ***".PHP_EOL;
        }

        return $finalTokens;

    }//end tokenizeString()


    /**
     * Tokenizes a regular expression if one is found.
     *
     * A slash is only treated as the start of a regular expression when the
     * previous non-empty token is one that can precede an expression, a
     * closing unescaped slash is found on the same line, and the character
     * following the expression (after any modifiers and spaces) is one that
     * can follow an expression.
     *
     * If a regular expression is not found, NULL is returned.
     *
     * @param int    $char    The index of the possible regex start character.
     * @param string $string  The complete content of the string being tokenized.
     * @param array  $chars   An array of characters being tokenized.
     * @param array  $tokens  The current array of tokens found in the string.
     * @param string $eolChar The EOL character to use for splitting strings.
     *
     * @return array|null An array with 'start', 'end' and 'content' indexes,
     *                    or NULL if this is not a regular expression.
     */
    public function getRegexToken($char, $string, $chars, $tokens, $eolChar)
    {
        $beforeTokens = array(
            T_EQUAL               => true,
            T_IS_NOT_EQUAL        => true,
            T_IS_IDENTICAL        => true,
            T_IS_NOT_IDENTICAL    => true,
            T_OPEN_PARENTHESIS    => true,
            T_OPEN_SQUARE_BRACKET => true,
            T_RETURN              => true,
            T_BOOLEAN_OR          => true,
            T_BOOLEAN_AND         => true,
            T_BITWISE_OR          => true,
            T_BITWISE_AND         => true,
            T_COMMA               => true,
            T_COLON               => true,
            T_TYPEOF              => true,
            T_INLINE_THEN         => true,
            T_INLINE_ELSE         => true,
        );

        // tokenizeString() converts every EOL sequence to "\n" before it
        // calls this method, so "\n" is accepted as well as $eolChar.
        // Without it, a multi-character EOL such as "\r\n" never matches.
        $afterTokens = array(
            ','      => true,
            ')'      => true,
            ']'      => true,
            ';'      => true,
            ' '      => true,
            '.'      => true,
            ':'      => true,
            "\n"     => true,
            $eolChar => true,
        );

        // Find the last non-whitespace token that was added
        // to the tokens array.
        $numTokens = count($tokens);
        for ($prev = ($numTokens - 1); $prev >= 0; $prev--) {
            if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$prev]['code']]) === false) {
                break;
            }
        }

        if ($prev < 0 || isset($beforeTokens[$tokens[$prev]['code']]) === false) {
            return null;
        }

        // This is probably a regular expression, so look for the end of it.
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t* token possibly starts a regular expression *".PHP_EOL;
        }

        $eolLength = strlen($eolChar);
        $numChars  = count($chars);
        for ($next = ($char + 1); $next < $numChars; $next++) {
            if ($chars[$next] === '/') {
                // Just make sure this is not escaped first. A slash is
                // escaped only when it is preceded by an odd number of
                // backslashes: /...\\/ ends here, /...\\\/ does not.
                $escapes = 0;
                for ($x = ($next - 1); $x > $char; $x--) {
                    if ($chars[$x] !== '\\') {
                        break;
                    }

                    $escapes++;
                }

                if (($escapes % 2) === 0) {
                    break;
                }
            } else if ($chars[$next] === "\n"
                || substr($string, $next, $eolLength) === $eolChar
            ) {
                // This is the last token on the line and regular
                // expressions need to be defined on a single line,
                // so this is not a regular expression.
                break;
            }
        }

        if (isset($chars[$next]) === false || $chars[$next] !== '/') {
            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                echo "\t* could not find end of regular expression *".PHP_EOL;
            }

            return null;
        }

        while (isset($chars[($next + 1)]) === true
            && preg_match('|[a-zA-Z]|', $chars[($next + 1)]) !== 0
        ) {
            // The token directly after the end of the regex can
            // be modifiers like global and case insensitive
            // (e.g., /pattern/gi).
            $next++;
        }

        $regexEnd = $next;
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t* found end of regular expression at token $regexEnd *".PHP_EOL;
        }

        // Skip any spaces that follow the expression.
        for ($next = ($next + 1); $next < $numChars; $next++) {
            if ($chars[$next] !== ' ') {
                break;
            }
        }

        if (isset($chars[$next]) === false
            || isset($afterTokens[$chars[$next]]) === false
        ) {
            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                echo "\t* tokens after regular expression do not look correct *".PHP_EOL;
            }

            return null;
        }

        // This is a regular expression, so join all the tokens together.
        $content = implode('', array_slice($chars, $char, ($regexEnd - $char + 1)));

        $token = array(
            'start'   => $char,
            'end'     => $regexEnd,
            'content' => $content,
        );

        return $token;

    }//end getRegexToken()


    /**
     * Performs additional processing after main tokenizing.
     *
     * This additional processing looks for properties, closures, labels and objects.
     * The token array is modified in place.
     *
     * @param array  $tokens  The array of tokens to process.
     * @param string $eolChar The EOL character to use for splitting strings.
     *
     * @return void
     */
    public function processAdditional(&$tokens, $eolChar)
    {
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t*** START ADDITIONAL JS PROCESSING ***".PHP_EOL;
        }

        $numTokens  = count($tokens);
        $classStack = array();

        for ($i = 0; $i < $numTokens; $i++) {
            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                $type    = $tokens[$i]['type'];
                $content = PHP_CodeSniffer::prepareForOutput($tokens[$i]['content']);

                echo str_repeat("\t", count($classStack));
                echo "\tProcess token $i: $type => $content".PHP_EOL;
            }

            // Looking for functions that are actually closures.
            if ($tokens[$i]['code'] === T_FUNCTION && isset($tokens[$i]['scope_opener']) === true) {
                for ($x = ($i + 1); $x < $numTokens; $x++) {
                    if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false) {
                        break;
                    }
                }

                // A function keyword followed directly by an open
                // parenthesis has no name, so it is a closure.
                if (isset($tokens[$x]) === true && $tokens[$x]['code'] === T_OPEN_PARENTHESIS) {
                    $tokens[$i]['code'] = T_CLOSURE;
                    $tokens[$i]['type'] = 'T_CLOSURE';
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $line = $tokens[$i]['line'];
                        echo str_repeat("\t", count($classStack));
                        echo "\t* token $i on line $line changed from T_FUNCTION to T_CLOSURE".PHP_EOL;
                    }

                    for ($x = ($tokens[$i]['scope_opener'] + 1); $x < $tokens[$i]['scope_closer']; $x++) {
                        if (isset($tokens[$x]['conditions'][$i]) === false) {
                            continue;
                        }

                        $tokens[$x]['conditions'][$i] = T_CLOSURE;
                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            $type = $tokens[$x]['type'];
                            echo str_repeat("\t", count($classStack));
                            echo "\t\t* cleaned $x ($type) *".PHP_EOL;
                        }
                    }
                }//end if

                continue;
            } else if ($tokens[$i]['code'] === T_OPEN_CURLY_BRACKET
                && isset($tokens[$i]['scope_condition']) === false
                && isset($tokens[$i]['bracket_closer']) === true
            ) {
                // A curly bracket that does not open a scope is treated
                // as the start of an object.
                $classStack[] = $i;

                $closer = $tokens[$i]['bracket_closer'];
                $tokens[$i]['code']      = T_OBJECT;
                $tokens[$i]['type']      = 'T_OBJECT';
                $tokens[$closer]['code'] = T_CLOSE_OBJECT;
                $tokens[$closer]['type'] = 'T_CLOSE_OBJECT';

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo str_repeat("\t", count($classStack));
                    echo "\t* token $i converted from T_OPEN_CURLY_BRACKET to T_OBJECT *".PHP_EOL;
                    echo str_repeat("\t", count($classStack));
                    echo "\t* token $closer converted from T_CLOSE_CURLY_BRACKET to T_CLOSE_OBJECT *".PHP_EOL;
                }

                for ($x = ($i + 1); $x < $closer; $x++) {
                    $tokens[$x]['conditions'][$i] = T_OBJECT;
                    ksort($tokens[$x]['conditions'], SORT_NUMERIC);
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $type = $tokens[$x]['type'];
                        echo str_repeat("\t", count($classStack));
                        echo "\t\t* added T_OBJECT condition to $x ($type) *".PHP_EOL;
                    }
                }
            } else if ($tokens[$i]['code'] === T_CLOSE_OBJECT) {
                array_pop($classStack);
            } else if ($tokens[$i]['code'] === T_COLON) {
                // If it is a scope opener, it belongs to a
                // DEFAULT or CASE statement.
                if (isset($tokens[$i]['scope_condition']) === true) {
                    continue;
                }

                // Make sure this is not part of an inline IF statement.
                for ($x = ($i - 1); $x >= 0; $x--) {
                    if ($tokens[$x]['code'] === T_INLINE_THEN) {
                        $tokens[$i]['code'] = T_INLINE_ELSE;
                        $tokens[$i]['type'] = 'T_INLINE_ELSE';

                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            echo str_repeat("\t", count($classStack));
                            echo "\t* token $i converted from T_COLON to T_INLINE_ELSE *".PHP_EOL;
                        }

                        continue 2;
                    } else if ($tokens[$x]['line'] < $tokens[$i]['line']) {
                        break;
                    }
                }

                // The string to the left of the colon is either a property or label.
                for ($label = ($i - 1); $label >= 0; $label--) {
                    if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$label]['code']]) === false) {
                        break;
                    }
                }

                if ($label < 0
                    || ($tokens[$label]['code'] !== T_STRING
                    && $tokens[$label]['code'] !== T_CONSTANT_ENCAPSED_STRING)
                ) {
                    continue;
                }

                if (empty($classStack) === false) {
                    // Inside an object, so this is a property name.
                    $tokens[$label]['code'] = T_PROPERTY;
                    $tokens[$label]['type'] = 'T_PROPERTY';

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo str_repeat("\t", count($classStack));
                        echo "\t* token $label converted from T_STRING to T_PROPERTY *".PHP_EOL;
                    }
                } else {
                    $tokens[$label]['code'] = T_LABEL;
                    $tokens[$label]['type'] = 'T_LABEL';

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo str_repeat("\t", count($classStack));
                        echo "\t* token $label converted from T_STRING to T_LABEL *".PHP_EOL;
                    }
                }//end if
            }//end if
        }//end for

        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t*** END ADDITIONAL JS PROCESSING ***".PHP_EOL;
        }

    }//end processAdditional()


}//end class