Chris@0
|
1 <?php
|
Chris@0
|
2 /**
|
Chris@0
|
3 * Tokenizes JS code.
|
Chris@0
|
4 *
|
Chris@0
|
5 * PHP version 5
|
Chris@0
|
6 *
|
Chris@0
|
7 * @category PHP
|
Chris@0
|
8 * @package PHP_CodeSniffer
|
Chris@0
|
9 * @author Greg Sherwood <gsherwood@squiz.net>
|
Chris@0
|
10 * @author Marc McIntyre <mmcintyre@squiz.net>
|
Chris@0
|
11 * @copyright 2006-2014 Squiz Pty Ltd (ABN 77 084 670 600)
|
Chris@0
|
12 * @license https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
|
Chris@0
|
13 * @link http://pear.php.net/package/PHP_CodeSniffer
|
Chris@0
|
14 */
|
Chris@0
|
15
|
Chris@0
|
16 /**
|
Chris@0
|
17 * Tokenizes JS code.
|
Chris@0
|
18 *
|
Chris@0
|
19 * @category PHP
|
Chris@0
|
20 * @package PHP_CodeSniffer
|
Chris@0
|
21 * @author Greg Sherwood <gsherwood@squiz.net>
|
Chris@0
|
22 * @copyright 2006-2014 Squiz Pty Ltd (ABN 77 084 670 600)
|
Chris@0
|
23 * @license https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
|
Chris@0
|
24 * @version Release: @package_version@
|
Chris@0
|
25 * @link http://pear.php.net/package/PHP_CodeSniffer
|
Chris@0
|
26 */
|
Chris@0
|
27 class PHP_CodeSniffer_Tokenizers_JS
|
Chris@0
|
28 {
|
Chris@0
|
29
|
Chris@0
|
30 /**
|
Chris@0
|
31 * If TRUE, files that appear to be minified will not be processed.
|
Chris@0
|
32 *
|
Chris@0
|
33 * @var boolean
|
Chris@0
|
34 */
|
Chris@0
|
35 public $skipMinified = true;
|
Chris@0
|
36
|
Chris@0
|
37 /**
|
Chris@0
|
38 * A list of tokens that are allowed to open a scope.
|
Chris@0
|
39 *
|
Chris@0
|
40 * This array also contains information about what kind of token the scope
|
Chris@0
|
41 * opener uses to open and close the scope, if the token strictly requires
|
Chris@0
|
42 * an opener, if the token can share a scope closer, and who it can be shared
|
Chris@0
|
43 * with. An example of a token that shares a scope closer is a CASE scope.
|
Chris@0
|
44 *
|
Chris@0
|
45 * @var array
|
Chris@0
|
46 */
|
Chris@0
|
47 public $scopeOpeners = array(
|
Chris@0
|
48 T_IF => array(
|
Chris@0
|
49 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
|
Chris@0
|
50 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
|
Chris@0
|
51 'strict' => false,
|
Chris@0
|
52 'shared' => false,
|
Chris@0
|
53 'with' => array(),
|
Chris@0
|
54 ),
|
Chris@0
|
55 T_TRY => array(
|
Chris@0
|
56 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
|
Chris@0
|
57 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
|
Chris@0
|
58 'strict' => true,
|
Chris@0
|
59 'shared' => false,
|
Chris@0
|
60 'with' => array(),
|
Chris@0
|
61 ),
|
Chris@0
|
62 T_CATCH => array(
|
Chris@0
|
63 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
|
Chris@0
|
64 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
|
Chris@0
|
65 'strict' => true,
|
Chris@0
|
66 'shared' => false,
|
Chris@0
|
67 'with' => array(),
|
Chris@0
|
68 ),
|
Chris@0
|
69 T_ELSE => array(
|
Chris@0
|
70 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
|
Chris@0
|
71 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
|
Chris@0
|
72 'strict' => false,
|
Chris@0
|
73 'shared' => false,
|
Chris@0
|
74 'with' => array(),
|
Chris@0
|
75 ),
|
Chris@0
|
76 T_FOR => array(
|
Chris@0
|
77 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
|
Chris@0
|
78 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
|
Chris@0
|
79 'strict' => false,
|
Chris@0
|
80 'shared' => false,
|
Chris@0
|
81 'with' => array(),
|
Chris@0
|
82 ),
|
Chris@0
|
83 T_FUNCTION => array(
|
Chris@0
|
84 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
|
Chris@0
|
85 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
|
Chris@0
|
86 'strict' => false,
|
Chris@0
|
87 'shared' => false,
|
Chris@0
|
88 'with' => array(),
|
Chris@0
|
89 ),
|
Chris@0
|
90 T_WHILE => array(
|
Chris@0
|
91 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
|
Chris@0
|
92 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
|
Chris@0
|
93 'strict' => false,
|
Chris@0
|
94 'shared' => false,
|
Chris@0
|
95 'with' => array(),
|
Chris@0
|
96 ),
|
Chris@0
|
97 T_DO => array(
|
Chris@0
|
98 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
|
Chris@0
|
99 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
|
Chris@0
|
100 'strict' => true,
|
Chris@0
|
101 'shared' => false,
|
Chris@0
|
102 'with' => array(),
|
Chris@0
|
103 ),
|
Chris@0
|
104 T_SWITCH => array(
|
Chris@0
|
105 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
|
Chris@0
|
106 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
|
Chris@0
|
107 'strict' => true,
|
Chris@0
|
108 'shared' => false,
|
Chris@0
|
109 'with' => array(),
|
Chris@0
|
110 ),
|
Chris@0
|
111 T_CASE => array(
|
Chris@0
|
112 'start' => array(T_COLON => T_COLON),
|
Chris@0
|
113 'end' => array(
|
Chris@0
|
114 T_BREAK => T_BREAK,
|
Chris@0
|
115 T_RETURN => T_RETURN,
|
Chris@0
|
116 T_CONTINUE => T_CONTINUE,
|
Chris@0
|
117 T_THROW => T_THROW,
|
Chris@0
|
118 ),
|
Chris@0
|
119 'strict' => true,
|
Chris@0
|
120 'shared' => true,
|
Chris@0
|
121 'with' => array(
|
Chris@0
|
122 T_DEFAULT => T_DEFAULT,
|
Chris@0
|
123 T_CASE => T_CASE,
|
Chris@0
|
124 T_SWITCH => T_SWITCH,
|
Chris@0
|
125 ),
|
Chris@0
|
126 ),
|
Chris@0
|
127 T_DEFAULT => array(
|
Chris@0
|
128 'start' => array(T_COLON => T_COLON),
|
Chris@0
|
129 'end' => array(
|
Chris@0
|
130 T_BREAK => T_BREAK,
|
Chris@0
|
131 T_RETURN => T_RETURN,
|
Chris@0
|
132 T_CONTINUE => T_CONTINUE,
|
Chris@0
|
133 T_THROW => T_THROW,
|
Chris@0
|
134 ),
|
Chris@0
|
135 'strict' => true,
|
Chris@0
|
136 'shared' => true,
|
Chris@0
|
137 'with' => array(
|
Chris@0
|
138 T_CASE => T_CASE,
|
Chris@0
|
139 T_SWITCH => T_SWITCH,
|
Chris@0
|
140 ),
|
Chris@0
|
141 ),
|
Chris@0
|
142 );
|
Chris@0
|
143
|
Chris@0
|
144 /**
|
Chris@0
|
145 * A list of tokens that end the scope.
|
Chris@0
|
146 *
|
Chris@0
|
147 * This array is just a unique collection of the end tokens
|
Chris@0
|
148 * from the $scopeOpeners array. The data is duplicated here to
|
Chris@0
|
149 * save time during parsing of the file.
|
Chris@0
|
150 *
|
Chris@0
|
151 * @var array
|
Chris@0
|
152 */
|
Chris@0
|
153 public $endScopeTokens = array(
|
Chris@0
|
154 T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET,
|
Chris@0
|
155 T_BREAK => T_BREAK,
|
Chris@0
|
156 );
|
Chris@0
|
157
|
Chris@0
|
158 /**
|
Chris@0
|
159 * A list of special JS tokens and their types.
|
Chris@0
|
160 *
|
Chris@0
|
161 * @var array
|
Chris@0
|
162 */
|
Chris@0
|
163 protected $tokenValues = array(
|
Chris@0
|
164 'function' => 'T_FUNCTION',
|
Chris@0
|
165 'prototype' => 'T_PROTOTYPE',
|
Chris@0
|
166 'try' => 'T_TRY',
|
Chris@0
|
167 'catch' => 'T_CATCH',
|
Chris@0
|
168 'return' => 'T_RETURN',
|
Chris@0
|
169 'throw' => 'T_THROW',
|
Chris@0
|
170 'break' => 'T_BREAK',
|
Chris@0
|
171 'switch' => 'T_SWITCH',
|
Chris@0
|
172 'continue' => 'T_CONTINUE',
|
Chris@0
|
173 'if' => 'T_IF',
|
Chris@0
|
174 'else' => 'T_ELSE',
|
Chris@0
|
175 'do' => 'T_DO',
|
Chris@0
|
176 'while' => 'T_WHILE',
|
Chris@0
|
177 'for' => 'T_FOR',
|
Chris@0
|
178 'var' => 'T_VAR',
|
Chris@0
|
179 'case' => 'T_CASE',
|
Chris@0
|
180 'default' => 'T_DEFAULT',
|
Chris@0
|
181 'true' => 'T_TRUE',
|
Chris@0
|
182 'false' => 'T_FALSE',
|
Chris@0
|
183 'null' => 'T_NULL',
|
Chris@0
|
184 'this' => 'T_THIS',
|
Chris@0
|
185 'typeof' => 'T_TYPEOF',
|
Chris@0
|
186 '(' => 'T_OPEN_PARENTHESIS',
|
Chris@0
|
187 ')' => 'T_CLOSE_PARENTHESIS',
|
Chris@0
|
188 '{' => 'T_OPEN_CURLY_BRACKET',
|
Chris@0
|
189 '}' => 'T_CLOSE_CURLY_BRACKET',
|
Chris@0
|
190 '[' => 'T_OPEN_SQUARE_BRACKET',
|
Chris@0
|
191 ']' => 'T_CLOSE_SQUARE_BRACKET',
|
Chris@0
|
192 '?' => 'T_INLINE_THEN',
|
Chris@0
|
193 '.' => 'T_OBJECT_OPERATOR',
|
Chris@0
|
194 '+' => 'T_PLUS',
|
Chris@0
|
195 '-' => 'T_MINUS',
|
Chris@0
|
196 '*' => 'T_MULTIPLY',
|
Chris@0
|
197 '%' => 'T_MODULUS',
|
Chris@0
|
198 '/' => 'T_DIVIDE',
|
Chris@0
|
199 '^' => 'T_LOGICAL_XOR',
|
Chris@0
|
200 ',' => 'T_COMMA',
|
Chris@0
|
201 ';' => 'T_SEMICOLON',
|
Chris@0
|
202 ':' => 'T_COLON',
|
Chris@0
|
203 '<' => 'T_LESS_THAN',
|
Chris@0
|
204 '>' => 'T_GREATER_THAN',
|
Chris@0
|
205 '<<' => 'T_SL',
|
Chris@0
|
206 '>>' => 'T_SR',
|
Chris@0
|
207 '>>>' => 'T_ZSR',
|
Chris@0
|
208 '<<=' => 'T_SL_EQUAL',
|
Chris@0
|
209 '>>=' => 'T_SR_EQUAL',
|
Chris@0
|
210 '>>>=' => 'T_ZSR_EQUAL',
|
Chris@0
|
211 '<=' => 'T_IS_SMALLER_OR_EQUAL',
|
Chris@0
|
212 '>=' => 'T_IS_GREATER_OR_EQUAL',
|
Chris@0
|
213 '=>' => 'T_DOUBLE_ARROW',
|
Chris@0
|
214 '!' => 'T_BOOLEAN_NOT',
|
Chris@0
|
215 '||' => 'T_BOOLEAN_OR',
|
Chris@0
|
216 '&&' => 'T_BOOLEAN_AND',
|
Chris@0
|
217 '|' => 'T_BITWISE_OR',
|
Chris@0
|
218 '&' => 'T_BITWISE_AND',
|
Chris@0
|
219 '!=' => 'T_IS_NOT_EQUAL',
|
Chris@0
|
220 '!==' => 'T_IS_NOT_IDENTICAL',
|
Chris@0
|
221 '=' => 'T_EQUAL',
|
Chris@0
|
222 '==' => 'T_IS_EQUAL',
|
Chris@0
|
223 '===' => 'T_IS_IDENTICAL',
|
Chris@0
|
224 '-=' => 'T_MINUS_EQUAL',
|
Chris@0
|
225 '+=' => 'T_PLUS_EQUAL',
|
Chris@0
|
226 '*=' => 'T_MUL_EQUAL',
|
Chris@0
|
227 '/=' => 'T_DIV_EQUAL',
|
Chris@0
|
228 '%=' => 'T_MOD_EQUAL',
|
Chris@0
|
229 '++' => 'T_INC',
|
Chris@0
|
230 '--' => 'T_DEC',
|
Chris@0
|
231 '//' => 'T_COMMENT',
|
Chris@0
|
232 '/*' => 'T_COMMENT',
|
Chris@0
|
233 '/**' => 'T_DOC_COMMENT',
|
Chris@0
|
234 '*/' => 'T_COMMENT',
|
Chris@0
|
235 );
|
Chris@0
|
236
|
Chris@0
|
237 /**
|
Chris@0
|
238 * A list of string delimiters.
|
Chris@0
|
239 *
|
Chris@0
|
240 * @var array
|
Chris@0
|
241 */
|
Chris@0
|
242 protected $stringTokens = array(
|
Chris@0
|
243 '\'' => '\'',
|
Chris@0
|
244 '"' => '"',
|
Chris@0
|
245 );
|
Chris@0
|
246
|
Chris@0
|
247 /**
|
Chris@0
|
248 * A list of tokens that start and end comments.
|
Chris@0
|
249 *
|
Chris@0
|
250 * @var array
|
Chris@0
|
251 */
|
Chris@0
|
252 protected $commentTokens = array(
|
Chris@0
|
253 '//' => null,
|
Chris@0
|
254 '/*' => '*/',
|
Chris@0
|
255 '/**' => '*/',
|
Chris@0
|
256 );
|
Chris@0
|
257
|
Chris@0
|
258
|
Chris@0
|
259 /**
|
Chris@0
|
260 * Creates an array of tokens when given some JS code.
|
Chris@0
|
261 *
|
Chris@0
|
262 * @param string $string The string to tokenize.
|
Chris@0
|
263 * @param string $eolChar The EOL character to use for splitting strings.
|
Chris@0
|
264 *
|
Chris@0
|
265 * @return array
|
Chris@0
|
266 */
|
Chris@0
|
267 public function tokenizeString($string, $eolChar='\n')
|
Chris@0
|
268 {
|
Chris@0
|
269 if (PHP_CODESNIFFER_VERBOSITY > 1) {
|
Chris@0
|
270 echo "\t*** START JS TOKENIZING ***".PHP_EOL;
|
Chris@0
|
271 }
|
Chris@0
|
272
|
Chris@0
|
273 $maxTokenLength = 0;
|
Chris@0
|
274 foreach ($this->tokenValues as $token => $values) {
|
Chris@0
|
275 if (strlen($token) > $maxTokenLength) {
|
Chris@0
|
276 $maxTokenLength = strlen($token);
|
Chris@0
|
277 }
|
Chris@0
|
278 }
|
Chris@0
|
279
|
Chris@0
|
280 $tokens = array();
|
Chris@0
|
281 $inString = '';
|
Chris@0
|
282 $stringChar = null;
|
Chris@0
|
283 $inComment = '';
|
Chris@0
|
284 $buffer = '';
|
Chris@0
|
285 $preStringBuffer = '';
|
Chris@0
|
286 $cleanBuffer = false;
|
Chris@0
|
287
|
Chris@0
|
288 $commentTokenizer = new PHP_CodeSniffer_Tokenizers_Comment();
|
Chris@0
|
289
|
Chris@0
|
290 $tokens[] = array(
|
Chris@0
|
291 'code' => T_OPEN_TAG,
|
Chris@0
|
292 'type' => 'T_OPEN_TAG',
|
Chris@0
|
293 'content' => '',
|
Chris@0
|
294 );
|
Chris@0
|
295
|
Chris@0
|
296 // Convert newlines to single characters for ease of
|
Chris@0
|
297 // processing. We will change them back later.
|
Chris@0
|
298 $string = str_replace($eolChar, "\n", $string);
|
Chris@0
|
299
|
Chris@0
|
300 $chars = str_split($string);
|
Chris@0
|
301 $numChars = count($chars);
|
Chris@0
|
302 for ($i = 0; $i < $numChars; $i++) {
|
Chris@0
|
303 $char = $chars[$i];
|
Chris@0
|
304
|
Chris@0
|
305 if (PHP_CODESNIFFER_VERBOSITY > 1) {
|
Chris@0
|
306 $content = PHP_CodeSniffer::prepareForOutput($char);
|
Chris@0
|
307 $bufferContent = PHP_CodeSniffer::prepareForOutput($buffer);
|
Chris@0
|
308
|
Chris@0
|
309 if ($inString !== '') {
|
Chris@0
|
310 echo "\t";
|
Chris@0
|
311 }
|
Chris@0
|
312
|
Chris@0
|
313 if ($inComment !== '') {
|
Chris@0
|
314 echo "\t";
|
Chris@0
|
315 }
|
Chris@0
|
316
|
Chris@0
|
317 echo "\tProcess char $i => $content (buffer: $bufferContent)".PHP_EOL;
|
Chris@0
|
318 }//end if
|
Chris@0
|
319
|
Chris@0
|
320 if ($inString === '' && $inComment === '' && $buffer !== '') {
|
Chris@0
|
321 // If the buffer only has whitespace and we are about to
|
Chris@0
|
322 // add a character, store the whitespace first.
|
Chris@0
|
323 if (trim($char) !== '' && trim($buffer) === '') {
|
Chris@0
|
324 $tokens[] = array(
|
Chris@0
|
325 'code' => T_WHITESPACE,
|
Chris@0
|
326 'type' => 'T_WHITESPACE',
|
Chris@0
|
327 'content' => str_replace("\n", $eolChar, $buffer),
|
Chris@0
|
328 );
|
Chris@0
|
329
|
Chris@0
|
330 if (PHP_CODESNIFFER_VERBOSITY > 1) {
|
Chris@0
|
331 $content = PHP_CodeSniffer::prepareForOutput($buffer);
|
Chris@0
|
332 echo "\t=> Added token T_WHITESPACE ($content)".PHP_EOL;
|
Chris@0
|
333 }
|
Chris@0
|
334
|
Chris@0
|
335 $buffer = '';
|
Chris@0
|
336 }
|
Chris@0
|
337
|
Chris@0
|
338 // If the buffer is not whitespace and we are about to
|
Chris@0
|
339 // add a whitespace character, store the content first.
|
Chris@0
|
340 if ($inString === ''
|
Chris@0
|
341 && $inComment === ''
|
Chris@0
|
342 && trim($char) === ''
|
Chris@0
|
343 && trim($buffer) !== ''
|
Chris@0
|
344 ) {
|
Chris@0
|
345 $tokens[] = array(
|
Chris@0
|
346 'code' => T_STRING,
|
Chris@0
|
347 'type' => 'T_STRING',
|
Chris@0
|
348 'content' => str_replace("\n", $eolChar, $buffer),
|
Chris@0
|
349 );
|
Chris@0
|
350
|
Chris@0
|
351 if (PHP_CODESNIFFER_VERBOSITY > 1) {
|
Chris@0
|
352 $content = PHP_CodeSniffer::prepareForOutput($buffer);
|
Chris@0
|
353 echo "\t=> Added token T_STRING ($content)".PHP_EOL;
|
Chris@0
|
354 }
|
Chris@0
|
355
|
Chris@0
|
356 $buffer = '';
|
Chris@0
|
357 }
|
Chris@0
|
358 }//end if
|
Chris@0
|
359
|
Chris@0
|
360 // Process strings.
|
Chris@0
|
361 if ($inComment === '' && isset($this->stringTokens[$char]) === true) {
|
Chris@0
|
362 if ($inString === $char) {
|
Chris@0
|
363 // This could be the end of the string, but make sure it
|
Chris@0
|
364 // is not escaped first.
|
Chris@0
|
365 $escapes = 0;
|
Chris@0
|
366 for ($x = ($i - 1); $x >= 0; $x--) {
|
Chris@0
|
367 if ($chars[$x] !== '\\') {
|
Chris@0
|
368 break;
|
Chris@0
|
369 }
|
Chris@0
|
370
|
Chris@0
|
371 $escapes++;
|
Chris@0
|
372 }
|
Chris@0
|
373
|
Chris@0
|
374 if ($escapes === 0 || ($escapes % 2) === 0) {
|
Chris@0
|
375 // There is an even number of escape chars,
|
Chris@0
|
376 // so this is not escaped, it is the end of the string.
|
Chris@0
|
377 $tokens[] = array(
|
Chris@0
|
378 'code' => T_CONSTANT_ENCAPSED_STRING,
|
Chris@0
|
379 'type' => 'T_CONSTANT_ENCAPSED_STRING',
|
Chris@0
|
380 'content' => str_replace("\n", $eolChar, $buffer).$char,
|
Chris@0
|
381 );
|
Chris@0
|
382
|
Chris@0
|
383 if (PHP_CODESNIFFER_VERBOSITY > 1) {
|
Chris@0
|
384 echo "\t\t* found end of string *".PHP_EOL;
|
Chris@0
|
385 $content = PHP_CodeSniffer::prepareForOutput($buffer.$char);
|
Chris@0
|
386 echo "\t=> Added token T_CONSTANT_ENCAPSED_STRING ($content)".PHP_EOL;
|
Chris@0
|
387 }
|
Chris@0
|
388
|
Chris@0
|
389 $buffer = '';
|
Chris@0
|
390 $preStringBuffer = '';
|
Chris@0
|
391 $inString = '';
|
Chris@0
|
392 $stringChar = null;
|
Chris@0
|
393 continue;
|
Chris@0
|
394 }//end if
|
Chris@0
|
395 } else if ($inString === '') {
|
Chris@0
|
396 $inString = $char;
|
Chris@0
|
397 $stringChar = $i;
|
Chris@0
|
398 $preStringBuffer = $buffer;
|
Chris@0
|
399
|
Chris@0
|
400 if (PHP_CODESNIFFER_VERBOSITY > 1) {
|
Chris@0
|
401 echo "\t\t* looking for string closer *".PHP_EOL;
|
Chris@0
|
402 }
|
Chris@0
|
403 }//end if
|
Chris@0
|
404 }//end if
|
Chris@0
|
405
|
Chris@0
|
406 if ($inString !== '' && $char === "\n") {
|
Chris@0
|
407 // Unless this newline character is escaped, the string did not
|
Chris@0
|
408 // end before the end of the line, which means it probably
|
Chris@0
|
409 // wasn't a string at all (maybe a regex).
|
Chris@0
|
410 if ($chars[($i - 1)] !== '\\') {
|
Chris@0
|
411 $i = $stringChar;
|
Chris@0
|
412 $buffer = $preStringBuffer;
|
Chris@0
|
413 $preStringBuffer = '';
|
Chris@0
|
414 $inString = '';
|
Chris@0
|
415 $stringChar = null;
|
Chris@0
|
416 $char = $chars[$i];
|
Chris@0
|
417
|
Chris@0
|
418 if (PHP_CODESNIFFER_VERBOSITY > 1) {
|
Chris@0
|
419 echo "\t\t* found newline before end of string, bailing *".PHP_EOL;
|
Chris@0
|
420 }
|
Chris@0
|
421 }
|
Chris@0
|
422 }
|
Chris@0
|
423
|
Chris@0
|
424 $buffer .= $char;
|
Chris@0
|
425
|
Chris@0
|
426 // We don't look for special tokens inside strings,
|
Chris@0
|
427 // so if we are in a string, we can continue here now
|
Chris@0
|
428 // that the current char is in the buffer.
|
Chris@0
|
429 if ($inString !== '') {
|
Chris@0
|
430 continue;
|
Chris@0
|
431 }
|
Chris@0
|
432
|
Chris@0
|
433 // Special case for T_DIVIDE which can actually be
|
Chris@0
|
434 // the start of a regular expression.
|
Chris@0
|
435 if ($buffer === $char && $char === '/' && $chars[($i + 1)] !== '*') {
|
Chris@0
|
436 $regex = $this->getRegexToken(
|
Chris@0
|
437 $i,
|
Chris@0
|
438 $string,
|
Chris@0
|
439 $chars,
|
Chris@0
|
440 $tokens,
|
Chris@0
|
441 $eolChar
|
Chris@0
|
442 );
|
Chris@0
|
443
|
Chris@0
|
444 if ($regex !== null) {
|
Chris@0
|
445 $tokens[] = array(
|
Chris@0
|
446 'code' => T_REGULAR_EXPRESSION,
|
Chris@0
|
447 'type' => 'T_REGULAR_EXPRESSION',
|
Chris@0
|
448 'content' => $regex['content'],
|
Chris@0
|
449 );
|
Chris@0
|
450
|
Chris@0
|
451 if (PHP_CODESNIFFER_VERBOSITY > 1) {
|
Chris@0
|
452 $content = PHP_CodeSniffer::prepareForOutput($regex['content']);
|
Chris@0
|
453 echo "\t=> Added token T_REGULAR_EXPRESSION ($content)".PHP_EOL;
|
Chris@0
|
454 }
|
Chris@0
|
455
|
Chris@0
|
456 $i = $regex['end'];
|
Chris@0
|
457 $buffer = '';
|
Chris@0
|
458 $cleanBuffer = false;
|
Chris@0
|
459 continue;
|
Chris@0
|
460 }//end if
|
Chris@0
|
461 }//end if
|
Chris@0
|
462
|
Chris@0
|
463 // Check for known tokens, but ignore tokens found that are not at
|
Chris@0
|
464 // the end of a string, like FOR and this.FORmat.
|
Chris@0
|
465 if (isset($this->tokenValues[strtolower($buffer)]) === true
|
Chris@0
|
466 && (preg_match('|[a-zA-z0-9_]|', $char) === 0
|
Chris@0
|
467 || isset($chars[($i + 1)]) === false
|
Chris@0
|
468 || preg_match('|[a-zA-z0-9_]|', $chars[($i + 1)]) === 0)
|
Chris@0
|
469 ) {
|
Chris@0
|
470 $matchedToken = false;
|
Chris@0
|
471 $lookAheadLength = ($maxTokenLength - strlen($buffer));
|
Chris@0
|
472
|
Chris@0
|
473 if ($lookAheadLength > 0) {
|
Chris@0
|
474 // The buffer contains a token type, but we need
|
Chris@0
|
475 // to look ahead at the next chars to see if this is
|
Chris@0
|
476 // actually part of a larger token. For example,
|
Chris@0
|
477 // FOR and FOREACH.
|
Chris@0
|
478 if (PHP_CODESNIFFER_VERBOSITY > 1) {
|
Chris@0
|
479 echo "\t\t* buffer possibly contains token, looking ahead $lookAheadLength chars *".PHP_EOL;
|
Chris@0
|
480 }
|
Chris@0
|
481
|
Chris@0
|
482 $charBuffer = $buffer;
|
Chris@0
|
483 for ($x = 1; $x <= $lookAheadLength; $x++) {
|
Chris@0
|
484 if (isset($chars[($i + $x)]) === false) {
|
Chris@0
|
485 break;
|
Chris@0
|
486 }
|
Chris@0
|
487
|
Chris@0
|
488 $charBuffer .= $chars[($i + $x)];
|
Chris@0
|
489
|
Chris@0
|
490 if (PHP_CODESNIFFER_VERBOSITY > 1) {
|
Chris@0
|
491 $content = PHP_CodeSniffer::prepareForOutput($charBuffer);
|
Chris@0
|
492 echo "\t\t=> Looking ahead $x chars => $content".PHP_EOL;
|
Chris@0
|
493 }
|
Chris@0
|
494
|
Chris@0
|
495 if (isset($this->tokenValues[strtolower($charBuffer)]) === true) {
|
Chris@0
|
496 // We've found something larger that matches
|
Chris@0
|
497 // so we can ignore this char. Except for 1 very specific
|
Chris@0
|
498 // case where a comment like /**/ needs to tokenize as
|
Chris@0
|
499 // T_COMMENT and not T_DOC_COMMENT.
|
Chris@0
|
500 $oldType = $this->tokenValues[strtolower($buffer)];
|
Chris@0
|
501 $newType = $this->tokenValues[strtolower($charBuffer)];
|
Chris@0
|
502 if ($oldType === 'T_COMMENT'
|
Chris@0
|
503 && $newType === 'T_DOC_COMMENT'
|
Chris@0
|
504 && $chars[($i + $x + 1)] === '/'
|
Chris@0
|
505 ) {
|
Chris@0
|
506 if (PHP_CODESNIFFER_VERBOSITY > 1) {
|
Chris@0
|
507 echo "\t\t* look ahead ignored T_DOC_COMMENT, continuing *".PHP_EOL;
|
Chris@0
|
508 }
|
Chris@0
|
509 } else {
|
Chris@0
|
510 if (PHP_CODESNIFFER_VERBOSITY > 1) {
|
Chris@0
|
511 echo "\t\t* look ahead found more specific token ($newType), ignoring $i *".PHP_EOL;
|
Chris@0
|
512 }
|
Chris@0
|
513
|
Chris@0
|
514 $matchedToken = true;
|
Chris@0
|
515 break;
|
Chris@0
|
516 }
|
Chris@0
|
517 }//end if
|
Chris@0
|
518 }//end for
|
Chris@0
|
519 }//end if
|
Chris@0
|
520
|
Chris@0
|
521 if ($matchedToken === false) {
|
Chris@0
|
522 if (PHP_CODESNIFFER_VERBOSITY > 1 && $lookAheadLength > 0) {
|
Chris@0
|
523 echo "\t\t* look ahead found nothing *".PHP_EOL;
|
Chris@0
|
524 }
|
Chris@0
|
525
|
Chris@0
|
526 $value = $this->tokenValues[strtolower($buffer)];
|
Chris@0
|
527
|
Chris@0
|
528 if ($value === 'T_FUNCTION' && $buffer !== 'function') {
|
Chris@0
|
529 // The function keyword needs to be all lowercase or else
|
Chris@0
|
530 // it is just a function called "Function".
|
Chris@0
|
531 $value = 'T_STRING';
|
Chris@0
|
532 }
|
Chris@0
|
533
|
Chris@0
|
534 $tokens[] = array(
|
Chris@0
|
535 'code' => constant($value),
|
Chris@0
|
536 'type' => $value,
|
Chris@0
|
537 'content' => $buffer,
|
Chris@0
|
538 );
|
Chris@0
|
539
|
Chris@0
|
540 if (PHP_CODESNIFFER_VERBOSITY > 1) {
|
Chris@0
|
541 $content = PHP_CodeSniffer::prepareForOutput($buffer);
|
Chris@0
|
542 echo "\t=> Added token $value ($content)".PHP_EOL;
|
Chris@0
|
543 }
|
Chris@0
|
544
|
Chris@0
|
545 $cleanBuffer = true;
|
Chris@0
|
546 }//end if
|
Chris@0
|
547 } else if (isset($this->tokenValues[strtolower($char)]) === true) {
|
Chris@0
|
548 // No matter what token we end up using, we don't
|
Chris@0
|
549 // need the content in the buffer any more because we have
|
Chris@0
|
550 // found a valid token.
|
Chris@0
|
551 $newContent = substr(str_replace("\n", $eolChar, $buffer), 0, -1);
|
Chris@0
|
552 if ($newContent !== '') {
|
Chris@0
|
553 $tokens[] = array(
|
Chris@0
|
554 'code' => T_STRING,
|
Chris@0
|
555 'type' => 'T_STRING',
|
Chris@0
|
556 'content' => $newContent,
|
Chris@0
|
557 );
|
Chris@0
|
558
|
Chris@0
|
559 if (PHP_CODESNIFFER_VERBOSITY > 1) {
|
Chris@0
|
560 $content = PHP_CodeSniffer::prepareForOutput(substr($buffer, 0, -1));
|
Chris@0
|
561 echo "\t=> Added token T_STRING ($content)".PHP_EOL;
|
Chris@0
|
562 }
|
Chris@0
|
563 }
|
Chris@0
|
564
|
Chris@0
|
565 if (PHP_CODESNIFFER_VERBOSITY > 1) {
|
Chris@0
|
566 echo "\t\t* char is token, looking ahead ".($maxTokenLength - 1).' chars *'.PHP_EOL;
|
Chris@0
|
567 }
|
Chris@0
|
568
|
Chris@0
|
569 // The char is a token type, but we need to look ahead at the
|
Chris@0
|
570 // next chars to see if this is actually part of a larger token.
|
Chris@0
|
571 // For example, = and ===.
|
Chris@0
|
572 $charBuffer = $char;
|
Chris@0
|
573 $matchedToken = false;
|
Chris@0
|
574 for ($x = 1; $x <= $maxTokenLength; $x++) {
|
Chris@0
|
575 if (isset($chars[($i + $x)]) === false) {
|
Chris@0
|
576 break;
|
Chris@0
|
577 }
|
Chris@0
|
578
|
Chris@0
|
579 $charBuffer .= $chars[($i + $x)];
|
Chris@0
|
580
|
Chris@0
|
581 if (PHP_CODESNIFFER_VERBOSITY > 1) {
|
Chris@0
|
582 $content = PHP_CodeSniffer::prepareForOutput($charBuffer);
|
Chris@0
|
583 echo "\t\t=> Looking ahead $x chars => $content".PHP_EOL;
|
Chris@0
|
584 }
|
Chris@0
|
585
|
Chris@0
|
586 if (isset($this->tokenValues[strtolower($charBuffer)]) === true) {
|
Chris@0
|
587 // We've found something larger that matches
|
Chris@0
|
588 // so we can ignore this char.
|
Chris@0
|
589 if (PHP_CODESNIFFER_VERBOSITY > 1) {
|
Chris@0
|
590 $type = $this->tokenValues[strtolower($charBuffer)];
|
Chris@0
|
591 echo "\t\t* look ahead found more specific token ($type), ignoring $i *".PHP_EOL;
|
Chris@0
|
592 }
|
Chris@0
|
593
|
Chris@0
|
594 $matchedToken = true;
|
Chris@0
|
595 break;
|
Chris@0
|
596 }
|
Chris@0
|
597 }//end for
|
Chris@0
|
598
|
Chris@0
|
599 if ($matchedToken === false) {
|
Chris@0
|
600 $value = $this->tokenValues[strtolower($char)];
|
Chris@0
|
601 $tokens[] = array(
|
Chris@0
|
602 'code' => constant($value),
|
Chris@0
|
603 'type' => $value,
|
Chris@0
|
604 'content' => $char,
|
Chris@0
|
605 );
|
Chris@0
|
606
|
Chris@0
|
607 if (PHP_CODESNIFFER_VERBOSITY > 1) {
|
Chris@0
|
608 echo "\t\t* look ahead found nothing *".PHP_EOL;
|
Chris@0
|
609 $content = PHP_CodeSniffer::prepareForOutput($char);
|
Chris@0
|
610 echo "\t=> Added token $value ($content)".PHP_EOL;
|
Chris@0
|
611 }
|
Chris@0
|
612
|
Chris@0
|
613 $cleanBuffer = true;
|
Chris@0
|
614 } else {
|
Chris@0
|
615 $buffer = $char;
|
Chris@0
|
616 }//end if
|
Chris@0
|
617 }//end if
|
Chris@0
|
618
|
Chris@0
|
619 // Keep track of content inside comments.
|
Chris@0
|
620 if ($inComment === ''
|
Chris@0
|
621 && array_key_exists($buffer, $this->commentTokens) === true
|
Chris@0
|
622 ) {
|
Chris@0
|
623 // This is not really a comment if the content
|
Chris@0
|
624 // looks like \// (i.e., it is escaped).
|
Chris@0
|
625 if (isset($chars[($i - 2)]) === true && $chars[($i - 2)] === '\\') {
|
Chris@0
|
626 $lastToken = array_pop($tokens);
|
Chris@0
|
627 $lastContent = $lastToken['content'];
|
Chris@0
|
628 if (PHP_CODESNIFFER_VERBOSITY > 1) {
|
Chris@0
|
629 $value = $this->tokenValues[strtolower($lastContent)];
|
Chris@0
|
630 $content = PHP_CodeSniffer::prepareForOutput($lastContent);
|
Chris@0
|
631 echo "\t=> Removed token $value ($content)".PHP_EOL;
|
Chris@0
|
632 }
|
Chris@0
|
633
|
Chris@0
|
634 $lastChars = str_split($lastContent);
|
Chris@0
|
635 $lastNumChars = count($lastChars);
|
Chris@0
|
636 for ($x = 0; $x < $lastNumChars; $x++) {
|
Chris@0
|
637 $lastChar = $lastChars[$x];
|
Chris@0
|
638 $value = $this->tokenValues[strtolower($lastChar)];
|
Chris@0
|
639 $tokens[] = array(
|
Chris@0
|
640 'code' => constant($value),
|
Chris@0
|
641 'type' => $value,
|
Chris@0
|
642 'content' => $lastChar,
|
Chris@0
|
643 );
|
Chris@0
|
644
|
Chris@0
|
645 if (PHP_CODESNIFFER_VERBOSITY > 1) {
|
Chris@0
|
646 $content = PHP_CodeSniffer::prepareForOutput($lastChar);
|
Chris@0
|
647 echo "\t=> Added token $value ($content)".PHP_EOL;
|
Chris@0
|
648 }
|
Chris@0
|
649 }
|
Chris@0
|
650 } else {
|
Chris@0
|
651 // We have started a comment.
|
Chris@0
|
652 $inComment = $buffer;
|
Chris@0
|
653
|
Chris@0
|
654 if (PHP_CODESNIFFER_VERBOSITY > 1) {
|
Chris@0
|
655 echo "\t\t* looking for end of comment *".PHP_EOL;
|
Chris@0
|
656 }
|
Chris@0
|
657 }//end if
|
Chris@0
|
658 } else if ($inComment !== '') {
|
Chris@0
|
659 if ($this->commentTokens[$inComment] === null) {
|
Chris@0
|
660 // Comment ends at the next newline.
|
Chris@0
|
661 if (strpos($buffer, "\n") !== false) {
|
Chris@0
|
662 $inComment = '';
|
Chris@0
|
663 }
|
Chris@0
|
664 } else {
|
Chris@0
|
665 if ($this->commentTokens[$inComment] === $buffer) {
|
Chris@0
|
666 $inComment = '';
|
Chris@0
|
667 }
|
Chris@0
|
668 }
|
Chris@0
|
669
|
Chris@0
|
670 if (PHP_CODESNIFFER_VERBOSITY > 1) {
|
Chris@0
|
671 if ($inComment === '') {
|
Chris@0
|
672 echo "\t\t* found end of comment *".PHP_EOL;
|
Chris@0
|
673 }
|
Chris@0
|
674 }
|
Chris@0
|
675
|
Chris@0
|
676 if ($inComment === '' && $cleanBuffer === false) {
|
Chris@0
|
677 $tokens[] = array(
|
Chris@0
|
678 'code' => T_STRING,
|
Chris@0
|
679 'type' => 'T_STRING',
|
Chris@0
|
680 'content' => str_replace("\n", $eolChar, $buffer),
|
Chris@0
|
681 );
|
Chris@0
|
682
|
Chris@0
|
683 if (PHP_CODESNIFFER_VERBOSITY > 1) {
|
Chris@0
|
684 $content = PHP_CodeSniffer::prepareForOutput($buffer);
|
Chris@0
|
685 echo "\t=> Added token T_STRING ($content)".PHP_EOL;
|
Chris@0
|
686 }
|
Chris@0
|
687
|
Chris@0
|
688 $buffer = '';
|
Chris@0
|
689 }
|
Chris@0
|
690 }//end if
|
Chris@0
|
691
|
Chris@0
|
692 if ($cleanBuffer === true) {
|
Chris@0
|
693 $buffer = '';
|
Chris@0
|
694 $cleanBuffer = false;
|
Chris@0
|
695 }
|
Chris@0
|
696 }//end for
|
Chris@0
|
697
|
Chris@0
|
698 if (empty($buffer) === false) {
|
Chris@0
|
699 // Buffer contains whitespace from the end of the file.
|
Chris@0
|
700 $tokens[] = array(
|
Chris@0
|
701 'code' => T_WHITESPACE,
|
Chris@0
|
702 'type' => 'T_WHITESPACE',
|
Chris@0
|
703 'content' => str_replace("\n", $eolChar, $buffer),
|
Chris@0
|
704 );
|
Chris@0
|
705
|
Chris@0
|
706 if (PHP_CODESNIFFER_VERBOSITY > 1) {
|
Chris@0
|
707 $content = PHP_CodeSniffer::prepareForOutput($buffer);
|
Chris@0
|
708 echo "\t=> Added token T_WHITESPACE ($content)".PHP_EOL;
|
Chris@0
|
709 }
|
Chris@0
|
710 }
|
Chris@0
|
711
|
Chris@0
|
712 $tokens[] = array(
|
Chris@0
|
713 'code' => T_CLOSE_TAG,
|
Chris@0
|
714 'type' => 'T_CLOSE_TAG',
|
Chris@0
|
715 'content' => '',
|
Chris@0
|
716 );
|
Chris@0
|
717
|
Chris@0
|
718 /*
|
Chris@0
|
719 Now that we have done some basic tokenizing, we need to
|
Chris@0
|
720 modify the tokens to join some together and split some apart
|
Chris@0
|
721 so they match what the PHP tokenizer does.
|
Chris@0
|
722 */
|
Chris@0
|
723
|
Chris@0
|
724 $finalTokens = array();
|
Chris@0
|
725 $newStackPtr = 0;
|
Chris@0
|
726 $numTokens = count($tokens);
|
Chris@0
|
727 for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
|
Chris@0
|
728 $token = $tokens[$stackPtr];
|
Chris@0
|
729
|
Chris@0
|
730 /*
|
Chris@0
|
731 Look for comments and join the tokens together.
|
Chris@0
|
732 */
|
Chris@0
|
733
|
Chris@0
|
734 if ($token['code'] === T_COMMENT || $token['code'] === T_DOC_COMMENT) {
|
Chris@0
|
735 $newContent = '';
|
Chris@0
|
736 $tokenContent = $token['content'];
|
Chris@0
|
737
|
Chris@0
|
738 $endContent = null;
|
Chris@0
|
739 if (isset($this->commentTokens[$tokenContent]) === true) {
|
Chris@0
|
740 $endContent = $this->commentTokens[$tokenContent];
|
Chris@0
|
741 }
|
Chris@0
|
742
|
Chris@0
|
743 while ($tokenContent !== $endContent) {
|
Chris@0
|
744 if ($endContent === null
|
Chris@0
|
745 && strpos($tokenContent, $eolChar) !== false
|
Chris@0
|
746 ) {
|
Chris@0
|
747 // A null end token means the comment ends at the end of
|
Chris@0
|
748 // the line so we look for newlines and split the token.
|
Chris@0
|
749 $tokens[$stackPtr]['content'] = substr(
|
Chris@0
|
750 $tokenContent,
|
Chris@0
|
751 (strpos($tokenContent, $eolChar) + strlen($eolChar))
|
Chris@0
|
752 );
|
Chris@0
|
753
|
Chris@0
|
754 $tokenContent = substr(
|
Chris@0
|
755 $tokenContent,
|
Chris@0
|
756 0,
|
Chris@0
|
757 (strpos($tokenContent, $eolChar) + strlen($eolChar))
|
Chris@0
|
758 );
|
Chris@0
|
759
|
Chris@0
|
760 // If the substr failed, skip the token as the content
|
Chris@0
|
761 // will now be blank.
|
Chris@0
|
762 if ($tokens[$stackPtr]['content'] !== false
|
Chris@0
|
763 && $tokens[$stackPtr]['content'] !== ''
|
Chris@0
|
764 ) {
|
Chris@0
|
765 $stackPtr--;
|
Chris@0
|
766 }
|
Chris@0
|
767
|
Chris@0
|
768 break;
|
Chris@0
|
769 }//end if
|
Chris@0
|
770
|
Chris@0
|
771 $stackPtr++;
|
Chris@0
|
772 $newContent .= $tokenContent;
|
Chris@0
|
773 if (isset($tokens[$stackPtr]) === false) {
|
Chris@0
|
774 break;
|
Chris@0
|
775 }
|
Chris@0
|
776
|
Chris@0
|
777 $tokenContent = $tokens[$stackPtr]['content'];
|
Chris@0
|
778 }//end while
|
Chris@0
|
779
|
Chris@0
|
780 if ($token['code'] === T_DOC_COMMENT) {
|
Chris@0
|
781 $commentTokens = $commentTokenizer->tokenizeString($newContent.$tokenContent, $eolChar, $newStackPtr);
|
Chris@0
|
782 foreach ($commentTokens as $commentToken) {
|
Chris@0
|
783 $finalTokens[$newStackPtr] = $commentToken;
|
Chris@0
|
784 $newStackPtr++;
|
Chris@0
|
785 }
|
Chris@0
|
786
|
Chris@0
|
787 continue;
|
Chris@0
|
788 } else {
|
Chris@0
|
789 // Save the new content in the current token so
|
Chris@0
|
790 // the code below can chop it up on newlines.
|
Chris@0
|
791 $token['content'] = $newContent.$tokenContent;
|
Chris@0
|
792 }
|
Chris@0
|
793 }//end if
|
Chris@0
|
794
|
Chris@0
|
795 /*
|
Chris@0
|
796 If this token has newlines in its content, split each line up
|
Chris@0
|
797 and create a new token for each line. We do this so it's easier
|
Chris@0
|
798 to ascertain where errors occur on a line.
|
Chris@0
|
799 Note that $token[1] is the token's content.
|
Chris@0
|
800 */
|
Chris@0
|
801
|
Chris@0
|
802 if (strpos($token['content'], $eolChar) !== false) {
|
Chris@0
|
803 $tokenLines = explode($eolChar, $token['content']);
|
Chris@0
|
804 $numLines = count($tokenLines);
|
Chris@0
|
805
|
Chris@0
|
806 for ($i = 0; $i < $numLines; $i++) {
|
Chris@0
|
807 $newToken['content'] = $tokenLines[$i];
|
Chris@0
|
808 if ($i === ($numLines - 1)) {
|
Chris@0
|
809 if ($tokenLines[$i] === '') {
|
Chris@0
|
810 break;
|
Chris@0
|
811 }
|
Chris@0
|
812 } else {
|
Chris@0
|
813 $newToken['content'] .= $eolChar;
|
Chris@0
|
814 }
|
Chris@0
|
815
|
Chris@0
|
816 $newToken['type'] = $token['type'];
|
Chris@0
|
817 $newToken['code'] = $token['code'];
|
Chris@0
|
818 $finalTokens[$newStackPtr] = $newToken;
|
Chris@0
|
819 $newStackPtr++;
|
Chris@0
|
820 }
|
Chris@0
|
821 } else {
|
Chris@0
|
822 $finalTokens[$newStackPtr] = $token;
|
Chris@0
|
823 $newStackPtr++;
|
Chris@0
|
824 }//end if
|
Chris@0
|
825
|
Chris@0
|
826 // Convert numbers, including decimals.
|
Chris@0
|
827 if ($token['code'] === T_STRING
|
Chris@0
|
828 || $token['code'] === T_OBJECT_OPERATOR
|
Chris@0
|
829 ) {
|
Chris@0
|
830 $newContent = '';
|
Chris@0
|
831 $oldStackPtr = $stackPtr;
|
Chris@0
|
832 while (preg_match('|^[0-9\.]+$|', $tokens[$stackPtr]['content']) !== 0) {
|
Chris@0
|
833 $newContent .= $tokens[$stackPtr]['content'];
|
Chris@0
|
834 $stackPtr++;
|
Chris@0
|
835 }
|
Chris@0
|
836
|
Chris@0
|
837 if ($newContent !== '' && $newContent !== '.') {
|
Chris@0
|
838 $finalTokens[($newStackPtr - 1)]['content'] = $newContent;
|
Chris@0
|
839 if (ctype_digit($newContent) === true) {
|
Chris@0
|
840 $finalTokens[($newStackPtr - 1)]['code'] = constant('T_LNUMBER');
|
Chris@0
|
841 $finalTokens[($newStackPtr - 1)]['type'] = 'T_LNUMBER';
|
Chris@0
|
842 } else {
|
Chris@0
|
843 $finalTokens[($newStackPtr - 1)]['code'] = constant('T_DNUMBER');
|
Chris@0
|
844 $finalTokens[($newStackPtr - 1)]['type'] = 'T_DNUMBER';
|
Chris@0
|
845 }
|
Chris@0
|
846
|
Chris@0
|
847 $stackPtr--;
|
Chris@0
|
848 continue;
|
Chris@0
|
849 } else {
|
Chris@0
|
850 $stackPtr = $oldStackPtr;
|
Chris@0
|
851 }
|
Chris@0
|
852 }//end if
|
Chris@0
|
853
|
Chris@0
|
854 // Convert the token after an object operator into a string, in most cases.
|
Chris@0
|
855 if ($token['code'] === T_OBJECT_OPERATOR) {
|
Chris@0
|
856 for ($i = ($stackPtr + 1); $i < $numTokens; $i++) {
|
Chris@0
|
857 if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$i]['code']]) === true) {
|
Chris@0
|
858 continue;
|
Chris@0
|
859 }
|
Chris@0
|
860
|
Chris@0
|
861 if ($tokens[$i]['code'] !== T_PROTOTYPE
|
Chris@0
|
862 && $tokens[$i]['code'] !== T_LNUMBER
|
Chris@0
|
863 && $tokens[$i]['code'] !== T_DNUMBER
|
Chris@0
|
864 ) {
|
Chris@0
|
865 $tokens[$i]['code'] = T_STRING;
|
Chris@0
|
866 $tokens[$i]['type'] = 'T_STRING';
|
Chris@0
|
867 }
|
Chris@0
|
868
|
Chris@0
|
869 break;
|
Chris@0
|
870 }
|
Chris@0
|
871 }
|
Chris@0
|
872 }//end for
|
Chris@0
|
873
|
Chris@0
|
874 if (PHP_CODESNIFFER_VERBOSITY > 1) {
|
Chris@0
|
875 echo "\t*** END TOKENIZING ***".PHP_EOL;
|
Chris@0
|
876 }
|
Chris@0
|
877
|
Chris@0
|
878 return $finalTokens;
|
Chris@0
|
879
|
Chris@0
|
880 }//end tokenizeString()
|
Chris@0
|
881
|
Chris@0
|
882
|
Chris@0
|
/**
 * Tokenizes a regular expression if one is found.
 *
 * The character at $char is only accepted as the start of a regular
 * expression when all of the following hold:
 *  - the last non-empty token already collected is one of the tokens that
 *    can precede an expression (assignment, comparison, opening bracket,
 *    comma, colon, return, etc.);
 *  - an unescaped closing slash is found before the end of the line;
 *  - the first non-space character after the optional modifiers is one of
 *    the characters allowed to follow a regular expression.
 *
 * If a regular expression is not found, NULL is returned.
 *
 * @param int    $char    The index of the possible regex start character.
 * @param string $string  The complete content of the string being tokenized.
 * @param array  $chars   An array of characters being tokenized; it is read
 *                        using the same offsets as $string.
 * @param array  $tokens  The current array of tokens found in the string.
 * @param string $eolChar The EOL character to use for splitting strings.
 *
 * @return array|null An array with the keys 'start', 'end' and 'content',
 *                    or NULL if no regular expression starts at $char.
 */
public function getRegexToken($char, $string, $chars, $tokens, $eolChar)
{
    // Token codes after which a slash is treated as the start of a
    // regular expression.
    $beforeTokens = array(
        T_EQUAL               => true,
        T_IS_NOT_EQUAL        => true,
        T_IS_IDENTICAL        => true,
        T_IS_NOT_IDENTICAL    => true,
        T_OPEN_PARENTHESIS    => true,
        T_OPEN_SQUARE_BRACKET => true,
        T_RETURN              => true,
        T_BOOLEAN_OR          => true,
        T_BOOLEAN_AND         => true,
        T_BITWISE_OR          => true,
        T_BITWISE_AND         => true,
        T_COMMA               => true,
        T_COLON               => true,
        T_TYPEOF              => true,
        T_INLINE_THEN         => true,
        T_INLINE_ELSE         => true,
    );

    // Characters that are allowed to follow a complete regular expression.
    $afterTokens = array(
        ','      => true,
        ')'      => true,
        ']'      => true,
        ';'      => true,
        ' '      => true,
        '.'      => true,
        ':'      => true,
        $eolChar => true,
    );

    // Find the last non-empty token that was added to the tokens array.
    for ($prev = (count($tokens) - 1); $prev >= 0; $prev--) {
        if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$prev]['code']]) === false) {
            break;
        }
    }

    // $prev is -1 when no non-empty token has been collected yet; there is
    // then nothing that could introduce a regular expression.
    if ($prev < 0 || isset($beforeTokens[$tokens[$prev]['code']]) === false) {
        return null;
    }

    // This is probably a regular expression, so look for the end of it.
    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t* token possibly starts a regular expression *".PHP_EOL;
    }

    $numChars  = count($chars);
    $eolLength = strlen($eolChar);
    for ($next = ($char + 1); $next < $numChars; $next++) {
        if ($chars[$next] === '/') {
            // A slash only closes the expression when it is not escaped,
            // i.e. when it is preceded by an even number of backslashes
            // (each pair of backslashes is itself an escaped backslash).
            $backslashes = 0;
            for ($back = ($next - 1); $back > $char; $back--) {
                if ($chars[$back] !== '\\') {
                    break;
                }

                $backslashes++;
            }

            if (($backslashes % 2) === 0) {
                break;
            }
        } else if (substr($string, $next, $eolLength) === $eolChar) {
            // This is the last token on the line and regular
            // expressions need to be defined on a single line,
            // so this is not a regular expression.
            break;
        }
    }

    // The scan either ran off the end of the input or stopped at an EOL.
    if ($next >= $numChars || $chars[$next] !== '/') {
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t* could not find end of regular expression *".PHP_EOL;
        }

        return null;
    }

    // The characters directly after the end of the regex can
    // be modifiers like global and case insensitive
    // (e.g., /pattern/gi). Stop at the end of the input.
    while (isset($chars[($next + 1)]) === true
        && preg_match('|[a-zA-Z]|', $chars[($next + 1)]) !== 0
    ) {
        $next++;
    }

    $regexEnd = $next;
    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t* found end of regular expression at token $regexEnd *".PHP_EOL;
    }

    // Skip any spaces that follow the expression.
    for ($next = ($next + 1); $next < $numChars; $next++) {
        if ($chars[$next] !== ' ') {
            break;
        } else if (substr($string, $next, $eolLength) === $eolChar) {
            // This is the last token on the line.
            break;
        }
    }

    // $afterTokens is keyed by single characters, so an EOL sequence that
    // is longer than one character (e.g. "\r\n") has to be matched against
    // the string itself.
    $atMultiCharEol = ($eolLength > 1 && substr($string, $next, $eolLength) === $eolChar);

    // Reaching the end of the input is treated like any other unexpected
    // follower: the candidate is rejected.
    if ($next >= $numChars
        || ($atMultiCharEol === false && isset($afterTokens[$chars[$next]]) === false)
    ) {
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t* tokens after regular expression do not look correct *".PHP_EOL;
        }

        return null;
    }

    // This is a regular expression, so join all the characters together.
    $content = '';
    for ($x = $char; $x <= $regexEnd; $x++) {
        $content .= $chars[$x];
    }

    $token = array(
        'start'   => $char,
        'end'     => $regexEnd,
        'content' => $content,
    );

    return $token;

}//end getRegexToken()
|
Chris@0
|
1023
|
Chris@0
|
1024
|
Chris@0
|
/**
 * Performs additional processing after main tokenizing.
 *
 * This additional processing looks for properties, closures, labels and objects.
 * The token array is modified in place:
 *  - A T_FUNCTION token that has a scope opener and whose next non-empty
 *    token is an opening parenthesis becomes T_CLOSURE, and the matching
 *    entry in the conditions of the tokens inside its scope is updated.
 *  - A T_OPEN_CURLY_BRACKET token that has a bracket closer but no scope
 *    condition becomes T_OBJECT, its closer becomes T_CLOSE_OBJECT, and a
 *    T_OBJECT condition is added to every token between them.
 *  - A T_COLON token that is not a scope opener becomes T_INLINE_ELSE when
 *    a T_INLINE_THEN token precedes it on the same line. Otherwise, a
 *    T_STRING or T_CONSTANT_ENCAPSED_STRING directly before it (ignoring
 *    empty tokens) becomes T_PROPERTY inside an object, or T_LABEL outside.
 *
 * @param array  $tokens  The array of tokens to process.
 * @param string $eolChar The EOL character to use for splitting strings.
 *                        It is not used by this method.
 *
 * @return void
 */
public function processAdditional(&$tokens, $eolChar)
{
    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t*** START ADDITIONAL JS PROCESSING ***".PHP_EOL;
    }

    $numTokens = count($tokens);

    // Stack of the T_OBJECT openers that are currently open; it is only
    // used to tell properties from labels and to indent debug output.
    $classStack = array();

    for ($i = 0; $i < $numTokens; $i++) {
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            $type    = $tokens[$i]['type'];
            $content = PHP_CodeSniffer::prepareForOutput($tokens[$i]['content']);

            echo str_repeat("\t", count($classStack));
            echo "\tProcess token $i: $type => $content".PHP_EOL;
        }

        // Looking for functions that are actually closures.
        if ($tokens[$i]['code'] === T_FUNCTION && isset($tokens[$i]['scope_opener']) === true) {
            for ($x = ($i + 1); $x < $numTokens; $x++) {
                if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false) {
                    break;
                }
            }

            // A function keyword followed directly by "(" has no name.
            if (isset($tokens[$x]) === true && $tokens[$x]['code'] === T_OPEN_PARENTHESIS) {
                $tokens[$i]['code'] = T_CLOSURE;
                $tokens[$i]['type'] = 'T_CLOSURE';
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $line = $tokens[$i]['line'];
                    echo str_repeat("\t", count($classStack));
                    echo "\t* token $i on line $line changed from T_FUNCTION to T_CLOSURE".PHP_EOL;
                }

                // Tokens inside the scope recorded this token as a
                // condition, so update those entries as well.
                for ($x = ($tokens[$i]['scope_opener'] + 1); $x < $tokens[$i]['scope_closer']; $x++) {
                    if (isset($tokens[$x]['conditions'][$i]) === false) {
                        continue;
                    }

                    $tokens[$x]['conditions'][$i] = T_CLOSURE;
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $type = $tokens[$x]['type'];
                        echo str_repeat("\t", count($classStack));
                        echo "\t\t* cleaned $x ($type) *".PHP_EOL;
                    }
                }
            }//end if

            continue;
        } else if ($tokens[$i]['code'] === T_OPEN_CURLY_BRACKET
            && isset($tokens[$i]['scope_condition']) === false
            && isset($tokens[$i]['bracket_closer']) === true
        ) {
            // A curly bracket that does not open a scope is treated as
            // the start of an object.
            $classStack[] = $i;

            $closer = $tokens[$i]['bracket_closer'];
            $tokens[$i]['code']      = T_OBJECT;
            $tokens[$i]['type']      = 'T_OBJECT';
            $tokens[$closer]['code'] = T_CLOSE_OBJECT;
            $tokens[$closer]['type'] = 'T_CLOSE_OBJECT';

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                echo str_repeat("\t", count($classStack));
                echo "\t* token $i converted from T_OPEN_CURLY_BRACKET to T_OBJECT *".PHP_EOL;
                echo str_repeat("\t", count($classStack));
                echo "\t* token $closer converted from T_CLOSE_CURLY_BRACKET to T_CLOSE_OBJECT *".PHP_EOL;
            }

            for ($x = ($i + 1); $x < $closer; $x++) {
                $tokens[$x]['conditions'][$i] = T_OBJECT;

                // Keep the conditions ordered by token position.
                ksort($tokens[$x]['conditions'], SORT_NUMERIC);
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $type = $tokens[$x]['type'];
                    echo str_repeat("\t", count($classStack));
                    echo "\t\t* added T_OBJECT condition to $x ($type) *".PHP_EOL;
                }
            }
        } else if ($tokens[$i]['code'] === T_CLOSE_OBJECT) {
            // Leaving the innermost open object.
            array_pop($classStack);
        } else if ($tokens[$i]['code'] === T_COLON) {
            // If it is a scope opener, it belongs to a
            // DEFAULT or CASE statement.
            if (isset($tokens[$i]['scope_condition']) === true) {
                continue;
            }

            // Make sure this is not part of an inline IF statement.
            // Only tokens on the same line as the colon are checked.
            for ($x = ($i - 1); $x >= 0; $x--) {
                if ($tokens[$x]['code'] === T_INLINE_THEN) {
                    $tokens[$i]['code'] = T_INLINE_ELSE;
                    $tokens[$i]['type'] = 'T_INLINE_ELSE';

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo str_repeat("\t", count($classStack));
                        echo "\t* token $i converted from T_COLON to T_INLINE_ELSE *".PHP_EOL;
                    }

                    continue 2;
                } else if ($tokens[$x]['line'] < $tokens[$i]['line']) {
                    break;
                }
            }

            // The string to the left of the colon is either a property or label.
            for ($label = ($i - 1); $label >= 0; $label--) {
                if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$label]['code']]) === false) {
                    break;
                }
            }

            // $label is -1 when only empty tokens (or nothing at all)
            // precede the colon, so there is nothing to convert.
            if ($label < 0
                || ($tokens[$label]['code'] !== T_STRING
                && $tokens[$label]['code'] !== T_CONSTANT_ENCAPSED_STRING)
            ) {
                continue;
            }

            if (empty($classStack) === false) {
                // Inside an object, so this is a property name.
                $tokens[$label]['code'] = T_PROPERTY;
                $tokens[$label]['type'] = 'T_PROPERTY';

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo str_repeat("\t", count($classStack));
                    echo "\t* token $label converted from T_STRING to T_PROPERTY *".PHP_EOL;
                }
            } else {
                // Not inside any object, so this is a label.
                $tokens[$label]['code'] = T_LABEL;
                $tokens[$label]['type'] = 'T_LABEL';

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo str_repeat("\t", count($classStack));
                    echo "\t* token $label converted from T_STRING to T_LABEL *".PHP_EOL;
                }
            }//end if
        }//end if
    }//end for

    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t*** END ADDITIONAL JS PROCESSING ***".PHP_EOL;
    }

}//end processAdditional()
|
Chris@0
|
1177
|
Chris@0
|
1178
|
Chris@0
|
1179 }//end class
|