Chris@17
|
1 <?php
|
Chris@17
|
2 /**
|
Chris@17
|
3 * Tokenizes doc block comments.
|
Chris@17
|
4 *
|
Chris@17
|
5 * @author Greg Sherwood <gsherwood@squiz.net>
|
Chris@17
|
6 * @copyright 2006-2015 Squiz Pty Ltd (ABN 77 084 670 600)
|
Chris@17
|
7 * @license https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
|
Chris@17
|
8 */
|
Chris@17
|
9
|
Chris@17
|
10 namespace PHP_CodeSniffer\Tokenizers;
|
Chris@17
|
11
|
Chris@17
|
12 use PHP_CodeSniffer\Util;
|
Chris@17
|
13
|
Chris@17
|
class Comment
{


    /**
     * Splits a doc block comment into a list of comment tokens.
     *
     * The comment opener and closer are peeled off first, then the remaining
     * text is split on $eolChar and tokenized line by line. Tokens are keyed
     * by stack position, starting at $stackPtr and increasing by one per token.
     *
     * The opener token carries a 'comment_tags' list holding the stack
     * positions of every T_DOC_COMMENT_TAG token and a 'comment_closer'
     * position; the closer token carries a 'comment_opener' position.
     *
     * @param string $string   The comment text to tokenize.
     * @param string $eolChar  The EOL character to use for splitting strings.
     * @param int    $stackPtr The stack position to give the first token.
     *
     * @return array
     */
    public function tokenizeString($string, $eolChar, $stackPtr)
    {
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t\t*** START COMMENT TOKENIZING ***".PHP_EOL;
        }

        $tokens = [];

        /*
            Doc block comments start with /*, but typically contain an
            extra star when they are used for function and class comments.
            Every leading slash and star belongs to the open tag.
        */

        $body    = ltrim($string, '/*');
        $openTag = substr($string, 0, (strlen($string) - strlen($body)));

        $tokens[$stackPtr] = [
            'content'      => $openTag,
            'code'         => T_DOC_COMMENT_OPEN_TAG,
            'type'         => 'T_DOC_COMMENT_OPEN_TAG',
            'comment_tags' => [],
        ];

        $openPtr = $stackPtr;
        $stackPtr++;

        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            $content = Util\Common::prepareForOutput($openTag);
            echo "\t\tCreate comment token: T_DOC_COMMENT_OPEN_TAG => $content".PHP_EOL;
        }

        /*
            Strip off the close tag so it doesn't interfere with any
            of our comment line processing. The token will be added to the
            stack just before we return it.
        */

        $inner        = rtrim($body, '/*');
        $closeContent = substr($body, strlen($inner));
        if ($closeContent === false) {
            // Older PHP versions return false instead of an empty string here.
            $closeContent = '';
        }

        $closeTag = [
            'content'        => $closeContent,
            'code'           => T_DOC_COMMENT_CLOSE_TAG,
            'type'           => 'T_DOC_COMMENT_CLOSE_TAG',
            'comment_opener' => $openPtr,
        ];

        /*
            Process each line of the comment.
        */

        $lines    = explode($eolChar, $inner);
        $lastLine = (count($lines) - 1);
        foreach ($lines as $lineNum => $line) {
            // explode() removed the EOL from every line except the last one.
            if ($lineNum !== $lastLine) {
                $line .= $eolChar;
            }

            $char    = 0;
            $lineLen = strlen($line);

            // We've started a new line, so process the indent.
            $space = $this->collectWhitespace($line, $char, $lineLen);
            if ($space !== null) {
                $tokens[$stackPtr] = $space;
                $stackPtr++;
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput($space['content']);
                    echo "\t\tCreate comment token: T_DOC_COMMENT_WHITESPACE => $content".PHP_EOL;
                }

                $char += strlen($space['content']);
                if ($char === $lineLen) {
                    // Only the final line has no EOL appended, so a line made
                    // up entirely of indent is the end of the comment body.
                    break;
                }
            }

            if ($line === '') {
                continue;
            }

            if ($lineNum > 0 && $line[$char] === '*') {
                // This is a function or class doc block line.
                $char++;
                $tokens[$stackPtr] = [
                    'content' => '*',
                    'code'    => T_DOC_COMMENT_STAR,
                    'type'    => 'T_DOC_COMMENT_STAR',
                ];

                $stackPtr++;

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\tCreate comment token: T_DOC_COMMENT_STAR => *".PHP_EOL;
                }
            }

            // Now we are ready to process the actual content of the line.
            $lineTokens = $this->processLine($line, $eolChar, $char, $lineLen);
            foreach ($lineTokens as $lineToken) {
                $tokens[$stackPtr] = $lineToken;
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput($lineToken['content']);
                    $type    = $lineToken['type'];
                    echo "\t\tCreate comment token: $type => $content".PHP_EOL;
                }

                // Record where each tag sits so it can be found from the opener.
                if ($lineToken['code'] === T_DOC_COMMENT_TAG) {
                    $tokens[$openPtr]['comment_tags'][] = $stackPtr;
                }

                $stackPtr++;
            }
        }//end foreach

        $tokens[$stackPtr] = $closeTag;
        $tokens[$openPtr]['comment_closer'] = $stackPtr;
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            $content = Util\Common::prepareForOutput($closeTag['content']);
            echo "\t\tCreate comment token: T_DOC_COMMENT_CLOSE_TAG => $content".PHP_EOL;
        }

        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t\t*** END COMMENT TOKENIZING ***".PHP_EOL;
        }

        return $tokens;

    }//end tokenizeString()


    /**
     * Process a single line of a comment.
     *
     * Produces, in order and only when present: leading padding, a tag name
     * followed by its trailing whitespace, the remaining text as a string
     * token, and the EOL as a whitespace token.
     *
     * Tags beginning with "@phpcs:" (case-insensitive) are not turned into
     * tag tokens; they stay part of the string token.
     *
     * @param string $string  The comment string being tokenized.
     * @param string $eolChar The EOL character to use for splitting strings.
     * @param int    $start   The position in the string to start processing.
     * @param int    $end     The position in the string to end processing.
     *
     * @return array
     */
    private function processLine($string, $eolChar, $start, $end)
    {
        $tokens = [];

        // Collect content padding.
        $space = $this->collectWhitespace($string, $start, $end);
        if ($space !== null) {
            $tokens[] = $space;
            $start   += strlen($space['content']);
        }

        if (isset($string[$start]) === false) {
            return $tokens;
        }

        if ($string[$start] === '@') {
            // The content up until the first whitespace is the tag name.
            // \G pins the match to $start. Without it, an "@" followed by
            // whitespace lets the regex match a later "@word" on the line,
            // and $start would then be advanced past text never consumed.
            $matches = [];
            preg_match('/\G@[^\s]+/', $string, $matches, 0, $start);
            if (isset($matches[0]) === true
                && substr(strtolower($matches[0]), 0, 7) !== '@phpcs:'
            ) {
                $tagName  = $matches[0];
                $start   += strlen($tagName);
                $tokens[] = [
                    'content' => $tagName,
                    'code'    => T_DOC_COMMENT_TAG,
                    'type'    => 'T_DOC_COMMENT_TAG',
                ];

                // Then there will be some whitespace.
                $space = $this->collectWhitespace($string, $start, $end);
                if ($space !== null) {
                    $tokens[] = $space;
                    $start   += strlen($space['content']);
                }
            }
        }//end if

        // Process the rest of the line.
        $eol = strpos($string, $eolChar, $start);
        if ($eol === false) {
            $eol = $end;
        }

        if ($eol > $start) {
            $tokens[] = [
                'content' => substr($string, $start, ($eol - $start)),
                'code'    => T_DOC_COMMENT_STRING,
                'type'    => 'T_DOC_COMMENT_STRING',
            ];
        }

        if ($eol !== $end) {
            $tokens[] = [
                'content' => substr($string, $eol, strlen($eolChar)),
                'code'    => T_DOC_COMMENT_WHITESPACE,
                'type'    => 'T_DOC_COMMENT_WHITESPACE',
            ];
        }

        return $tokens;

    }//end processLine()


    /**
     * Collect consecutive whitespace into a single token.
     *
     * Only spaces and tabs between $start and $end count as whitespace.
     *
     * @param string $string The comment string being tokenized.
     * @param int    $start  The position in the string to start processing.
     * @param int    $end    The position in the string to end processing.
     *
     * @return array|null The whitespace token, or NULL when the character
     *                    at $start is not a space or tab.
     */
    private function collectWhitespace($string, $start, $end)
    {
        if ($start >= $end) {
            return null;
        }

        // Length of the run of spaces/tabs inside the [$start, $end) window.
        $length = strspn($string, " \t", $start, ($end - $start));
        if ($length === false || $length === 0) {
            return null;
        }

        return [
            'content' => substr($string, $start, $length),
            'code'    => T_DOC_COMMENT_WHITESPACE,
            'type'    => 'T_DOC_COMMENT_WHITESPACE',
        ];

    }//end collectWhitespace()


}//end class
|