<?php
/**
 * Tokenizes doc block comments.
 *
 * PHP version 5
 *
 * @category  PHP
 * @package   PHP_CodeSniffer
 * @author    Greg Sherwood <gsherwood@squiz.net>
 * @copyright 2006-2012 Squiz Pty Ltd (ABN 77 084 670 600)
 * @license   https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
 * @link      http://pear.php.net/package/PHP_CodeSniffer
 */

/**
 * Tokenizes doc block comments.
 *
 * @category  PHP
 * @package   PHP_CodeSniffer
 * @author    Greg Sherwood <gsherwood@squiz.net>
 * @copyright 2006-2012 Squiz Pty Ltd (ABN 77 084 670 600)
 * @license   https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
 * @version   Release: @package_version@
 * @link      http://pear.php.net/package/PHP_CodeSniffer
 */
class PHP_CodeSniffer_Tokenizers_Comment
{


    /**
     * Creates an array of tokens when given the text of a doc block comment.
     *
     * The comment is split into an open tag token, followed by whitespace,
     * star, tag and string tokens for each line, and finally a close tag
     * token. The open tag token records the stack pointers of every tag
     * token ('comment_tags') and of the close tag ('comment_closer'); the
     * close tag token records the stack pointer of the open tag
     * ('comment_opener').
     *
     * When PHP_CODESNIFFER_VERBOSITY is greater than 1, a debug line is
     * echoed for every token created.
     *
     * @param string $string   The string to tokenize.
     * @param string $eolChar  The EOL character to use for splitting strings.
     * @param int    $stackPtr The position of the first token in the file.
     *
     * @return array The tokens, indexed by stack pointer starting at $stackPtr.
     */
    public function tokenizeString($string, $eolChar, $stackPtr)
    {
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t\t*** START COMMENT TOKENIZING ***".PHP_EOL;
        }

        $tokens   = array();
        $numChars = strlen($string);

        /*
            Doc block comments start with /*, but typically contain an
            extra star when they are used for function and class comments.
            Every leading slash and star is treated as part of the open tag.
        */

        $withoutOpener = ltrim($string, '/*');
        $openTag       = substr($string, 0, ($numChars - strlen($withoutOpener)));
        $string        = $withoutOpener;

        $tokens[$stackPtr] = array(
                              'content'      => $openTag,
                              'code'         => T_DOC_COMMENT_OPEN_TAG,
                              'type'         => 'T_DOC_COMMENT_OPEN_TAG',
                              'comment_tags' => array(),
                             );

        $openPtr = $stackPtr;
        $stackPtr++;

        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            $content = PHP_CodeSniffer::prepareForOutput($openTag);
            echo "\t\tCreate comment token: T_DOC_COMMENT_OPEN_TAG => $content".PHP_EOL;
        }

        /*
            Strip off the close tag so it doesn't interfere with any
            of our comment line processing. The token will be added to the
            stack just before we return it.
        */

        $withoutCloser = rtrim($string, '/*');
        $closeContent  = substr($string, strlen($withoutCloser));
        if ($closeContent === false) {
            // Older PHP versions return false instead of an empty string
            // when the substr() offset equals the string length.
            $closeContent = '';
        }

        $closeTag = array(
                     'content'        => $closeContent,
                     'code'           => T_DOC_COMMENT_CLOSE_TAG,
                     'type'           => 'T_DOC_COMMENT_CLOSE_TAG',
                     'comment_opener' => $openPtr,
                    );

        $string = $withoutCloser;

        /*
            Process each line of the comment.
        */

        $lines    = explode($eolChar, $string);
        $lastLine = (count($lines) - 1);
        foreach ($lines as $lineNum => $line) {
            // explode() removed the EOL characters, so put them back on
            // every line except the last, which never had one.
            if ($lineNum !== $lastLine) {
                $line .= $eolChar;
            }

            $char     = 0;
            $numChars = strlen($line);

            // We've started a new line, so process the indent.
            $space = $this->_collectWhitespace($line, $char, $numChars);
            if ($space !== null) {
                $tokens[$stackPtr] = $space;
                $stackPtr++;
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = PHP_CodeSniffer::prepareForOutput($space['content']);
                    echo "\t\tCreate comment token: T_DOC_COMMENT_WHITESPACE => $content".PHP_EOL;
                }

                $char += strlen($space['content']);
                if ($char === $numChars) {
                    // The line was nothing but indent and has no EOL, so
                    // there is nothing further to tokenize.
                    break;
                }
            }

            if ($line === '') {
                continue;
            }

            if ($line[$char] === '*') {
                // This is a function or class doc block line.
                $char++;
                $tokens[$stackPtr] = array(
                                      'content' => '*',
                                      'code'    => T_DOC_COMMENT_STAR,
                                      'type'    => 'T_DOC_COMMENT_STAR',
                                     );

                $stackPtr++;

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\tCreate comment token: T_DOC_COMMENT_STAR => *".PHP_EOL;
                }
            }

            // Now we are ready to process the actual content of the line.
            $lineTokens = $this->_processLine($line, $eolChar, $char, $numChars);
            foreach ($lineTokens as $lineToken) {
                $tokens[$stackPtr] = $lineToken;
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = PHP_CodeSniffer::prepareForOutput($lineToken['content']);
                    $type    = $lineToken['type'];
                    echo "\t\tCreate comment token: $type => $content".PHP_EOL;
                }

                if ($lineToken['code'] === T_DOC_COMMENT_TAG) {
                    $tokens[$openPtr]['comment_tags'][] = $stackPtr;
                }

                $stackPtr++;
            }
        }//end foreach

        $tokens[$stackPtr] = $closeTag;
        $tokens[$openPtr]['comment_closer'] = $stackPtr;
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            $content = PHP_CodeSniffer::prepareForOutput($closeTag['content']);
            echo "\t\tCreate comment token: T_DOC_COMMENT_CLOSE_TAG => $content".PHP_EOL;
            echo "\t\t*** END COMMENT TOKENIZING ***".PHP_EOL;
        }

        return $tokens;

    }//end tokenizeString()


    /**
     * Process a single line of a comment.
     *
     * Produces, in order and only when present: the padding before the
     * content, a tag token (when the content starts with an @tag) and the
     * whitespace after it, a string token holding the rest of the line, and
     * a whitespace token holding the EOL character.
     *
     * @param string $string  The comment string being tokenized.
     * @param string $eolChar The EOL character to use for splitting strings.
     * @param int    $start   The position in the string to start processing.
     * @param int    $end     The position in the string to end processing.
     *
     * @return array
     */
    private function _processLine($string, $eolChar, $start, $end)
    {
        $tokens = array();

        // Collect content padding.
        $space = $this->_collectWhitespace($string, $start, $end);
        if ($space !== null) {
            $tokens[] = $space;
            $start   += strlen($space['content']);
        }

        if (isset($string[$start]) === false) {
            return $tokens;
        }

        if ($string[$start] === '@') {
            // The content up until the first whitespace is the tag name.
            // The \G anchor forces the match to begin exactly at $start.
            // Without it, an "@" followed by whitespace would let the regex
            // match a later tag on the same line, and $start would then be
            // advanced past content that was never tokenized.
            $matches = array();
            preg_match('/\G@[^\s]+/', $string, $matches, 0, $start);
            if (isset($matches[0]) === true) {
                $tagName  = $matches[0];
                $start   += strlen($tagName);
                $tokens[] = array(
                             'content' => $tagName,
                             'code'    => T_DOC_COMMENT_TAG,
                             'type'    => 'T_DOC_COMMENT_TAG',
                            );

                // Then there will be some whitespace.
                $space = $this->_collectWhitespace($string, $start, $end);
                if ($space !== null) {
                    $tokens[] = $space;
                    $start   += strlen($space['content']);
                }
            }
        }//end if

        // Process the rest of the line.
        $eol = strpos($string, $eolChar, $start);
        if ($eol === false) {
            $eol = $end;
        }

        if ($eol > $start) {
            $tokens[] = array(
                         'content' => substr($string, $start, ($eol - $start)),
                         'code'    => T_DOC_COMMENT_STRING,
                         'type'    => 'T_DOC_COMMENT_STRING',
                        );
        }

        if ($eol !== $end) {
            // The EOL character is reported as a whitespace token.
            $tokens[] = array(
                         'content' => substr($string, $eol, strlen($eolChar)),
                         'code'    => T_DOC_COMMENT_WHITESPACE,
                         'type'    => 'T_DOC_COMMENT_WHITESPACE',
                        );
        }

        return $tokens;

    }//end _processLine()


    /**
     * Collect consecutive whitespace into a single token.
     *
     * Only spaces and tabs count as whitespace here; EOL characters are
     * not collected.
     *
     * @param string $string The comment string being tokenized.
     * @param int    $start  The position in the string to start processing.
     * @param int    $end    The position in the string to end processing.
     *
     * @return array|null The whitespace token, or NULL when the character at
     *                    $start is not a space or tab.
     */
    private function _collectWhitespace($string, $start, $end)
    {
        if ($start >= $end) {
            // Empty range; also keeps a negative length away from strspn(),
            // where it would mean "stop that many characters from the end".
            return null;
        }

        $length = strspn($string, " \t", $start, ($end - $start));
        if ($length === 0) {
            return null;
        }

        $token = array(
                  'content' => substr($string, $start, $length),
                  'code'    => T_DOC_COMMENT_WHITESPACE,
                  'type'    => 'T_DOC_COMMENT_WHITESPACE',
                 );

        return $token;

    }//end _collectWhitespace()


}//end class