/**
 * @copyright 2006-2012 Squiz Pty Ltd (ABN 77 084 670 600)
 * @license   https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
 * @link      http://pear.php.net/package/PHP_CodeSniffer
 */

/**
 * Tokenizes doc block comments.
 *
 * @category  PHP
 * @package   PHP_CodeSniffer
 * @author    Greg Sherwood
 * @copyright 2006-2012 Squiz Pty Ltd (ABN 77 084 670 600)
 * @license   https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
 * @version   Release: @package_version@
 * @link      http://pear.php.net/package/PHP_CodeSniffer
 */
class PHP_CodeSniffer_Tokenizers_Comment
{


    /**
     * Creates an array of tokens from the text of a doc block comment.
     *
     * The comment is split into an open tag, then for every line an optional
     * indent, an optional star and the line content (tag, whitespace and
     * string tokens), and finally a close tag. Tokens are keyed by stack
     * position, starting at $stackPtr. The open tag token records the stack
     * positions of all tag tokens ('comment_tags') and of the close tag
     * ('comment_closer'); the close tag token points back to the open tag
     * ('comment_opener').
     *
     * @param string $string   The comment string to tokenize.
     * @param string $eolChar  The EOL character to use for splitting strings.
     * @param int    $stackPtr The position of the first token in the file.
     *
     * @return array
     */
    public function tokenizeString($string, $eolChar, $stackPtr)
    {
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t\t*** START COMMENT TOKENIZING ***".PHP_EOL;
        }

        $tokens   = array();
        $numChars = strlen($string);

        // Doc block comments start with a slash and a star, but typically
        // contain an extra star when they are used for function and class
        // comments, so every leading slash and star belongs to the open tag.
        $withoutOpener = ltrim($string, '/*');
        $openTag       = substr($string, 0, ($numChars - strlen($withoutOpener)));
        $string        = $withoutOpener;

        $tokens[$stackPtr] = array(
                              'content'      => $openTag,
                              'code'         => T_DOC_COMMENT_OPEN_TAG,
                              'type'         => 'T_DOC_COMMENT_OPEN_TAG',
                              'comment_tags' => array(),
                             );

        $openPtr = $stackPtr;
        $stackPtr++;

        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            $content = PHP_CodeSniffer::prepareForOutput($openTag);
            echo "\t\tCreate comment token: T_DOC_COMMENT_OPEN_TAG => $content".PHP_EOL;
        }

        // Strip off the close tag so it doesn't interfere with any
        // of our comment line processing. The token will be added to the
        // stack just before we return it.
        $withoutCloser = rtrim($string, '/*');
        $closeContent  = substr($string, strlen($withoutCloser));
        if ($closeContent === false) {
            // Older PHP versions return false instead of an empty string.
            $closeContent = '';
        }

        $closeTag = array(
                     'content'        => $closeContent,
                     'code'           => T_DOC_COMMENT_CLOSE_TAG,
                     'type'           => 'T_DOC_COMMENT_CLOSE_TAG',
                     'comment_opener' => $openPtr,
                    );

        // Process each line of the comment.
        $lines    = explode($eolChar, $withoutCloser);
        $lastLine = (count($lines) - 1);
        foreach ($lines as $lineNum => $line) {
            // explode() removed the EOL characters; put them back on every
            // line except the last so they end up in the token stream.
            if ($lineNum !== $lastLine) {
                $line .= $eolChar;
            }

            $char     = 0;
            $numChars = strlen($line);

            // We've started a new line, so process the indent.
            $space = $this->_collectWhitespace($line, $char, $numChars);
            if ($space !== null) {
                $tokens[$stackPtr] = $space;
                $stackPtr++;
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = PHP_CodeSniffer::prepareForOutput($space['content']);
                    echo "\t\tCreate comment token: T_DOC_COMMENT_WHITESPACE => $content".PHP_EOL;
                }

                $char += strlen($space['content']);
                if ($char === $numChars) {
                    // The indent consumed the whole line, so there is nothing
                    // left to tokenize before the close tag.
                    break;
                }
            }

            if ($line === '') {
                continue;
            }

            if ($line[$char] === '*') {
                // This is a function or class doc block line.
                $char++;
                $tokens[$stackPtr] = array(
                                      'content' => '*',
                                      'code'    => T_DOC_COMMENT_STAR,
                                      'type'    => 'T_DOC_COMMENT_STAR',
                                     );

                $stackPtr++;

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\tCreate comment token: T_DOC_COMMENT_STAR => *".PHP_EOL;
                }
            }

            // Now we are ready to process the actual content of the line.
            $lineTokens = $this->_processLine($line, $eolChar, $char, $numChars);
            foreach ($lineTokens as $lineToken) {
                $tokens[$stackPtr] = $lineToken;
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = PHP_CodeSniffer::prepareForOutput($lineToken['content']);
                    $type    = $lineToken['type'];
                    echo "\t\tCreate comment token: $type => $content".PHP_EOL;
                }

                if ($lineToken['code'] === T_DOC_COMMENT_TAG) {
                    $tokens[$openPtr]['comment_tags'][] = $stackPtr;
                }

                $stackPtr++;
            }
        }//end foreach

        $tokens[$stackPtr] = $closeTag;
        $tokens[$openPtr]['comment_closer'] = $stackPtr;
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            $content = PHP_CodeSniffer::prepareForOutput($closeTag['content']);
            echo "\t\tCreate comment token: T_DOC_COMMENT_CLOSE_TAG => $content".PHP_EOL;
        }

        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t\t*** END COMMENT TOKENIZING ***".PHP_EOL;
        }

        return $tokens;

    }//end tokenizeString()


    /**
     * Process a single line of a comment.
     *
     * Produces, in order: optional content padding, an optional tag with
     * its trailing whitespace, the remaining text of the line, and the EOL
     * character when the line contains one.
     *
     * @param string $string  The comment string being tokenized.
     * @param string $eolChar The EOL character to use for splitting strings.
     * @param int    $start   The position in the string to start processing.
     * @param int    $end     The position in the string to end processing.
     *
     * @return array
     */
    private function _processLine($string, $eolChar, $start, $end)
    {
        $tokens = array();

        // Collect content padding.
        $space = $this->_collectWhitespace($string, $start, $end);
        if ($space !== null) {
            $tokens[] = $space;
            $start   += strlen($space['content']);
        }

        if (isset($string[$start]) === false) {
            return $tokens;
        }

        if ($string[$start] === '@') {
            // The content up until the first whitespace is the tag name.
            // The \G assertion pins the match to $start; without it, a lone
            // "@" followed by whitespace would let the pattern match a later
            // "@word" on the same line and the tokens would be cut from the
            // wrong offsets.
            $matches = array();
            preg_match('/\G@[^\s]+/', $string, $matches, 0, $start);
            if (isset($matches[0]) === true) {
                $tagName  = $matches[0];
                $start   += strlen($tagName);
                $tokens[] = array(
                             'content' => $tagName,
                             'code'    => T_DOC_COMMENT_TAG,
                             'type'    => 'T_DOC_COMMENT_TAG',
                            );

                // Then there will be some whitespace.
                $space = $this->_collectWhitespace($string, $start, $end);
                if ($space !== null) {
                    $tokens[] = $space;
                    $start   += strlen($space['content']);
                }
            }
        }//end if

        // Process the rest of the line.
        $eol = strpos($string, $eolChar, $start);
        if ($eol === false) {
            $eol = $end;
        }

        if ($eol > $start) {
            $tokens[] = array(
                         'content' => substr($string, $start, ($eol - $start)),
                         'code'    => T_DOC_COMMENT_STRING,
                         'type'    => 'T_DOC_COMMENT_STRING',
                        );
        }

        if ($eol !== $end) {
            // The EOL character is emitted as its own whitespace token.
            $tokens[] = array(
                         'content' => substr($string, $eol, strlen($eolChar)),
                         'code'    => T_DOC_COMMENT_WHITESPACE,
                         'type'    => 'T_DOC_COMMENT_WHITESPACE',
                        );
        }

        return $tokens;

    }//end _processLine()


    /**
     * Collect consecutive whitespace into a single token.
     *
     * Only spaces and tabs count as whitespace here. Returns null when
     * there is none at the start position.
     *
     * @param string $string The comment string being tokenized.
     * @param int    $start  The position in the string to start processing.
     * @param int    $end    The position in the string to end processing.
     *
     * @return array|null
     */
    private function _collectWhitespace($string, $start, $end)
    {
        $pos = $start;
        while ($pos < $end
            && ($string[$pos] === ' ' || $string[$pos] === "\t")
        ) {
            $pos++;
        }

        if ($pos === $start) {
            return null;
        }

        $token = array(
                  'content' => substr($string, $start, ($pos - $start)),
                  'code'    => T_DOC_COMMENT_WHITESPACE,
                  'type'    => 'T_DOC_COMMENT_WHITESPACE',
                 );

        return $token;

    }//end _collectWhitespace()


}//end class