<?php
/**
 * Tokenizes doc block comments.
 *
 * PHP version 5
 *
 * @category  PHP
 * @package   PHP_CodeSniffer
 * @author    Greg Sherwood <gsherwood@squiz.net>
 * @copyright 2006-2012 Squiz Pty Ltd (ABN 77 084 670 600)
 * @license   https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
 * @link      http://pear.php.net/package/PHP_CodeSniffer
 */

/**
 * Tokenizes doc block comments.
 *
 * Takes the raw text of a single doc block comment and splits it into a flat
 * list of T_DOC_COMMENT_* tokens: an open tag, then per-line whitespace, star,
 * tag and string tokens, and finally a close tag.
 *
 * @category  PHP
 * @package   PHP_CodeSniffer
 * @author    Greg Sherwood <gsherwood@squiz.net>
 * @copyright 2006-2012 Squiz Pty Ltd (ABN 77 084 670 600)
 * @license   https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
 * @version   Release: @package_version@
 * @link      http://pear.php.net/package/PHP_CodeSniffer
 */
class PHP_CodeSniffer_Tokenizers_Comment
{


    /**
     * Creates an array of tokens when given the text of a doc block comment.
     *
     * The returned array is keyed by stack position, starting at $stackPtr.
     * The first token is the open tag; it carries a 'comment_tags' list with
     * the stack positions of every T_DOC_COMMENT_TAG token found and a
     * 'comment_closer' entry pointing at the close tag. The last token is the
     * close tag; it carries a 'comment_opener' entry pointing back at the
     * open tag.
     *
     * @param string $string   The comment string to tokenize.
     * @param string $eolChar  The EOL character to use for splitting strings.
     * @param int    $stackPtr The position of the first token in the file.
     *
     * @return array
     */
    public function tokenizeString($string, $eolChar, $stackPtr)
    {
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t\t*** START COMMENT TOKENIZING ***".PHP_EOL;
        }

        $tokens   = array();
        $numChars = strlen($string);

        /*
            Doc block comments start with /*, but typically contain an
            extra star when they are used for function and class comments.
        */

        $char = ($numChars - strlen(ltrim($string, '/*')));
        if ($char === $numChars && substr($string, -2) === '*/') {
            // Edge case: the comment consists of nothing but slashes and
            // stars (e.g. an empty docblock with no whitespace). Keep the
            // final two characters back for the close tag, otherwise the
            // open tag would swallow the whole comment and the close tag
            // would be empty.
            $openTag = substr($string, 0, -2);
            $string  = '*/';
        } else {
            $openTag = substr($string, 0, $char);
            $string  = ltrim($string, '/*');
        }

        $tokens[$stackPtr] = array(
                              'content'      => $openTag,
                              'code'         => T_DOC_COMMENT_OPEN_TAG,
                              'type'         => 'T_DOC_COMMENT_OPEN_TAG',
                              'comment_tags' => array(),
                             );

        $openPtr = $stackPtr;
        $stackPtr++;

        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            $content = PHP_CodeSniffer::prepareForOutput($openTag);
            echo "\t\tCreate comment token: T_DOC_COMMENT_OPEN_TAG => $content".PHP_EOL;
        }

        /*
            Strip off the close tag so it doesn't interfere with any
            of our comment line processing. The token will be added to the
            stack just before we return it.
        */

        $closeTag = array(
                     'content'        => substr($string, strlen(rtrim($string, '/*'))),
                     'code'           => T_DOC_COMMENT_CLOSE_TAG,
                     'type'           => 'T_DOC_COMMENT_CLOSE_TAG',
                     'comment_opener' => $openPtr,
                    );

        // Older PHP versions return false from substr() when the start
        // offset is at the end of the string; normalise that to an empty tag.
        if ($closeTag['content'] === false) {
            $closeTag['content'] = '';
        }

        $string = rtrim($string, '/*');

        /*
            Process each line of the comment.
        */

        $lines    = explode($eolChar, $string);
        $numLines = count($lines);
        foreach ($lines as $lineNum => $string) {
            // explode() removed the EOL characters; put one back on every
            // line except the last so it is tokenized along with the line.
            if ($lineNum !== ($numLines - 1)) {
                $string .= $eolChar;
            }

            $char     = 0;
            $numChars = strlen($string);

            // We've started a new line, so process the indent.
            $space = $this->_collectWhitespace($string, $char, $numChars);
            if ($space !== null) {
                $tokens[$stackPtr] = $space;
                $stackPtr++;
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = PHP_CodeSniffer::prepareForOutput($space['content']);
                    echo "\t\tCreate comment token: T_DOC_COMMENT_WHITESPACE => $content".PHP_EOL;
                }

                $char += strlen($space['content']);
                if ($char === $numChars) {
                    // The line held nothing but indent. Every line except
                    // the last ends with the EOL character, so this is the
                    // indent in front of the close tag.
                    break;
                }
            }

            if ($string === '') {
                continue;
            }

            if ($string[$char] === '*') {
                // This is a function or class doc block line.
                $char++;
                $tokens[$stackPtr] = array(
                                      'content' => '*',
                                      'code'    => T_DOC_COMMENT_STAR,
                                      'type'    => 'T_DOC_COMMENT_STAR',
                                     );

                $stackPtr++;

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\tCreate comment token: T_DOC_COMMENT_STAR => *".PHP_EOL;
                }
            }

            // Now we are ready to process the actual content of the line.
            $lineTokens = $this->_processLine($string, $eolChar, $char, $numChars);
            foreach ($lineTokens as $lineToken) {
                $tokens[$stackPtr] = $lineToken;
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = PHP_CodeSniffer::prepareForOutput($lineToken['content']);
                    $type    = $lineToken['type'];
                    echo "\t\tCreate comment token: $type => $content".PHP_EOL;
                }

                // Record where each tag lives so it can be found from the
                // open tag without rescanning the comment.
                if ($lineToken['code'] === T_DOC_COMMENT_TAG) {
                    $tokens[$openPtr]['comment_tags'][] = $stackPtr;
                }

                $stackPtr++;
            }
        }//end foreach

        $tokens[$stackPtr] = $closeTag;
        $tokens[$openPtr]['comment_closer'] = $stackPtr;
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            $content = PHP_CodeSniffer::prepareForOutput($closeTag['content']);
            echo "\t\tCreate comment token: T_DOC_COMMENT_CLOSE_TAG => $content".PHP_EOL;
        }

        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t\t*** END COMMENT TOKENIZING ***".PHP_EOL;
        }

        return $tokens;

    }//end tokenizeString()


    /**
     * Process a single line of a comment.
     *
     * Produces, in order and only where present: the padding before the
     * content, a tag token (when the content starts with an "@name"), the
     * whitespace after the tag, a string token for the rest of the line, and
     * a whitespace token holding the EOL character.
     *
     * @param string $string  The comment string being tokenized.
     * @param string $eolChar The EOL character to use for splitting strings.
     * @param int    $start   The position in the string to start processing.
     * @param int    $end     The position in the string to end processing.
     *
     * @return array
     */
    private function _processLine($string, $eolChar, $start, $end)
    {
        $tokens = array();

        // Collect content padding.
        $space = $this->_collectWhitespace($string, $start, $end);
        if ($space !== null) {
            $tokens[] = $space;
            $start   += strlen($space['content']);
        }

        if (isset($string[$start]) === false) {
            return $tokens;
        }

        if ($string[$start] === '@') {
            // The content up until the first whitespace is the tag name.
            // The \G anchor forces the match to begin exactly at $start.
            // Without it, a lone "@" followed by whitespace would let the
            // pattern match a later "@tag" on the same line, and $start
            // would then be advanced by the length of text found elsewhere.
            $matches = array();
            preg_match('/\G@[^\s]+/', $string, $matches, 0, $start);
            if (isset($matches[0]) === true) {
                $tagName  = $matches[0];
                $start   += strlen($tagName);
                $tokens[] = array(
                             'content' => $tagName,
                             'code'    => T_DOC_COMMENT_TAG,
                             'type'    => 'T_DOC_COMMENT_TAG',
                            );

                // Then there will be some whitespace.
                $space = $this->_collectWhitespace($string, $start, $end);
                if ($space !== null) {
                    $tokens[] = $space;
                    $start   += strlen($space['content']);
                }
            }
        }//end if

        // Process the rest of the line.
        $eol = strpos($string, $eolChar, $start);
        if ($eol === false) {
            // No EOL on this line, so the content runs to the end.
            $eol = $end;
        }

        if ($eol > $start) {
            $tokens[] = array(
                         'content' => substr($string, $start, ($eol - $start)),
                         'code'    => T_DOC_COMMENT_STRING,
                         'type'    => 'T_DOC_COMMENT_STRING',
                        );
        }

        if ($eol !== $end) {
            // The EOL character gets a whitespace token of its own.
            $tokens[] = array(
                         'content' => substr($string, $eol, strlen($eolChar)),
                         'code'    => T_DOC_COMMENT_WHITESPACE,
                         'type'    => 'T_DOC_COMMENT_WHITESPACE',
                        );
        }

        return $tokens;

    }//end _processLine()


    /**
     * Collect consecutive whitespace into a single token.
     *
     * Only spaces and tabs are collected. Returns null when the character
     * at $start is not a space or tab, or when $start is not before $end.
     *
     * @param string $string The comment string being tokenized.
     * @param int    $start  The position in the string to start processing.
     * @param int    $end    The position in the string to end processing.
     *
     * @return array|null
     */
    private function _collectWhitespace($string, $start, $end)
    {
        if ($start >= $end) {
            return null;
        }

        // Length of the run of spaces/tabs beginning at $start, looking no
        // further than $end.
        $length = strspn($string, " \t", $start, ($end - $start));
        if ($length === 0) {
            return null;
        }

        $token = array(
                  'content' => substr($string, $start, $length),
                  'code'    => T_DOC_COMMENT_WHITESPACE,
                  'type'    => 'T_DOC_COMMENT_WHITESPACE',
                 );

        return $token;

    }//end _collectWhitespace()


}//end class