Chris@17: <?php
Chris@17: /**
Chris@17:  * Tokenizes doc block comments.
Chris@17:  *
Chris@17:  * @author    Greg Sherwood <gsherwood@squiz.net>
Chris@17:  * @copyright 2006-2015 Squiz Pty Ltd (ABN 77 084 670 600)
Chris@17:  * @license   https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
Chris@17:  */
Chris@17: 
Chris@17: namespace PHP_CodeSniffer\Tokenizers;
Chris@17: 
Chris@17: use PHP_CodeSniffer\Util;
Chris@17: 
class Comment
{


    /**
     * Splits the raw text of a doc block comment into comment tokens.
     *
     * The returned array is keyed by stack pointer, starting at $stackPtr, and
     * contains (in order) a T_DOC_COMMENT_OPEN_TAG token, the whitespace, star,
     * tag and string tokens found on each line, and a T_DOC_COMMENT_CLOSE_TAG
     * token. The open tag token records the stack pointers of all tag tokens in
     * 'comment_tags' and the position of the close tag in 'comment_closer'; the
     * close tag token records the position of the open tag in 'comment_opener'.
     *
     * @param string $string   The complete comment text to tokenize.
     * @param string $eolChar  The EOL character to use for splitting strings.
     * @param int    $stackPtr The position of the first token in the file.
     *
     * @return array
     */
    public function tokenizeString($string, $eolChar, $stackPtr)
    {
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t\t*** START COMMENT TOKENIZING ***".PHP_EOL;
        }

        $tokens   = [];
        $numChars = strlen($string);

        /*
            Doc block comments start with /*, but typically contain an
            extra star when they are used for function and class comments.
        */

        $char      = ($numChars - strlen(ltrim($string, '/*')));
        $lastChars = substr($string, -2);
        if ($char === $numChars && $lastChars === '*/') {
            // Edge case: a doc block without whitespace or content, where
            // trimming slashes and stars from the left would also swallow
            // the closer. Keep the last two characters for the close tag.
            $openTag = substr($string, 0, -2);
            $string  = $lastChars;
        } else {
            $openTag = substr($string, 0, $char);
            $string  = ltrim($string, '/*');
        }

        $tokens[$stackPtr] = [
            'content'      => $openTag,
            'code'         => T_DOC_COMMENT_OPEN_TAG,
            'type'         => 'T_DOC_COMMENT_OPEN_TAG',
            'comment_tags' => [],
        ];

        $openPtr = $stackPtr;
        $stackPtr++;

        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            $content = Util\Common::prepareForOutput($openTag);
            echo "\t\tCreate comment token: T_DOC_COMMENT_OPEN_TAG => $content".PHP_EOL;
        }

        /*
            Strip off the close tag so it doesn't interfere with any
            of our comment line processing. The token will be added to the
            stack just before we return it.
        */

        $closeTag = [
            'content'        => substr($string, strlen(rtrim($string, '/*'))),
            'code'           => T_DOC_COMMENT_CLOSE_TAG,
            'type'           => 'T_DOC_COMMENT_CLOSE_TAG',
            'comment_opener' => $openPtr,
        ];

        // Older PHP versions return FALSE instead of an empty string when the
        // substr() offset is at the end of the string.
        if ($closeTag['content'] === false) {
            $closeTag['content'] = '';
        }

        $string = rtrim($string, '/*');

        /*
            Process each line of the comment.
        */

        $lines    = explode($eolChar, $string);
        $numLines = count($lines);
        foreach ($lines as $lineNum => $string) {
            // explode() removed the EOL characters, so put them back on
            // every line except the last one.
            if ($lineNum !== ($numLines - 1)) {
                $string .= $eolChar;
            }

            $char     = 0;
            $numChars = strlen($string);

            // We've started a new line, so process the indent.
            $space = $this->collectWhitespace($string, $char, $numChars);
            if ($space !== null) {
                $tokens[$stackPtr] = $space;
                $stackPtr++;
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput($space['content']);
                    echo "\t\tCreate comment token: T_DOC_COMMENT_WHITESPACE => $content".PHP_EOL;
                }

                $char += strlen($space['content']);
                if ($char === $numChars) {
                    // The line contained nothing but indent. As all other
                    // lines end with the EOL character, this can only be
                    // the final line of the comment.
                    break;
                }
            }

            if ($string === '') {
                continue;
            }

            if ($lineNum > 0 && $string[$char] === '*') {
                // This is a function or class doc block line.
                $char++;
                $tokens[$stackPtr] = [
                    'content' => '*',
                    'code'    => T_DOC_COMMENT_STAR,
                    'type'    => 'T_DOC_COMMENT_STAR',
                ];

                $stackPtr++;

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\tCreate comment token: T_DOC_COMMENT_STAR => *".PHP_EOL;
                }
            }

            // Now we are ready to process the actual content of the line.
            $lineTokens = $this->processLine($string, $eolChar, $char, $numChars);
            foreach ($lineTokens as $lineToken) {
                $tokens[$stackPtr] = $lineToken;
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $content = Util\Common::prepareForOutput($lineToken['content']);
                    $type    = $lineToken['type'];
                    echo "\t\tCreate comment token: $type => $content".PHP_EOL;
                }

                // Record the position of every tag on the comment opener.
                if ($lineToken['code'] === T_DOC_COMMENT_TAG) {
                    $tokens[$openPtr]['comment_tags'][] = $stackPtr;
                }

                $stackPtr++;
            }
        }//end foreach

        $tokens[$stackPtr] = $closeTag;
        $tokens[$openPtr]['comment_closer'] = $stackPtr;
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            $content = Util\Common::prepareForOutput($closeTag['content']);
            echo "\t\tCreate comment token: T_DOC_COMMENT_CLOSE_TAG => $content".PHP_EOL;
            echo "\t\t*** END COMMENT TOKENIZING ***".PHP_EOL;
        }

        return $tokens;

    }//end tokenizeString()


    /**
     * Process a single line of a comment.
     *
     * Produces, in order and only when present: the padding before the
     * content, a tag token (a leading "@name" that is not a "@phpcs:"
     * annotation) with the whitespace after it, the remaining text as a
     * string token, and the EOL character as a whitespace token.
     *
     * @param string $string  The comment string being tokenized.
     * @param string $eolChar The EOL character to use for splitting strings.
     * @param int    $start   The position in the string to start processing.
     * @param int    $end     The position in the string to end processing.
     *
     * @return array
     */
    private function processLine($string, $eolChar, $start, $end)
    {
        $tokens = [];

        // Collect content padding.
        $space = $this->collectWhitespace($string, $start, $end);
        if ($space !== null) {
            $tokens[] = $space;
            $start   += strlen($space['content']);
        }

        if (isset($string[$start]) === false) {
            return $tokens;
        }

        if ($string[$start] === '@') {
            // The content up until the first whitespace is the tag name.
            // The \G anchor forces the match to begin exactly at $start, so
            // a lone "@" can never pick up a tag found later on the line.
            $matches = [];
            preg_match('/\G@[^\s]+/', $string, $matches, 0, $start);
            if (isset($matches[0]) === true
                && substr(strtolower($matches[0]), 0, 7) !== '@phpcs:'
            ) {
                $tagName  = $matches[0];
                $start   += strlen($tagName);
                $tokens[] = [
                    'content' => $tagName,
                    'code'    => T_DOC_COMMENT_TAG,
                    'type'    => 'T_DOC_COMMENT_TAG',
                ];

                // Then there will be some whitespace.
                $space = $this->collectWhitespace($string, $start, $end);
                if ($space !== null) {
                    $tokens[] = $space;
                    $start   += strlen($space['content']);
                }
            }
        }//end if

        // Process the rest of the line.
        $eol = strpos($string, $eolChar, $start);
        if ($eol === false) {
            $eol = $end;
        }

        if ($eol > $start) {
            $tokens[] = [
                'content' => substr($string, $start, ($eol - $start)),
                'code'    => T_DOC_COMMENT_STRING,
                'type'    => 'T_DOC_COMMENT_STRING',
            ];
        }

        // The EOL character gets its own whitespace token.
        if ($eol !== $end) {
            $tokens[] = [
                'content' => substr($string, $eol, strlen($eolChar)),
                'code'    => T_DOC_COMMENT_WHITESPACE,
                'type'    => 'T_DOC_COMMENT_WHITESPACE',
            ];
        }

        return $tokens;

    }//end processLine()


    /**
     * Collect consecutive whitespace into a single token.
     *
     * Only spaces and tabs are collected; collection stops at the first
     * other character or when the end position is reached.
     *
     * @param string $string The comment string being tokenized.
     * @param int    $start  The position in the string to start processing.
     * @param int    $end    The position in the string to end processing.
     *
     * @return array|null The whitespace token, or NULL when there is no
     *                    whitespace at the start position.
     */
    private function collectWhitespace($string, $start, $end)
    {
        $space = '';
        while ($start < $end
            && ($string[$start] === ' ' || $string[$start] === "\t")
        ) {
            $space .= $string[$start];
            $start++;
        }

        if ($space === '') {
            return null;
        }

        return [
            'content' => $space,
            'code'    => T_DOC_COMMENT_WHITESPACE,
            'type'    => 'T_DOC_COMMENT_WHITESPACE',
        ];

    }//end collectWhitespace()


}//end class