Chris@2: <?php
Chris@2: /**
Chris@2:  * This file is part of phpDocumentor.
Chris@2:  *
Chris@2:  * For the full copyright and license information, please view the LICENSE
Chris@2:  * file that was distributed with this source code.
Chris@2:  *
Chris@2:  * @copyright 2010-2015 Mike van Riel<mike@phpdoc.org>
Chris@2:  * @license   http://www.opensource.org/licenses/mit-license.php MIT
Chris@2:  * @link      http://phpdoc.org
Chris@2:  */
Chris@2: 
Chris@2: namespace phpDocumentor\Reflection\DocBlock;
Chris@2: 
Chris@2: use phpDocumentor\Reflection\Types\Context as TypeContext;
Chris@2: 
/**
 * Creates a new Description object given a body of text.
 *
 * Descriptions in phpDocumentor are somewhat complex entities as they can contain one or more tags inside their
 * body that can be replaced with a readable output. The replacing is done by passing a Formatter object to the
 * Description object's `render` method.
 *
 * In addition to the above, a Description supports two types of escape sequences:
 *
 * 1. `{@}` to escape the `@` character to prevent it from being interpreted as part of a tag, i.e. `{{@}link}`
 * 2. `{}` to escape the `}` character, this can be used if you want to use the `}` character in the description
 *    of an inline tag.
 *
 * If a body consists of multiple lines then this factory will also remove any superfluous whitespace at the beginning
 * of each line while maintaining any indentation that is used. This will prevent formatting parsers from tripping
 * over unexpected spaces as can be observed with tag descriptions.
 */
class DescriptionFactory
{
    /** @var TagFactory Factory used to turn every inline tag line into a Tag object. */
    private $tagFactory;

    /**
     * Initializes this factory with the means to construct (inline) tags.
     *
     * @param TagFactory $tagFactory
     */
    public function __construct(TagFactory $tagFactory)
    {
        $this->tagFactory = $tagFactory;
    }

    /**
     * Builds a Description from the given text, extracting any inline tags that it contains.
     *
     * @param string           $contents The raw description text.
     * @param TypeContext|null $context  Optional context; it is handed unchanged to the tag factory for every
     *                                   inline tag that is found.
     *
     * @return Description
     */
    public function create($contents, TypeContext $context = null)
    {
        list($text, $tags) = $this->parse($this->lex($contents), $context);

        return new Description($text, $tags);
    }

    /**
     * Strips the contents from superfluous whitespace and splits the description into a series of tokens.
     *
     * The returned list alternates between plain text (even indexes) and inline tag lines without their
     * surrounding braces (odd indexes). When no inline tag is present, or when the text cannot be split, the
     * list holds the normalized contents as its only element.
     *
     * @param string $contents
     *
     * @return string[] A series of tokens of which the description text is composed.
     */
    private function lex($contents)
    {
        $contents = $this->removeSuperfluousStartingWhitespace($contents);

        // performance optimization; if there is no inline tag, don't bother splitting it up.
        if (strpos($contents, '{@') === false) {
            return [$contents];
        }

        $tokens = preg_split(
            '/\{
                # "{@}" is not a valid inline tag. This ensures that we do not treat it as one, but treat it literally.
                (?!@\})
                # We want to capture the whole tag line, but without the inline tag delimiters.
                (\@
                    # Match everything up to the next delimiter.
                    [^{}]*
                    # Nested inline tag content should not be captured, or it will appear in the result separately.
                    (?:
                        # Match nested inline tags.
                        (?:
                            # Because we did not catch the tag delimiters earlier, we must be explicit with them here.
                            # Notice that this also matches "{}", as a way to later introduce it as an escape sequence.
                            \{(?1)?\}
                            |
                            # Make sure we match hanging "{".
                            \{
                        )
                        # Match content after the nested inline tag.
                        [^{}]*
                    )* # If there are more inline tags, match them as well. We use "*" since there may not be any
                       # nested inline tags.
                )
            \}/Sux',
            $contents,
            // 0 means "no limit"; passing null for this integer parameter is deprecated as of PHP 8.1.
            0,
            PREG_SPLIT_DELIM_CAPTURE
        );

        // preg_split() returns false when the PCRE engine fails, for example on input that is not valid UTF-8
        // while the "u" modifier is active. Keep the text as one literal token instead of passing false on.
        if ($tokens === false) {
            return [$contents];
        }

        return $tokens;
    }

    /**
     * Parses the stream of tokens into the description's body text and the inline Tags it refers to.
     *
     * Every inline tag token is replaced by a numbered placeholder ("%1$s", "%2$s", ...) and the text tokens
     * have their escape sequences resolved.
     *
     * @param string[]         $tokens  Tokens as produced by {@see lex()}: text at even indexes, tag lines at odd
     *                                  indexes.
     * @param TypeContext|null $context Optional context, handed unchanged to the tag factory.
     *
     * @return array A two-element list: the body text (string) followed by the created tags (Tag[]).
     */
    private function parse($tokens, TypeContext $context = null)
    {
        $count = count($tokens);
        $tagCount = 0;
        $tags  = [];

        // Odd indexes hold the captured tag lines; swap each for a positional placeholder so that the tag can
        // be rendered into the body later on.
        for ($i = 1; $i < $count; $i += 2) {
            $tags[] = $this->tagFactory->create($tokens[$i], $context);
            $tokens[$i] = '%' . ++$tagCount . '$s';
        }

        //In order to allow "literal" inline tags, the otherwise invalid
        //sequence "{@}" is changed to "@", and "{}" is changed to "}".
        //"%" is escaped to "%%" because of vsprintf.
        //See unit tests for examples.
        for ($i = 0; $i < $count; $i += 2) {
            $tokens[$i] = str_replace(['{@}', '{}', '%'], ['@', '}', '%%'], $tokens[$i]);
        }

        return [implode('', $tokens), $tags];
    }

    /**
     * Removes the superfluous whitespace from a multi-line description.
     *
     * When a description has more than one line then it can happen that the second and subsequent lines have an
     * additional indentation. This is commonly in use with tags like this:
     *
     *     {@}since 1.1.0 This is an example
     *         description where we have an
     *         indentation in the second and
     *         subsequent lines.
     *
     * If we do not normalize the indentation then we have superfluous whitespace on the second and subsequent
     * lines and this may cause rendering issues when, for example, using a Markdown converter.
     *
     * The smallest indentation found on the second and subsequent non-blank lines is removed from each of those
     * lines; the first line is never touched.
     *
     * @param string $contents
     *
     * @return string
     */
    private function removeSuperfluousStartingWhitespace($contents)
    {
        $lines = explode("\n", $contents);
        $lineCount = count($lines);

        // if there is only one line then we don't have lines with superfluous whitespace and
        // can use the contents as-is
        if ($lineCount <= 1) {
            return $contents;
        }

        // determine how many whitespace characters need to be stripped
        $startingSpaceCount = PHP_INT_MAX;
        for ($i = 1; $i < $lineCount; $i++) {
            // lines with a no length do not count as they are not indented at all
            if (strlen(trim($lines[$i])) === 0) {
                continue;
            }

            // determine the number of prefixing spaces by checking the difference in line length before and after
            // an ltrim
            $startingSpaceCount = min($startingSpaceCount, strlen($lines[$i]) - strlen(ltrim($lines[$i])));
        }

        // strip the number of spaces from each line
        if ($startingSpaceCount > 0) {
            for ($i = 1; $i < $lineCount; $i++) {
                // Before PHP 8, substr() returns false when the offset lies beyond the end of the string (blank
                // or short lines); the cast keeps every line a string on all versions.
                $lines[$i] = (string) substr($lines[$i], $startingSpaceCount);
            }
        }

        return implode("\n", $lines);
    }
}