Chris@12: <?php
Chris@12: /**
Chris@12:  * This file is part of phpDocumentor.
Chris@12:  *
Chris@12:  * For the full copyright and license information, please view the LICENSE
Chris@12:  * file that was distributed with this source code.
Chris@12:  *
Chris@12:  * @copyright 2010-2015 Mike van Riel<mike@phpdoc.org>
Chris@12:  * @license   http://www.opensource.org/licenses/mit-license.php MIT
Chris@12:  * @link      http://phpdoc.org
Chris@12:  */
Chris@12: 
Chris@12: namespace phpDocumentor\Reflection\DocBlock;
Chris@12: 
Chris@12: use phpDocumentor\Reflection\Types\Context as TypeContext;
Chris@12: 
/**
 * Creates a new Description object given a body of text.
 *
 * Descriptions in phpDocumentor are somewhat complex entities as they can contain one or more tags inside their
 * body that can be replaced with a readable output. The replacing is done by passing a Formatter object to the
 * Description object's `render` method.
 *
 * In addition to the above, a Description supports two types of escape sequences:
 *
 * 1. `{@}` to escape the `@` character to prevent it from being interpreted as part of a tag, i.e. `{{@}link}`
 * 2. `{}` to escape the `}` character, this can be used if you want to use the `}` character in the description
 *    of an inline tag.
 *
 * If a body consists of multiple lines then this factory will also remove any superfluous whitespace at the beginning
 * of each line while maintaining any indentation that is used. This will prevent formatting parsers from tripping
 * over unexpected spaces as can be observed with tag descriptions.
 */
class DescriptionFactory
{
    /** @var TagFactory Factory used to turn each inline tag line into a Tag object. */
    private $tagFactory;

    /**
     * Initializes this factory with the means to construct (inline) tags.
     *
     * @param TagFactory $tagFactory
     */
    public function __construct(TagFactory $tagFactory)
    {
        $this->tagFactory = $tagFactory;
    }

    /**
     * Builds a Description from the given raw text.
     *
     * The text is first tokenized by lex() and the tokens are then converted by parse() into a template string
     * plus the list of inline tags that belong to it.
     *
     * @param string           $contents
     * @param TypeContext|null $context  Optional context that is handed to the tag factory for every inline tag.
     *
     * @return Description
     */
    public function create($contents, TypeContext $context = null)
    {
        list($text, $tags) = $this->parse($this->lex($contents), $context);

        return new Description($text, $tags);
    }

    /**
     * Strips the contents from superfluous whitespace and splits the description into a series of tokens.
     *
     * The returned list alternates between literal text (even indexes) and the contents of an inline tag without
     * its surrounding braces (odd indexes); the pattern has exactly one capturing group, so every split contributes
     * exactly one captured element.
     *
     * @param string $contents
     *
     * @return string[] A series of tokens of which the description text is composed.
     */
    private function lex($contents)
    {
        $contents = $this->removeSuperfluousStartingWhitespace($contents);

        // performance optimization; if there is no inline tag, don't bother splitting it up.
        if (strpos($contents, '{@') === false) {
            return [$contents];
        }

        $tokens = preg_split(
            '/\{
                # "{@}" is not a valid inline tag. This ensures that we do not treat it as one, but treat it literally.
                (?!@\})
                # We want to capture the whole tag line, but without the inline tag delimiters.
                (\@
                    # Match everything up to the next delimiter.
                    [^{}]*
                    # Nested inline tag content should not be captured, or it will appear in the result separately.
                    (?:
                        # Match nested inline tags.
                        (?:
                            # Because we did not catch the tag delimiters earlier, we must be explicit with them here.
                            # Notice that this also matches "{}", as a way to later introduce it as an escape sequence.
                            \{(?1)?\}
                            |
                            # Make sure we match hanging "{".
                            \{
                        )
                        # Match content after the nested inline tag.
                        [^{}]*
                    )* # If there are more inline tags, match them as well. We use "*" since there may not be any
                       # nested inline tags.
                )
            \}/Sux',
            $contents,
            // -1 means "no limit"; passing null for this integer argument is deprecated as of PHP 8.1.
            -1,
            PREG_SPLIT_DELIM_CAPTURE
        );

        // preg_split() returns false when PCRE fails (for example on malformed UTF-8 while the "u" modifier is
        // active). Fall back to treating the whole text as one literal token instead of handing a non-array to
        // parse().
        if ($tokens === false) {
            return [$contents];
        }

        return $tokens;
    }

    /**
     * Parses the stream of tokens into a template string and the inline Tags found in it.
     *
     * Every odd token (an inline tag line) is turned into a Tag and replaced by a positional placeholder of the
     * form `%1$s`, `%2$s`, ...; every even token (literal text) has its escape sequences resolved.
     *
     * @param string[]         $tokens  Tokens as produced by lex().
     * @param TypeContext|null $context Optional context, passed on unchanged to the tag factory. It must be
     *                                  nullable because create() allows the context to be omitted.
     *
     * @return array A two-element list: the template string at index 0 and the Tag[] at index 1.
     */
    private function parse($tokens, TypeContext $context = null)
    {
        $count = count($tokens);
        $tagCount = 0;
        $tags  = [];

        for ($i = 1; $i < $count; $i += 2) {
            $tags[] = $this->tagFactory->create($tokens[$i], $context);
            $tokens[$i] = '%' . ++$tagCount . '$s';
        }

        //In order to allow "literal" inline tags, the otherwise invalid
        //sequence "{@}" is changed to "@", and "{}" is changed to "}".
        //"%" is escaped to "%%" because of vsprintf.
        //See unit tests for examples.
        for ($i = 0; $i < $count; $i += 2) {
            $tokens[$i] = str_replace(['{@}', '{}', '%'], ['@', '}', '%%'], $tokens[$i]);
        }

        return [implode('', $tokens), $tags];
    }

    /**
     * Removes the superfluous whitespace from a multi-line description.
     *
     * When a description has more than one line then it can happen that the second and subsequent lines have an
     * additional indentation. This is commonly in use with tags like this:
     *
     *     {@}since 1.1.0 This is an example
     *         description where we have an
     *         indentation in the second and
     *         subsequent lines.
     *
     * If we do not normalize the indentation then we have superfluous whitespace on the second and subsequent
     * lines and this may cause rendering issues when, for example, using a Markdown converter.
     *
     * The first line is never altered; the smallest indentation found on the non-blank lines after it is removed
     * from every line after the first.
     *
     * @param string $contents
     *
     * @return string
     */
    private function removeSuperfluousStartingWhitespace($contents)
    {
        $lines = explode("\n", $contents);
        $lineCount = count($lines);

        // if there is only one line then we don't have lines with superfluous whitespace and
        // can use the contents as-is
        if ($lineCount <= 1) {
            return $contents;
        }

        // determine how many whitespace characters need to be stripped
        $startingSpaceCount = PHP_INT_MAX;
        for ($i = 1; $i < $lineCount; $i++) {
            // blank (empty or whitespace-only) lines do not count as they carry no meaningful indentation
            if (strlen(trim($lines[$i])) === 0) {
                continue;
            }

            // determine the number of prefixing spaces by checking the difference in line length before and after
            // an ltrim
            $startingSpaceCount = min($startingSpaceCount, strlen($lines[$i]) - strlen(ltrim($lines[$i])));
        }

        // strip the number of spaces from each line
        if ($startingSpaceCount > 0) {
            for ($i = 1; $i < $lineCount; $i++) {
                // Before PHP 8.0 substr() returns false when the offset lies beyond the end of the string (short
                // or blank lines); the cast keeps every entry a string.
                $lines[$i] = (string) substr($lines[$i], $startingSpaceCount);
            }
        }

        return implode("\n", $lines);
    }
}