<?php

/*
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace Symfony\Component\Routing;

/**
 * RouteCompiler compiles Route instances to CompiledRoute instances.
 *
 * @author Fabien Potencier
 * @author Tobias Schultze
 */
class RouteCompiler implements RouteCompilerInterface
{
    const REGEX_DELIMITER = '#';

    /**
     * This string defines the characters that are automatically considered separators in front of
     * optional placeholders (with default and no static text following). Such a single separator
     * can be left out together with the optional placeholder from matching and generating URLs.
     */
    const SEPARATORS = '/,;.:-_~+*=@|';

    /**
     * The maximum supported length of a PCRE subpattern name
     * http://pcre.org/current/doc/html/pcre2pattern.html#SEC16.
     *
     * @internal
     */
    const VARIABLE_MAXIMUM_LENGTH = 32;

    /**
     * {@inheritdoc}
     *
     * The host pattern (when not empty) and the path pattern are compiled separately; the resulting
     * CompiledRoute receives both sets of tokens/regexes plus the de-duplicated list of all variables.
     *
     * @throws \InvalidArgumentException if a path variable is named _fragment
     * @throws \LogicException           if a variable is referenced more than once
     * @throws \DomainException          if a variable name starts with a digit or if it is too long to be successfully used as
     *                                   a PCRE subpattern
     */
    public static function compile(Route $route)
    {
        $hostVariables = [];
        $variables = [];
        $hostRegex = null;
        $hostTokens = [];

        if ('' !== $host = $route->getHost()) {
            $result = self::compilePattern($route, $host, true);

            $hostVariables = $result['variables'];
            $variables = $hostVariables;

            $hostTokens = $result['tokens'];
            $hostRegex = $result['regex'];
        }

        $path = $route->getPath();
        $result = self::compilePattern($route, $path, false);

        $pathVariables = $result['variables'];

        if (\in_array('_fragment', $pathVariables, true)) {
            throw new \InvalidArgumentException(sprintf('Route pattern "%s" cannot contain "_fragment" as a path parameter.', $path));
        }

        $variables = array_merge($variables, $pathVariables);

        return new CompiledRoute(
            $result['staticPrefix'],
            $result['regex'],
            $result['tokens'],
            $pathVariables,
            $hostRegex,
            $hostTokens,
            $hostVariables,
            array_unique($variables)
        );
    }

    /**
     * Compiles a single host or path pattern into tokens, a regex and its variable list.
     *
     * Tokens are either ['text', $text] or ['variable', $separator, $regexp, $varName]; when UTF-8
     * matching is enabled, every variable token gets an additional trailing `true` element.
     *
     * @param Route  $route   The route the pattern belongs to (read for requirements, defaults and the "utf8" option)
     * @param string $pattern The host or path pattern
     * @param bool   $isHost  Whether $pattern is the host pattern (default separator "." and case-insensitive regex)
     *
     * @return array With the keys "staticPrefix", "regex", "tokens" (in reverse order) and "variables"
     *
     * @throws \LogicException  if a variable is referenced more than once or UTF-8 and non-UTF-8 are mixed
     * @throws \DomainException if a variable name starts with a digit or is longer than VARIABLE_MAXIMUM_LENGTH
     */
    private static function compilePattern(Route $route, $pattern, $isHost)
    {
        $tokens = [];
        $variables = [];
        $pos = 0;
        $defaultSeparator = $isHost ? '.' : '/';
        // preg_match() with an empty /u pattern only succeeds when the subject is valid UTF-8.
        $useUtf8 = preg_match('//u', $pattern);
        $needsUtf8 = $route->getOption('utf8');

        if (!$needsUtf8 && $useUtf8 && preg_match('/[\x80-\xFF]/', $pattern)) {
            $needsUtf8 = true;
            @trigger_error(sprintf('Using UTF-8 route patterns without setting the "utf8" option is deprecated since Symfony 3.2 and will throw a LogicException in 4.0. Turn on the "utf8" route option for pattern "%s".', $pattern), E_USER_DEPRECATED);
        }
        if (!$useUtf8 && $needsUtf8) {
            throw new \LogicException(sprintf('Cannot mix UTF-8 requirements with non-UTF-8 pattern "%s".', $pattern));
        }

        // Match all variables enclosed in "{}" and iterate over them. But we only want to match the innermost variable
        // in case of nested "{}", e.g. {foo{bar}}. This is ensured because \w does not match "{" or "}" itself.
        preg_match_all('#\{\w+\}#', $pattern, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);
        foreach ($matches as $match) {
            $varName = substr($match[0][0], 1, -1);
            // get all static text preceding the current variable
            $precedingText = substr($pattern, $pos, $match[0][1] - $pos);
            $pos = $match[0][1] + \strlen($match[0][0]);

            if (!\strlen($precedingText)) {
                $precedingChar = '';
            } elseif ($useUtf8) {
                // "s" lets the dot match a newline and "D" anchors "$" at the very end, so the last
                // character is returned even when it is "\n" (same result as the byte-wise branch below).
                preg_match('/.$/usD', $precedingText, $precedingChar);
                $precedingChar = $precedingChar[0];
            } else {
                $precedingChar = substr($precedingText, -1);
            }
            $isSeparator = '' !== $precedingChar && false !== strpos(static::SEPARATORS, $precedingChar);

            // A PCRE subpattern name must start with a non-digit. Also a PHP variable cannot start with a digit so the
            // variable would not be usable as a Controller action argument.
            if (preg_match('/^\d/', $varName)) {
                throw new \DomainException(sprintf('Variable name "%s" cannot start with a digit in route pattern "%s". Please use a different name.', $varName, $pattern));
            }
            if (\in_array($varName, $variables, true)) {
                throw new \LogicException(sprintf('Route pattern "%s" cannot reference variable name "%s" more than once.', $pattern, $varName));
            }

            if (\strlen($varName) > self::VARIABLE_MAXIMUM_LENGTH) {
                throw new \DomainException(sprintf('Variable name "%s" cannot be longer than %s characters in route pattern "%s". Please use a shorter name.', $varName, self::VARIABLE_MAXIMUM_LENGTH, $pattern));
            }

            // The separator directly in front of the variable is stored on the variable token,
            // so it is cut off from the preceding static text.
            if ($isSeparator && $precedingText !== $precedingChar) {
                $tokens[] = ['text', substr($precedingText, 0, -\strlen($precedingChar))];
            } elseif (!$isSeparator && \strlen($precedingText) > 0) {
                $tokens[] = ['text', $precedingText];
            }

            $regexp = $route->getRequirement($varName);
            if (null === $regexp) {
                $followingPattern = (string) substr($pattern, $pos);
                // Find the next static character after the variable that functions as a separator. By default, this separator and '/'
                // are disallowed for the variable. This default requirement makes sure that optional variables can be matched at all
                // and that the generating-matching-combination of URLs is unambiguous, i.e. the params used for generating the URL are
                // the same that will be matched. Example: new Route('/{page}.{_format}', ['_format' => 'html'])
                // If {page} would also match the separating dot, {_format} would never match as {page} will eagerly consume everything.
                // Also even if {_format} was not optional the requirement prevents that {page} matches something that was originally
                // part of {_format} when generating the URL, e.g. _format = 'mobile.html'.
                $nextSeparator = self::findNextSeparator($followingPattern, $useUtf8);
                $regexp = sprintf(
                    '[^%s%s]+',
                    preg_quote($defaultSeparator, self::REGEX_DELIMITER),
                    $defaultSeparator !== $nextSeparator && '' !== $nextSeparator ? preg_quote($nextSeparator, self::REGEX_DELIMITER) : ''
                );
                if (('' !== $nextSeparator && !preg_match('#^\{\w+\}#', $followingPattern)) || '' === $followingPattern) {
                    // When we have a separator, which is disallowed for the variable, we can optimize the regex with a possessive
                    // quantifier. This prevents useless backtracking of PCRE and improves performance by 20% for matching those patterns.
                    // Given the above example, there is no point in backtracking into {page} (that forbids the dot) when a dot must follow
                    // after it. This optimization cannot be applied when the next char is no real separator or when the next variable is
                    // directly adjacent, e.g. '/{x}{y}'.
                    $regexp .= '+';
                }
            } else {
                if (!preg_match('//u', $regexp)) {
                    $useUtf8 = false;
                // NOTE(review): from the lookbehind in the regex below down to the head of the "first optional
                // token" loop, the original text was lost (a tag-stripping artifact). This span was rebuilt from
                // what the surrounding code requires; verify it against the pristine file before merging.
                } elseif (!$needsUtf8 && preg_match('/[\x80-\xFF]|(?<!\\\\)\\\\(?:\\\\\\\\)*+(?-i:X|[pP][\{CLMNPSZ]|x\{[A-Fa-f0-9]{3})/', $regexp)) {
                    $needsUtf8 = true;
                    @trigger_error(sprintf('Using UTF-8 route requirements without setting the "utf8" option is deprecated since Symfony 3.2 and will throw a LogicException in 4.0. Turn on the "utf8" route option for variable "%s" in pattern "%s".', $varName, $pattern), E_USER_DEPRECATED);
                }
                if (!$useUtf8 && $needsUtf8) {
                    throw new \LogicException(sprintf('Cannot mix UTF-8 requirement with non-UTF-8 charset for variable "%s" in pattern "%s".', $varName, $pattern));
                }
            }

            $tokens[] = ['variable', $isSeparator ? $precedingChar : '', $regexp, $varName];
            $variables[] = $varName;
        }

        // static text after the last variable (or the whole pattern when it has no variable)
        if ($pos < \strlen($pattern)) {
            $tokens[] = ['text', substr($pattern, $pos)];
        }

        // find the first optional token
        $firstOptional = PHP_INT_MAX;
        if (!$isHost) {
            // NOTE(review): end of the reconstructed span; the loop header below is only partly visible in the damaged text.
            for ($i = \count($tokens) - 1; $i >= 0; --$i) {
                $token = $tokens[$i];
                if ('variable' === $token[0] && $route->hasDefault($token[3])) {
                    $firstOptional = $i;
                } else {
                    break;
                }
            }
        }

        // compute the matching regexp
        $regexp = '';
        for ($i = 0, $nbToken = \count($tokens); $i < $nbToken; ++$i) {
            $regexp .= self::computeRegexp($tokens, $i, $firstOptional);
        }
        $regexp = self::REGEX_DELIMITER.'^'.$regexp.'$'.self::REGEX_DELIMITER.'sD'.($isHost ? 'i' : '');

        // enable Utf8 matching if really required
        if ($needsUtf8) {
            $regexp .= 'u';
            for ($i = 0, $nbToken = \count($tokens); $i < $nbToken; ++$i) {
                if ('variable' === $tokens[$i][0]) {
                    $tokens[$i][] = true;
                }
            }
        }

        return [
            'staticPrefix' => self::determineStaticPrefix($route, $tokens),
            'regex' => $regexp,
            'tokens' => array_reverse($tokens),
            'variables' => $variables,
        ];
    }

    /**
     * Determines the longest static prefix possible for a route.
     *
     * When the first token is a variable, its separator is the prefix unless the variable has a
     * default or the separator is "/". When the first token is text, the separator of a directly
     * following variable is appended under the same conditions.
     *
     * @param Route $route  The route (only its defaults are consulted)
     * @param array $tokens The tokens in pattern order (not reversed)
     *
     * @return string The leading static part of a route's path
     */
    private static function determineStaticPrefix(Route $route, array $tokens)
    {
        if ('text' !== $tokens[0][0]) {
            return ($route->hasDefault($tokens[0][3]) || '/' === $tokens[0][1]) ? '' : $tokens[0][1];
        }

        $prefix = $tokens[0][1];

        if (isset($tokens[1][1]) && '/' !== $tokens[1][1] && false === $route->hasDefault($tokens[1][3])) {
            $prefix .= $tokens[1][1];
        }

        return $prefix;
    }

    /**
     * Returns the next static character in the Route pattern that will serve as a separator.
     *
     * @param string $pattern The route pattern
     * @param bool   $useUtf8 Whether the character is encoded in UTF-8 or not
     *
     * @return string The next static character that functions as separator (or empty string when none available)
     */
    private static function findNextSeparator($pattern, $useUtf8)
    {
        // The loose comparison is deliberate: it also catches false, which substr() can return.
        if ('' == $pattern) {
            return '';
        }
        // first remove all placeholders from the pattern so we can find the next real static character
        if ('' === $pattern = preg_replace('#\{\w+\}#', '', $pattern)) {
            return '';
        }

        // Default to the first byte; in UTF-8 mode take the whole first character instead. The "s"
        // modifier makes the dot match a leading newline too, and the guard keeps the byte fallback
        // should the match fail, so no undefined offset is ever read.
        $firstChar = $pattern[0];
        if ($useUtf8 && preg_match('/^./us', $pattern, $firstCharMatch)) {
            $firstChar = $firstCharMatch[0];
        }

        return false !== strpos(static::SEPARATORS, $firstChar) ? $firstChar : '';
    }

    /**
     * Computes the regexp used to match a specific token. It can be static text or a subpattern.
     *
     * @param array $tokens        The route tokens
     * @param int   $index         The index of the current token
     * @param int   $firstOptional The index of the first optional token
     *
     * @return string The regexp pattern for a single token
     */
    private static function computeRegexp(array $tokens, $index, $firstOptional)
    {
        $token = $tokens[$index];

        // Text tokens
        if ('text' === $token[0]) {
            return preg_quote($token[1], self::REGEX_DELIMITER);
        }

        // Variable tokens
        if (0 === $index && 0 === $firstOptional) {
            // When the only token is an optional variable token, the separator is required
            return sprintf('%s(?P<%s>%s)?', preg_quote($token[1], self::REGEX_DELIMITER), $token[3], $token[2]);
        }

        $regexp = sprintf('%s(?P<%s>%s)', preg_quote($token[1], self::REGEX_DELIMITER), $token[3], $token[2]);
        if ($index >= $firstOptional) {
            // Enclose each optional token in a subpattern to make it optional.
            // "?:" means it is non-capturing, i.e. the portion of the subject string that
            // matched the optional subpattern is not passed back.
            $regexp = "(?:$regexp";
            $nbTokens = \count($tokens);
            if ($nbTokens - 1 === $index) {
                // Close the optional subpatterns
                $regexp .= str_repeat(')?', $nbTokens - $firstOptional - (0 === $firstOptional ? 1 : 0));
            }
        }

        return $regexp;
    }
}