<?php

/*
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace Symfony\Component\DomCrawler;

/**
 * Any HTML element that can link to an URI.
 *
 * Concrete subclasses supply the raw URI (getRawUri()) and validate the node
 * (setNode()); this base class resolves the raw URI against the URI of the
 * page the element is embedded in.
 *
 * @author Fabien Potencier
 */
abstract class AbstractUriElement
{
    /**
     * @var \DOMElement
     */
    protected $node;

    /**
     * @var string|null The upper-cased method to use for the element, or null when none was given
     */
    protected $method;

    /**
     * @var string The URI of the page where the element is embedded (or the base href)
     */
    protected $currentUri;

    /**
     * @param \DOMElement $node       A \DOMElement instance
     * @param string      $currentUri The URI of the page where the link is embedded (or the base href)
     * @param string      $method     The method to use for the link (get by default)
     *
     * @throws \InvalidArgumentException if the current URI does not start with "http" or "file"
     *                                   (setNode() may additionally reject the node; see its contract)
     */
    public function __construct(\DOMElement $node, $currentUri, $method = 'GET')
    {
        // Only the first four characters are inspected, case-insensitively.
        if (!\in_array(strtolower(substr($currentUri, 0, 4)), ['http', 'file'], true)) {
            throw new \InvalidArgumentException(sprintf('Current URI must be an absolute URL ("%s").', $currentUri));
        }

        $this->setNode($node);
        $this->method = $method ? strtoupper($method) : null;
        $this->currentUri = $currentUri;
    }

    /**
     * Gets the node associated with this link.
     *
     * @return \DOMElement A \DOMElement instance
     */
    public function getNode()
    {
        return $this->node;
    }

    /**
     * Gets the method associated with this link.
     *
     * @return string|null The upper-cased method, or null when an empty method was passed to the constructor
     */
    public function getMethod()
    {
        return $this->method;
    }

    /**
     * Gets the URI associated with this link.
     *
     * The raw URI is trimmed and then resolved against the current URI:
     * URIs with a scheme are returned untouched, an empty URI yields the
     * current URI, and fragments, query strings, scheme-relative URIs,
     * absolute paths and relative paths are each merged with the relevant
     * part of the current URI.
     *
     * @return string The URI
     */
    public function getUri()
    {
        $uri = trim($this->getRawUri());

        // absolute URL?
        if (null !== parse_url($uri, PHP_URL_SCHEME)) {
            return $uri;
        }

        // empty URI (compared strictly: "0" is a valid relative reference, not an empty one)
        if ('' === $uri) {
            return $this->currentUri;
        }

        // an anchor
        if ('#' === $uri[0]) {
            return $this->cleanupAnchor($this->currentUri).$uri;
        }

        $baseUri = $this->cleanupUri($this->currentUri);

        // a query string replaces the one of the current URI
        if ('?' === $uri[0]) {
            return $baseUri.$uri;
        }

        // absolute URL with relative schema: reuse everything before the first "//" of the base
        if (0 === strpos($uri, '//')) {
            return preg_replace('#^([^/]*)//.*$#', '$1', $baseUri).$uri;
        }

        // reduce the base to "scheme://authority"
        $baseUri = preg_replace('#^(.*?//[^/]*)(?:\/.*)?$#', '$1', $baseUri);

        // absolute path
        if ('/' === $uri[0]) {
            return $baseUri.$uri;
        }

        // relative path: parse_url() yields null when the remainder has no path
        // component, so normalise to a string before slicing it
        $path = (string) parse_url(substr($this->currentUri, \strlen($baseUri)), PHP_URL_PATH);
        $lastSlash = strrpos($path, '/');
        $directory = false === $lastSlash ? '' : substr($path, 0, $lastSlash);
        $path = $this->canonicalizePath($directory.'/'.$uri);

        return $baseUri.('' === $path || '/' !== $path[0] ? '/' : '').$path;
    }

    /**
     * Returns raw URI data.
     *
     * @return string
     */
    abstract protected function getRawUri();

    /**
     * Returns the canonicalized URI path (see RFC 3986, section 5.2.4).
     *
     * "." segments are dropped and ".." segments remove the preceding
     * segment; popping past the first segment is silently ignored.
     *
     * @param string $path URI path
     *
     * @return string
     */
    protected function canonicalizePath($path)
    {
        if ('' === $path || '/' === $path) {
            return $path;
        }

        // a trailing "." or ".." refers to a directory, so keep a trailing slash in the result
        if ('.' === substr($path, -1)) {
            $path .= '/';
        }

        $output = [];

        foreach (explode('/', $path) as $segment) {
            if ('..' === $segment) {
                array_pop($output);
            } elseif ('.' !== $segment) {
                $output[] = $segment;
            }
        }

        return implode('/', $output);
    }

    /**
     * Sets current \DOMElement instance.
     *
     * @param \DOMElement $node A \DOMElement instance
     *
     * @throws \LogicException If given node is not an anchor
     */
    abstract protected function setNode(\DOMElement $node);

    /**
     * Removes the query string and the anchor from the given uri.
     *
     * @param string $uri The uri to clean
     *
     * @return string
     */
    private function cleanupUri($uri)
    {
        return $this->cleanupQuery($this->cleanupAnchor($uri));
    }

    /**
     * Remove the query string from the uri.
     *
     * Everything from the first "?" onwards is discarded.
     *
     * @param string $uri
     *
     * @return string
     */
    private function cleanupQuery($uri)
    {
        if (false !== $pos = strpos($uri, '?')) {
            return substr($uri, 0, $pos);
        }

        return $uri;
    }

    /**
     * Remove the anchor from the uri.
     *
     * Everything from the first "#" onwards is discarded.
     *
     * @param string $uri
     *
     * @return string
     */
    private function cleanupAnchor($uri)
    {
        if (false !== $pos = strpos($uri, '#')) {
            return substr($uri, 0, $pos);
        }

        return $uri;
    }
}