Mercurial > hg > cmmr2012-drupal-site
comparison vendor/symfony/dom-crawler/Crawler.php @ 4:a9cd425dd02b
Update, including to Drupal core 8.6.10
author | Chris Cannam |
---|---|
date | Thu, 28 Feb 2019 13:11:55 +0000 |
parents | c75dbcec494b |
children | 12f9dff5fda9 |
comparison
equal
deleted
inserted
replaced
3:307d7a7fd348 | 4:a9cd425dd02b |
---|---|
28 private $defaultNamespacePrefix = 'default'; | 28 private $defaultNamespacePrefix = 'default'; |
29 | 29 |
30 /** | 30 /** |
31 * @var array A map of manually registered namespaces | 31 * @var array A map of manually registered namespaces |
32 */ | 32 */ |
33 private $namespaces = array(); | 33 private $namespaces = []; |
34 | 34 |
35 /** | 35 /** |
36 * @var string The base href value | 36 * @var string The base href value |
37 */ | 37 */ |
38 private $baseHref; | 38 private $baseHref; |
43 private $document; | 43 private $document; |
44 | 44 |
45 /** | 45 /** |
46 * @var \DOMElement[] | 46 * @var \DOMElement[] |
47 */ | 47 */ |
48 private $nodes = array(); | 48 private $nodes = []; |
49 | 49 |
50 /** | 50 /** |
51 * Whether the Crawler contains HTML or XML content (used when converting CSS to XPath). | 51 * Whether the Crawler contains HTML or XML content (used when converting CSS to XPath). |
52 * | 52 * |
53 * @var bool | 53 * @var bool |
90 /** | 90 /** |
91 * Removes all the nodes. | 91 * Removes all the nodes. |
92 */ | 92 */ |
93 public function clear() | 93 public function clear() |
94 { | 94 { |
95 $this->nodes = array(); | 95 $this->nodes = []; |
96 $this->document = null; | 96 $this->document = null; |
97 } | 97 } |
98 | 98 |
99 /** | 99 /** |
100 * Adds a node to the current list of nodes. | 100 * Adds a node to the current list of nodes. |
110 { | 110 { |
111 if ($node instanceof \DOMNodeList) { | 111 if ($node instanceof \DOMNodeList) { |
112 $this->addNodeList($node); | 112 $this->addNodeList($node); |
113 } elseif ($node instanceof \DOMNode) { | 113 } elseif ($node instanceof \DOMNode) { |
114 $this->addNode($node); | 114 $this->addNode($node); |
115 } elseif (is_array($node)) { | 115 } elseif (\is_array($node)) { |
116 $this->addNodes($node); | 116 $this->addNodes($node); |
117 } elseif (is_string($node)) { | 117 } elseif (\is_string($node)) { |
118 $this->addContent($node); | 118 $this->addContent($node); |
119 } elseif (null !== $node) { | 119 } elseif (null !== $node) { |
120 throw new \InvalidArgumentException(sprintf('Expecting a DOMNodeList or DOMNode instance, an array, a string, or null, but got "%s".', is_object($node) ? get_class($node) : gettype($node))); | 120 throw new \InvalidArgumentException(sprintf('Expecting a DOMNodeList or DOMNode instance, an array, a string, or null, but got "%s".', \is_object($node) ? \get_class($node) : \gettype($node))); |
121 } | 121 } |
122 } | 122 } |
123 | 123 |
124 /** | 124 /** |
125 * Adds HTML/XML content. | 125 * Adds HTML/XML content. |
127 * If the charset is not set via the content type, it is assumed to be UTF-8, | 127 * If the charset is not set via the content type, it is assumed to be UTF-8, |
128 * or ISO-8859-1 as a fallback, which is the default charset defined by the | 128 * or ISO-8859-1 as a fallback, which is the default charset defined by the |
129 * HTTP 1.1 specification. | 129 * HTTP 1.1 specification. |
130 * | 130 * |
131 * @param string $content A string to parse as HTML/XML | 131 * @param string $content A string to parse as HTML/XML |
132 * @param null|string $type The content type of the string | 132 * @param string|null $type The content type of the string |
133 */ | 133 */ |
134 public function addContent($content, $type = null) | 134 public function addContent($content, $type = null) |
135 { | 135 { |
136 if (empty($type)) { | 136 if (empty($type)) { |
137 $type = 0 === strpos($content, '<?xml') ? 'application/xml' : 'text/html'; | 137 $type = 0 === strpos($content, '<?xml') ? 'application/xml' : 'text/html'; |
206 libxml_use_internal_errors($internalErrors); | 206 libxml_use_internal_errors($internalErrors); |
207 libxml_disable_entity_loader($disableEntities); | 207 libxml_disable_entity_loader($disableEntities); |
208 | 208 |
209 $this->addDocument($dom); | 209 $this->addDocument($dom); |
210 | 210 |
211 $base = $this->filterRelativeXPath('descendant-or-self::base')->extract(array('href')); | 211 $base = $this->filterRelativeXPath('descendant-or-self::base')->extract(['href']); |
212 | 212 |
213 $baseHref = current($base); | 213 $baseHref = current($base); |
214 if (count($base) && !empty($baseHref)) { | 214 if (\count($base) && !empty($baseHref)) { |
215 if ($this->baseHref) { | 215 if ($this->baseHref) { |
216 $linkNode = $dom->createElement('a'); | 216 $linkNode = $dom->createElement('a'); |
217 $linkNode->setAttribute('href', $baseHref); | 217 $linkNode->setAttribute('href', $baseHref); |
218 $link = new Link($linkNode, $this->baseHref); | 218 $link = new Link($linkNode, $this->baseHref); |
219 $this->baseHref = $link->getUri(); | 219 $this->baseHref = $link->getUri(); |
320 if (null === $this->document) { | 320 if (null === $this->document) { |
321 $this->document = $node->ownerDocument; | 321 $this->document = $node->ownerDocument; |
322 } | 322 } |
323 | 323 |
324 // Don't add duplicate nodes in the Crawler | 324 // Don't add duplicate nodes in the Crawler |
325 if (in_array($node, $this->nodes, true)) { | 325 if (\in_array($node, $this->nodes, true)) { |
326 return; | 326 return; |
327 } | 327 } |
328 | 328 |
329 $this->nodes[] = $node; | 329 $this->nodes[] = $node; |
330 } | 330 } |
361 * | 361 * |
362 * @return array An array of values returned by the anonymous function | 362 * @return array An array of values returned by the anonymous function |
363 */ | 363 */ |
364 public function each(\Closure $closure) | 364 public function each(\Closure $closure) |
365 { | 365 { |
366 $data = array(); | 366 $data = []; |
367 foreach ($this->nodes as $i => $node) { | 367 foreach ($this->nodes as $i => $node) { |
368 $data[] = $closure($this->createSubCrawler($node), $i); | 368 $data[] = $closure($this->createSubCrawler($node), $i); |
369 } | 369 } |
370 | 370 |
371 return $data; | 371 return $data; |
379 * | 379 * |
380 * @return self | 380 * @return self |
381 */ | 381 */ |
382 public function slice($offset = 0, $length = null) | 382 public function slice($offset = 0, $length = null) |
383 { | 383 { |
384 return $this->createSubCrawler(array_slice($this->nodes, $offset, $length)); | 384 return $this->createSubCrawler(\array_slice($this->nodes, $offset, $length)); |
385 } | 385 } |
386 | 386 |
387 /** | 387 /** |
388 * Reduces the list of nodes by calling an anonymous function. | 388 * Reduces the list of nodes by calling an anonymous function. |
389 * | 389 * |
393 * | 393 * |
394 * @return self | 394 * @return self |
395 */ | 395 */ |
396 public function reduce(\Closure $closure) | 396 public function reduce(\Closure $closure) |
397 { | 397 { |
398 $nodes = array(); | 398 $nodes = []; |
399 foreach ($this->nodes as $i => $node) { | 399 foreach ($this->nodes as $i => $node) { |
400 if (false !== $closure($this->createSubCrawler($node), $i)) { | 400 if (false !== $closure($this->createSubCrawler($node), $i)) { |
401 $nodes[] = $node; | 401 $nodes[] = $node; |
402 } | 402 } |
403 } | 403 } |
420 * | 420 * |
421 * @return self | 421 * @return self |
422 */ | 422 */ |
423 public function last() | 423 public function last() |
424 { | 424 { |
425 return $this->eq(count($this->nodes) - 1); | 425 return $this->eq(\count($this->nodes) - 1); |
426 } | 426 } |
427 | 427 |
428 /** | 428 /** |
429 * Returns the siblings nodes of the current selection. | 429 * Returns the siblings nodes of the current selection. |
430 * | 430 * |
485 if (!$this->nodes) { | 485 if (!$this->nodes) { |
486 throw new \InvalidArgumentException('The current node list is empty.'); | 486 throw new \InvalidArgumentException('The current node list is empty.'); |
487 } | 487 } |
488 | 488 |
489 $node = $this->getNode(0); | 489 $node = $this->getNode(0); |
490 $nodes = array(); | 490 $nodes = []; |
491 | 491 |
492 while ($node = $node->parentNode) { | 492 while ($node = $node->parentNode) { |
493 if (XML_ELEMENT_NODE === $node->nodeType) { | 493 if (XML_ELEMENT_NODE === $node->nodeType) { |
494 $nodes[] = $node; | 494 $nodes[] = $node; |
495 } | 495 } |
511 throw new \InvalidArgumentException('The current node list is empty.'); | 511 throw new \InvalidArgumentException('The current node list is empty.'); |
512 } | 512 } |
513 | 513 |
514 $node = $this->getNode(0)->firstChild; | 514 $node = $this->getNode(0)->firstChild; |
515 | 515 |
516 return $this->createSubCrawler($node ? $this->sibling($node) : array()); | 516 return $this->createSubCrawler($node ? $this->sibling($node) : []); |
517 } | 517 } |
518 | 518 |
519 /** | 519 /** |
520 * Returns the attribute value of the first node of the list. | 520 * Returns the attribute value of the first node of the list. |
521 * | 521 * |
603 { | 603 { |
604 if (null === $this->document) { | 604 if (null === $this->document) { |
605 throw new \LogicException('Cannot evaluate the expression on an uninitialized crawler.'); | 605 throw new \LogicException('Cannot evaluate the expression on an uninitialized crawler.'); |
606 } | 606 } |
607 | 607 |
608 $data = array(); | 608 $data = []; |
609 $domxpath = $this->createDOMXPath($this->document, $this->findNamespacePrefixes($xpath)); | 609 $domxpath = $this->createDOMXPath($this->document, $this->findNamespacePrefixes($xpath)); |
610 | 610 |
611 foreach ($this->nodes as $node) { | 611 foreach ($this->nodes as $node) { |
612 $data[] = $domxpath->evaluate($xpath, $node); | 612 $data[] = $domxpath->evaluate($xpath, $node); |
613 } | 613 } |
624 * | 624 * |
625 * You can extract attributes or/and the node value (_text). | 625 * You can extract attributes or/and the node value (_text). |
626 * | 626 * |
627 * Example: | 627 * Example: |
628 * | 628 * |
629 * $crawler->filter('h1 a')->extract(array('_text', 'href')); | 629 * $crawler->filter('h1 a')->extract(['_text', 'href']); |
630 * | 630 * |
631 * @param array $attributes An array of attributes | 631 * @param array $attributes An array of attributes |
632 * | 632 * |
633 * @return array An array of extracted values | 633 * @return array An array of extracted values |
634 */ | 634 */ |
635 public function extract($attributes) | 635 public function extract($attributes) |
636 { | 636 { |
637 $attributes = (array) $attributes; | 637 $attributes = (array) $attributes; |
638 $count = count($attributes); | 638 $count = \count($attributes); |
639 | 639 |
640 $data = array(); | 640 $data = []; |
641 foreach ($this->nodes as $node) { | 641 foreach ($this->nodes as $node) { |
642 $elements = array(); | 642 $elements = []; |
643 foreach ($attributes as $attribute) { | 643 foreach ($attributes as $attribute) { |
644 if ('_text' === $attribute) { | 644 if ('_text' === $attribute) { |
645 $elements[] = $node->nodeValue; | 645 $elements[] = $node->nodeValue; |
646 } else { | 646 } else { |
647 $elements[] = $node->getAttribute($attribute); | 647 $elements[] = $node->getAttribute($attribute); |
763 } | 763 } |
764 | 764 |
765 $node = $this->getNode(0); | 765 $node = $this->getNode(0); |
766 | 766 |
767 if (!$node instanceof \DOMElement) { | 767 if (!$node instanceof \DOMElement) { |
768 throw new \InvalidArgumentException(sprintf('The selected node should be instance of DOMElement, got "%s".', get_class($node))); | 768 throw new \InvalidArgumentException(sprintf('The selected node should be instance of DOMElement, got "%s".', \get_class($node))); |
769 } | 769 } |
770 | 770 |
771 return new Link($node, $this->baseHref, $method); | 771 return new Link($node, $this->baseHref, $method); |
772 } | 772 } |
773 | 773 |
778 * | 778 * |
779 * @throws \InvalidArgumentException If the current node list contains non-DOMElement instances | 779 * @throws \InvalidArgumentException If the current node list contains non-DOMElement instances |
780 */ | 780 */ |
781 public function links() | 781 public function links() |
782 { | 782 { |
783 $links = array(); | 783 $links = []; |
784 foreach ($this->nodes as $node) { | 784 foreach ($this->nodes as $node) { |
785 if (!$node instanceof \DOMElement) { | 785 if (!$node instanceof \DOMElement) { |
786 throw new \InvalidArgumentException(sprintf('The current node list should contain only DOMElement instances, "%s" found.', get_class($node))); | 786 throw new \InvalidArgumentException(sprintf('The current node list should contain only DOMElement instances, "%s" found.', \get_class($node))); |
787 } | 787 } |
788 | 788 |
789 $links[] = new Link($node, $this->baseHref, 'get'); | 789 $links[] = new Link($node, $this->baseHref, 'get'); |
790 } | 790 } |
791 | 791 |
799 * | 799 * |
800 * @throws \InvalidArgumentException If the current node list is empty | 800 * @throws \InvalidArgumentException If the current node list is empty |
801 */ | 801 */ |
802 public function image() | 802 public function image() |
803 { | 803 { |
804 if (!count($this)) { | 804 if (!\count($this)) { |
805 throw new \InvalidArgumentException('The current node list is empty.'); | 805 throw new \InvalidArgumentException('The current node list is empty.'); |
806 } | 806 } |
807 | 807 |
808 $node = $this->getNode(0); | 808 $node = $this->getNode(0); |
809 | 809 |
810 if (!$node instanceof \DOMElement) { | 810 if (!$node instanceof \DOMElement) { |
811 throw new \InvalidArgumentException(sprintf('The selected node should be instance of DOMElement, got "%s".', get_class($node))); | 811 throw new \InvalidArgumentException(sprintf('The selected node should be instance of DOMElement, got "%s".', \get_class($node))); |
812 } | 812 } |
813 | 813 |
814 return new Image($node, $this->baseHref); | 814 return new Image($node, $this->baseHref); |
815 } | 815 } |
816 | 816 |
819 * | 819 * |
820 * @return Image[] An array of Image instances | 820 * @return Image[] An array of Image instances |
821 */ | 821 */ |
822 public function images() | 822 public function images() |
823 { | 823 { |
824 $images = array(); | 824 $images = []; |
825 foreach ($this as $node) { | 825 foreach ($this as $node) { |
826 if (!$node instanceof \DOMElement) { | 826 if (!$node instanceof \DOMElement) { |
827 throw new \InvalidArgumentException(sprintf('The current node list should contain only DOMElement instances, "%s" found.', get_class($node))); | 827 throw new \InvalidArgumentException(sprintf('The current node list should contain only DOMElement instances, "%s" found.', \get_class($node))); |
828 } | 828 } |
829 | 829 |
830 $images[] = new Image($node, $this->baseHref); | 830 $images[] = new Image($node, $this->baseHref); |
831 } | 831 } |
832 | 832 |
850 } | 850 } |
851 | 851 |
852 $node = $this->getNode(0); | 852 $node = $this->getNode(0); |
853 | 853 |
854 if (!$node instanceof \DOMElement) { | 854 if (!$node instanceof \DOMElement) { |
855 throw new \InvalidArgumentException(sprintf('The selected node should be instance of DOMElement, got "%s".', get_class($node))); | 855 throw new \InvalidArgumentException(sprintf('The selected node should be instance of DOMElement, got "%s".', \get_class($node))); |
856 } | 856 } |
857 | 857 |
858 $form = new Form($node, $this->uri, $method, $this->baseHref); | 858 $form = new Form($node, $this->uri, $method, $this->baseHref); |
859 | 859 |
860 if (null !== $values) { | 860 if (null !== $values) { |
887 * Converts string for XPath expressions. | 887 * Converts string for XPath expressions. |
888 * | 888 * |
889 * Escaped characters are: quotes (") and apostrophe ('). | 889 * Escaped characters are: quotes (") and apostrophe ('). |
890 * | 890 * |
891 * Examples: | 891 * Examples: |
892 * <code> | 892 * |
893 * echo Crawler::xpathLiteral('foo " bar'); | 893 * echo Crawler::xpathLiteral('foo " bar'); |
894 * //prints 'foo " bar' | 894 * //prints 'foo " bar' |
895 * | 895 * |
896 * echo Crawler::xpathLiteral("foo ' bar"); | 896 * echo Crawler::xpathLiteral("foo ' bar"); |
897 * //prints "foo ' bar" | 897 * //prints "foo ' bar" |
898 * | 898 * |
899 * echo Crawler::xpathLiteral('a\'b"c'); | 899 * echo Crawler::xpathLiteral('a\'b"c'); |
900 * //prints concat('a', "'", 'b"c') | 900 * //prints concat('a', "'", 'b"c') |
901 * </code> | 901 * |
902 * | 902 * |
903 * @param string $s String to be escaped | 903 * @param string $s String to be escaped |
904 * | 904 * |
905 * @return string Converted string | 905 * @return string Converted string |
906 */ | 906 */ |
913 if (false === strpos($s, '"')) { | 913 if (false === strpos($s, '"')) { |
914 return sprintf('"%s"', $s); | 914 return sprintf('"%s"', $s); |
915 } | 915 } |
916 | 916 |
917 $string = $s; | 917 $string = $s; |
918 $parts = array(); | 918 $parts = []; |
919 while (true) { | 919 while (true) { |
920 if (false !== $pos = strpos($string, "'")) { | 920 if (false !== $pos = strpos($string, "'")) { |
921 $parts[] = sprintf("'%s'", substr($string, 0, $pos)); | 921 $parts[] = sprintf("'%s'", substr($string, 0, $pos)); |
922 $parts[] = "\"'\""; | 922 $parts[] = "\"'\""; |
923 $string = substr($string, $pos + 1); | 923 $string = substr($string, $pos + 1); |
963 * | 963 * |
964 * @return string | 964 * @return string |
965 */ | 965 */ |
966 private function relativize($xpath) | 966 private function relativize($xpath) |
967 { | 967 { |
968 $expressions = array(); | 968 $expressions = []; |
969 | 969 |
970 // An expression which will never match to replace expressions which cannot match in the crawler | 970 // An expression which will never match to replace expressions which cannot match in the crawler |
971 // We cannot simply drop | 971 // We cannot simply drop |
972 $nonMatchingExpression = 'a[name() = "b"]'; | 972 $nonMatchingExpression = 'a[name() = "b"]'; |
973 | 973 |
974 $xpathLen = strlen($xpath); | 974 $xpathLen = \strlen($xpath); |
975 $openedBrackets = 0; | 975 $openedBrackets = 0; |
976 $startPosition = strspn($xpath, " \t\n\r\0\x0B"); | 976 $startPosition = strspn($xpath, " \t\n\r\0\x0B"); |
977 | 977 |
978 for ($i = $startPosition; $i <= $xpathLen; ++$i) { | 978 for ($i = $startPosition; $i <= $xpathLen; ++$i) { |
979 $i += strcspn($xpath, '"\'[]|', $i); | 979 $i += strcspn($xpath, '"\'[]|', $i); |
1062 /** | 1062 /** |
1063 * @return int | 1063 * @return int |
1064 */ | 1064 */ |
1065 public function count() | 1065 public function count() |
1066 { | 1066 { |
1067 return count($this->nodes); | 1067 return \count($this->nodes); |
1068 } | 1068 } |
1069 | 1069 |
1070 /** | 1070 /** |
1071 * @return \ArrayIterator|\DOMElement[] | 1071 * @return \ArrayIterator|\DOMElement[] |
1072 */ | 1072 */ |
1081 * | 1081 * |
1082 * @return array | 1082 * @return array |
1083 */ | 1083 */ |
1084 protected function sibling($node, $siblingDir = 'nextSibling') | 1084 protected function sibling($node, $siblingDir = 'nextSibling') |
1085 { | 1085 { |
1086 $nodes = array(); | 1086 $nodes = []; |
1087 | 1087 |
1088 do { | 1088 do { |
1089 if ($node !== $this->getNode(0) && 1 === $node->nodeType) { | 1089 if ($node !== $this->getNode(0) && 1 === $node->nodeType) { |
1090 $nodes[] = $node; | 1090 $nodes[] = $node; |
1091 } | 1091 } |
1100 * | 1100 * |
1101 * @return \DOMXPath | 1101 * @return \DOMXPath |
1102 * | 1102 * |
1103 * @throws \InvalidArgumentException | 1103 * @throws \InvalidArgumentException |
1104 */ | 1104 */ |
1105 private function createDOMXPath(\DOMDocument $document, array $prefixes = array()) | 1105 private function createDOMXPath(\DOMDocument $document, array $prefixes = []) |
1106 { | 1106 { |
1107 $domxpath = new \DOMXPath($document); | 1107 $domxpath = new \DOMXPath($document); |
1108 | 1108 |
1109 foreach ($prefixes as $prefix) { | 1109 foreach ($prefixes as $prefix) { |
1110 $namespace = $this->discoverNamespace($domxpath, $prefix); | 1110 $namespace = $this->discoverNamespace($domxpath, $prefix); |
1147 { | 1147 { |
1148 if (preg_match_all('/(?P<prefix>[a-z_][a-z_0-9\-\.]*+):[^"\/:]/i', $xpath, $matches)) { | 1148 if (preg_match_all('/(?P<prefix>[a-z_][a-z_0-9\-\.]*+):[^"\/:]/i', $xpath, $matches)) { |
1149 return array_unique($matches['prefix']); | 1149 return array_unique($matches['prefix']); |
1150 } | 1150 } |
1151 | 1151 |
1152 return array(); | 1152 return []; |
1153 } | 1153 } |
1154 | 1154 |
1155 /** | 1155 /** |
1156 * Creates a crawler for some subnodes. | 1156 * Creates a crawler for some subnodes. |
1157 * | 1157 * |