comparison vendor/symfony/dom-crawler/Crawler.php @ 17:129ea1e6d783

Update, including to Drupal core 8.6.10
author Chris Cannam
date Thu, 28 Feb 2019 13:21:36 +0000
parents 5fb285c0d0e3
children af1871eacc83
comparison
equal deleted inserted replaced
16:c2387f117808 17:129ea1e6d783
28 private $defaultNamespacePrefix = 'default'; 28 private $defaultNamespacePrefix = 'default';
29 29
30 /** 30 /**
31 * @var array A map of manually registered namespaces 31 * @var array A map of manually registered namespaces
32 */ 32 */
33 private $namespaces = array(); 33 private $namespaces = [];
34 34
35 /** 35 /**
36 * @var string The base href value 36 * @var string The base href value
37 */ 37 */
38 private $baseHref; 38 private $baseHref;
43 private $document; 43 private $document;
44 44
45 /** 45 /**
46 * @var \DOMElement[] 46 * @var \DOMElement[]
47 */ 47 */
48 private $nodes = array(); 48 private $nodes = [];
49 49
50 /** 50 /**
51 * Whether the Crawler contains HTML or XML content (used when converting CSS to XPath). 51 * Whether the Crawler contains HTML or XML content (used when converting CSS to XPath).
52 * 52 *
53 * @var bool 53 * @var bool
90 /** 90 /**
91 * Removes all the nodes. 91 * Removes all the nodes.
92 */ 92 */
93 public function clear() 93 public function clear()
94 { 94 {
95 $this->nodes = array(); 95 $this->nodes = [];
96 $this->document = null; 96 $this->document = null;
97 } 97 }
98 98
99 /** 99 /**
100 * Adds a node to the current list of nodes. 100 * Adds a node to the current list of nodes.
110 { 110 {
111 if ($node instanceof \DOMNodeList) { 111 if ($node instanceof \DOMNodeList) {
112 $this->addNodeList($node); 112 $this->addNodeList($node);
113 } elseif ($node instanceof \DOMNode) { 113 } elseif ($node instanceof \DOMNode) {
114 $this->addNode($node); 114 $this->addNode($node);
115 } elseif (is_array($node)) { 115 } elseif (\is_array($node)) {
116 $this->addNodes($node); 116 $this->addNodes($node);
117 } elseif (is_string($node)) { 117 } elseif (\is_string($node)) {
118 $this->addContent($node); 118 $this->addContent($node);
119 } elseif (null !== $node) { 119 } elseif (null !== $node) {
120 throw new \InvalidArgumentException(sprintf('Expecting a DOMNodeList or DOMNode instance, an array, a string, or null, but got "%s".', is_object($node) ? get_class($node) : gettype($node))); 120 throw new \InvalidArgumentException(sprintf('Expecting a DOMNodeList or DOMNode instance, an array, a string, or null, but got "%s".', \is_object($node) ? \get_class($node) : \gettype($node)));
121 } 121 }
122 } 122 }
123 123
124 /** 124 /**
125 * Adds HTML/XML content. 125 * Adds HTML/XML content.
127 * If the charset is not set via the content type, it is assumed to be UTF-8, 127 * If the charset is not set via the content type, it is assumed to be UTF-8,
128 * or ISO-8859-1 as a fallback, which is the default charset defined by the 128 * or ISO-8859-1 as a fallback, which is the default charset defined by the
129 * HTTP 1.1 specification. 129 * HTTP 1.1 specification.
130 * 130 *
131 * @param string $content A string to parse as HTML/XML 131 * @param string $content A string to parse as HTML/XML
132 * @param null|string $type The content type of the string 132 * @param string|null $type The content type of the string
133 */ 133 */
134 public function addContent($content, $type = null) 134 public function addContent($content, $type = null)
135 { 135 {
136 if (empty($type)) { 136 if (empty($type)) {
137 $type = 0 === strpos($content, '<?xml') ? 'application/xml' : 'text/html'; 137 $type = 0 === strpos($content, '<?xml') ? 'application/xml' : 'text/html';
206 libxml_use_internal_errors($internalErrors); 206 libxml_use_internal_errors($internalErrors);
207 libxml_disable_entity_loader($disableEntities); 207 libxml_disable_entity_loader($disableEntities);
208 208
209 $this->addDocument($dom); 209 $this->addDocument($dom);
210 210
211 $base = $this->filterRelativeXPath('descendant-or-self::base')->extract(array('href')); 211 $base = $this->filterRelativeXPath('descendant-or-self::base')->extract(['href']);
212 212
213 $baseHref = current($base); 213 $baseHref = current($base);
214 if (count($base) && !empty($baseHref)) { 214 if (\count($base) && !empty($baseHref)) {
215 if ($this->baseHref) { 215 if ($this->baseHref) {
216 $linkNode = $dom->createElement('a'); 216 $linkNode = $dom->createElement('a');
217 $linkNode->setAttribute('href', $baseHref); 217 $linkNode->setAttribute('href', $baseHref);
218 $link = new Link($linkNode, $this->baseHref); 218 $link = new Link($linkNode, $this->baseHref);
219 $this->baseHref = $link->getUri(); 219 $this->baseHref = $link->getUri();
320 if (null === $this->document) { 320 if (null === $this->document) {
321 $this->document = $node->ownerDocument; 321 $this->document = $node->ownerDocument;
322 } 322 }
323 323
324 // Don't add duplicate nodes in the Crawler 324 // Don't add duplicate nodes in the Crawler
325 if (in_array($node, $this->nodes, true)) { 325 if (\in_array($node, $this->nodes, true)) {
326 return; 326 return;
327 } 327 }
328 328
329 $this->nodes[] = $node; 329 $this->nodes[] = $node;
330 } 330 }
361 * 361 *
362 * @return array An array of values returned by the anonymous function 362 * @return array An array of values returned by the anonymous function
363 */ 363 */
364 public function each(\Closure $closure) 364 public function each(\Closure $closure)
365 { 365 {
366 $data = array(); 366 $data = [];
367 foreach ($this->nodes as $i => $node) { 367 foreach ($this->nodes as $i => $node) {
368 $data[] = $closure($this->createSubCrawler($node), $i); 368 $data[] = $closure($this->createSubCrawler($node), $i);
369 } 369 }
370 370
371 return $data; 371 return $data;
379 * 379 *
380 * @return self 380 * @return self
381 */ 381 */
382 public function slice($offset = 0, $length = null) 382 public function slice($offset = 0, $length = null)
383 { 383 {
384 return $this->createSubCrawler(array_slice($this->nodes, $offset, $length)); 384 return $this->createSubCrawler(\array_slice($this->nodes, $offset, $length));
385 } 385 }
386 386
387 /** 387 /**
388 * Reduces the list of nodes by calling an anonymous function. 388 * Reduces the list of nodes by calling an anonymous function.
389 * 389 *
393 * 393 *
394 * @return self 394 * @return self
395 */ 395 */
396 public function reduce(\Closure $closure) 396 public function reduce(\Closure $closure)
397 { 397 {
398 $nodes = array(); 398 $nodes = [];
399 foreach ($this->nodes as $i => $node) { 399 foreach ($this->nodes as $i => $node) {
400 if (false !== $closure($this->createSubCrawler($node), $i)) { 400 if (false !== $closure($this->createSubCrawler($node), $i)) {
401 $nodes[] = $node; 401 $nodes[] = $node;
402 } 402 }
403 } 403 }
420 * 420 *
421 * @return self 421 * @return self
422 */ 422 */
423 public function last() 423 public function last()
424 { 424 {
425 return $this->eq(count($this->nodes) - 1); 425 return $this->eq(\count($this->nodes) - 1);
426 } 426 }
427 427
428 /** 428 /**
429 * Returns the siblings nodes of the current selection. 429 * Returns the siblings nodes of the current selection.
430 * 430 *
485 if (!$this->nodes) { 485 if (!$this->nodes) {
486 throw new \InvalidArgumentException('The current node list is empty.'); 486 throw new \InvalidArgumentException('The current node list is empty.');
487 } 487 }
488 488
489 $node = $this->getNode(0); 489 $node = $this->getNode(0);
490 $nodes = array(); 490 $nodes = [];
491 491
492 while ($node = $node->parentNode) { 492 while ($node = $node->parentNode) {
493 if (XML_ELEMENT_NODE === $node->nodeType) { 493 if (XML_ELEMENT_NODE === $node->nodeType) {
494 $nodes[] = $node; 494 $nodes[] = $node;
495 } 495 }
511 throw new \InvalidArgumentException('The current node list is empty.'); 511 throw new \InvalidArgumentException('The current node list is empty.');
512 } 512 }
513 513
514 $node = $this->getNode(0)->firstChild; 514 $node = $this->getNode(0)->firstChild;
515 515
516 return $this->createSubCrawler($node ? $this->sibling($node) : array()); 516 return $this->createSubCrawler($node ? $this->sibling($node) : []);
517 } 517 }
518 518
519 /** 519 /**
520 * Returns the attribute value of the first node of the list. 520 * Returns the attribute value of the first node of the list.
521 * 521 *
603 { 603 {
604 if (null === $this->document) { 604 if (null === $this->document) {
605 throw new \LogicException('Cannot evaluate the expression on an uninitialized crawler.'); 605 throw new \LogicException('Cannot evaluate the expression on an uninitialized crawler.');
606 } 606 }
607 607
608 $data = array(); 608 $data = [];
609 $domxpath = $this->createDOMXPath($this->document, $this->findNamespacePrefixes($xpath)); 609 $domxpath = $this->createDOMXPath($this->document, $this->findNamespacePrefixes($xpath));
610 610
611 foreach ($this->nodes as $node) { 611 foreach ($this->nodes as $node) {
612 $data[] = $domxpath->evaluate($xpath, $node); 612 $data[] = $domxpath->evaluate($xpath, $node);
613 } 613 }
624 * 624 *
625 * You can extract attributes or/and the node value (_text). 625 * You can extract attributes or/and the node value (_text).
626 * 626 *
627 * Example: 627 * Example:
628 * 628 *
629 * $crawler->filter('h1 a')->extract(array('_text', 'href')); 629 * $crawler->filter('h1 a')->extract(['_text', 'href']);
630 * 630 *
631 * @param array $attributes An array of attributes 631 * @param array $attributes An array of attributes
632 * 632 *
633 * @return array An array of extracted values 633 * @return array An array of extracted values
634 */ 634 */
635 public function extract($attributes) 635 public function extract($attributes)
636 { 636 {
637 $attributes = (array) $attributes; 637 $attributes = (array) $attributes;
638 $count = count($attributes); 638 $count = \count($attributes);
639 639
640 $data = array(); 640 $data = [];
641 foreach ($this->nodes as $node) { 641 foreach ($this->nodes as $node) {
642 $elements = array(); 642 $elements = [];
643 foreach ($attributes as $attribute) { 643 foreach ($attributes as $attribute) {
644 if ('_text' === $attribute) { 644 if ('_text' === $attribute) {
645 $elements[] = $node->nodeValue; 645 $elements[] = $node->nodeValue;
646 } else { 646 } else {
647 $elements[] = $node->getAttribute($attribute); 647 $elements[] = $node->getAttribute($attribute);
763 } 763 }
764 764
765 $node = $this->getNode(0); 765 $node = $this->getNode(0);
766 766
767 if (!$node instanceof \DOMElement) { 767 if (!$node instanceof \DOMElement) {
768 throw new \InvalidArgumentException(sprintf('The selected node should be instance of DOMElement, got "%s".', get_class($node))); 768 throw new \InvalidArgumentException(sprintf('The selected node should be instance of DOMElement, got "%s".', \get_class($node)));
769 } 769 }
770 770
771 return new Link($node, $this->baseHref, $method); 771 return new Link($node, $this->baseHref, $method);
772 } 772 }
773 773
778 * 778 *
779 * @throws \InvalidArgumentException If the current node list contains non-DOMElement instances 779 * @throws \InvalidArgumentException If the current node list contains non-DOMElement instances
780 */ 780 */
781 public function links() 781 public function links()
782 { 782 {
783 $links = array(); 783 $links = [];
784 foreach ($this->nodes as $node) { 784 foreach ($this->nodes as $node) {
785 if (!$node instanceof \DOMElement) { 785 if (!$node instanceof \DOMElement) {
786 throw new \InvalidArgumentException(sprintf('The current node list should contain only DOMElement instances, "%s" found.', get_class($node))); 786 throw new \InvalidArgumentException(sprintf('The current node list should contain only DOMElement instances, "%s" found.', \get_class($node)));
787 } 787 }
788 788
789 $links[] = new Link($node, $this->baseHref, 'get'); 789 $links[] = new Link($node, $this->baseHref, 'get');
790 } 790 }
791 791
799 * 799 *
800 * @throws \InvalidArgumentException If the current node list is empty 800 * @throws \InvalidArgumentException If the current node list is empty
801 */ 801 */
802 public function image() 802 public function image()
803 { 803 {
804 if (!count($this)) { 804 if (!\count($this)) {
805 throw new \InvalidArgumentException('The current node list is empty.'); 805 throw new \InvalidArgumentException('The current node list is empty.');
806 } 806 }
807 807
808 $node = $this->getNode(0); 808 $node = $this->getNode(0);
809 809
810 if (!$node instanceof \DOMElement) { 810 if (!$node instanceof \DOMElement) {
811 throw new \InvalidArgumentException(sprintf('The selected node should be instance of DOMElement, got "%s".', get_class($node))); 811 throw new \InvalidArgumentException(sprintf('The selected node should be instance of DOMElement, got "%s".', \get_class($node)));
812 } 812 }
813 813
814 return new Image($node, $this->baseHref); 814 return new Image($node, $this->baseHref);
815 } 815 }
816 816
819 * 819 *
820 * @return Image[] An array of Image instances 820 * @return Image[] An array of Image instances
821 */ 821 */
822 public function images() 822 public function images()
823 { 823 {
824 $images = array(); 824 $images = [];
825 foreach ($this as $node) { 825 foreach ($this as $node) {
826 if (!$node instanceof \DOMElement) { 826 if (!$node instanceof \DOMElement) {
827 throw new \InvalidArgumentException(sprintf('The current node list should contain only DOMElement instances, "%s" found.', get_class($node))); 827 throw new \InvalidArgumentException(sprintf('The current node list should contain only DOMElement instances, "%s" found.', \get_class($node)));
828 } 828 }
829 829
830 $images[] = new Image($node, $this->baseHref); 830 $images[] = new Image($node, $this->baseHref);
831 } 831 }
832 832
850 } 850 }
851 851
852 $node = $this->getNode(0); 852 $node = $this->getNode(0);
853 853
854 if (!$node instanceof \DOMElement) { 854 if (!$node instanceof \DOMElement) {
855 throw new \InvalidArgumentException(sprintf('The selected node should be instance of DOMElement, got "%s".', get_class($node))); 855 throw new \InvalidArgumentException(sprintf('The selected node should be instance of DOMElement, got "%s".', \get_class($node)));
856 } 856 }
857 857
858 $form = new Form($node, $this->uri, $method, $this->baseHref); 858 $form = new Form($node, $this->uri, $method, $this->baseHref);
859 859
860 if (null !== $values) { 860 if (null !== $values) {
887 * Converts string for XPath expressions. 887 * Converts string for XPath expressions.
888 * 888 *
889 * Escaped characters are: quotes (") and apostrophe ('). 889 * Escaped characters are: quotes (") and apostrophe (').
890 * 890 *
891 * Examples: 891 * Examples:
892 * <code> 892 *
893 * echo Crawler::xpathLiteral('foo " bar'); 893 * echo Crawler::xpathLiteral('foo " bar');
894 * //prints 'foo " bar' 894 * //prints 'foo " bar'
895 * 895 *
896 * echo Crawler::xpathLiteral("foo ' bar"); 896 * echo Crawler::xpathLiteral("foo ' bar");
897 * //prints "foo ' bar" 897 * //prints "foo ' bar"
898 * 898 *
899 * echo Crawler::xpathLiteral('a\'b"c'); 899 * echo Crawler::xpathLiteral('a\'b"c');
900 * //prints concat('a', "'", 'b"c') 900 * //prints concat('a', "'", 'b"c')
901 * </code> 901 *
902 * 902 *
903 * @param string $s String to be escaped 903 * @param string $s String to be escaped
904 * 904 *
905 * @return string Converted string 905 * @return string Converted string
906 */ 906 */
913 if (false === strpos($s, '"')) { 913 if (false === strpos($s, '"')) {
914 return sprintf('"%s"', $s); 914 return sprintf('"%s"', $s);
915 } 915 }
916 916
917 $string = $s; 917 $string = $s;
918 $parts = array(); 918 $parts = [];
919 while (true) { 919 while (true) {
920 if (false !== $pos = strpos($string, "'")) { 920 if (false !== $pos = strpos($string, "'")) {
921 $parts[] = sprintf("'%s'", substr($string, 0, $pos)); 921 $parts[] = sprintf("'%s'", substr($string, 0, $pos));
922 $parts[] = "\"'\""; 922 $parts[] = "\"'\"";
923 $string = substr($string, $pos + 1); 923 $string = substr($string, $pos + 1);
963 * 963 *
964 * @return string 964 * @return string
965 */ 965 */
966 private function relativize($xpath) 966 private function relativize($xpath)
967 { 967 {
968 $expressions = array(); 968 $expressions = [];
969 969
970 // An expression which will never match to replace expressions which cannot match in the crawler 970 // An expression which will never match to replace expressions which cannot match in the crawler
971 // We cannot simply drop 971 // We cannot simply drop
972 $nonMatchingExpression = 'a[name() = "b"]'; 972 $nonMatchingExpression = 'a[name() = "b"]';
973 973
974 $xpathLen = strlen($xpath); 974 $xpathLen = \strlen($xpath);
975 $openedBrackets = 0; 975 $openedBrackets = 0;
976 $startPosition = strspn($xpath, " \t\n\r\0\x0B"); 976 $startPosition = strspn($xpath, " \t\n\r\0\x0B");
977 977
978 for ($i = $startPosition; $i <= $xpathLen; ++$i) { 978 for ($i = $startPosition; $i <= $xpathLen; ++$i) {
979 $i += strcspn($xpath, '"\'[]|', $i); 979 $i += strcspn($xpath, '"\'[]|', $i);
1062 /** 1062 /**
1063 * @return int 1063 * @return int
1064 */ 1064 */
1065 public function count() 1065 public function count()
1066 { 1066 {
1067 return count($this->nodes); 1067 return \count($this->nodes);
1068 } 1068 }
1069 1069
1070 /** 1070 /**
1071 * @return \ArrayIterator|\DOMElement[] 1071 * @return \ArrayIterator|\DOMElement[]
1072 */ 1072 */
1081 * 1081 *
1082 * @return array 1082 * @return array
1083 */ 1083 */
1084 protected function sibling($node, $siblingDir = 'nextSibling') 1084 protected function sibling($node, $siblingDir = 'nextSibling')
1085 { 1085 {
1086 $nodes = array(); 1086 $nodes = [];
1087 1087
1088 do { 1088 do {
1089 if ($node !== $this->getNode(0) && 1 === $node->nodeType) { 1089 if ($node !== $this->getNode(0) && 1 === $node->nodeType) {
1090 $nodes[] = $node; 1090 $nodes[] = $node;
1091 } 1091 }
1100 * 1100 *
1101 * @return \DOMXPath 1101 * @return \DOMXPath
1102 * 1102 *
1103 * @throws \InvalidArgumentException 1103 * @throws \InvalidArgumentException
1104 */ 1104 */
1105 private function createDOMXPath(\DOMDocument $document, array $prefixes = array()) 1105 private function createDOMXPath(\DOMDocument $document, array $prefixes = [])
1106 { 1106 {
1107 $domxpath = new \DOMXPath($document); 1107 $domxpath = new \DOMXPath($document);
1108 1108
1109 foreach ($prefixes as $prefix) { 1109 foreach ($prefixes as $prefix) {
1110 $namespace = $this->discoverNamespace($domxpath, $prefix); 1110 $namespace = $this->discoverNamespace($domxpath, $prefix);
1147 { 1147 {
1148 if (preg_match_all('/(?P<prefix>[a-z_][a-z_0-9\-\.]*+):[^"\/:]/i', $xpath, $matches)) { 1148 if (preg_match_all('/(?P<prefix>[a-z_][a-z_0-9\-\.]*+):[^"\/:]/i', $xpath, $matches)) {
1149 return array_unique($matches['prefix']); 1149 return array_unique($matches['prefix']);
1150 } 1150 }
1151 1151
1152 return array(); 1152 return [];
1153 } 1153 }
1154 1154
1155 /** 1155 /**
1156 * Creates a crawler for some subnodes. 1156 * Creates a crawler for some subnodes.
1157 * 1157 *