diff vendor/masterminds/html5/src/HTML5/Parser/DOMTreeBuilder.php @ 17:129ea1e6d783

Update, including to Drupal core 8.6.10
author Chris Cannam
date Thu, 28 Feb 2019 13:21:36 +0000
parents 4c8ae668cc8c
children
line wrap: on
line diff
--- a/vendor/masterminds/html5/src/HTML5/Parser/DOMTreeBuilder.php	Tue Jul 10 15:07:59 2018 +0100
+++ b/vendor/masterminds/html5/src/HTML5/Parser/DOMTreeBuilder.php	Thu Feb 28 13:21:36 2019 +0000
@@ -1,7 +1,9 @@
 <?php
+
 namespace Masterminds\HTML5\Parser;
 
 use Masterminds\HTML5\Elements;
+use Masterminds\HTML5\InstructionProcessor;
 
 /**
  * Create an HTML5 DOM tree from events.
@@ -24,7 +26,7 @@
 class DOMTreeBuilder implements EventHandler
 {
     /**
-     * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0
+     * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0.
      */
     const NAMESPACE_HTML = 'http://www.w3.org/1999/xhtml';
 
@@ -45,14 +47,14 @@
     const OPT_IMPLICIT_NS = 'implicit_namespaces';
 
     /**
-     * Holds the HTML5 element names that causes a namespace switch
+     * Holds the HTML5 element names that causes a namespace switch.
      *
      * @var array
      */
     protected $nsRoots = array(
         'html' => self::NAMESPACE_HTML,
         'svg' => self::NAMESPACE_SVG,
-        'math' => self::NAMESPACE_MATHML
+        'math' => self::NAMESPACE_MATHML,
     );
 
     /**
@@ -63,7 +65,7 @@
     protected $implicitNamespaces = array(
         'xml' => self::NAMESPACE_XML,
         'xmlns' => self::NAMESPACE_XMLNS,
-        'xlink' => self::NAMESPACE_XLINK
+        'xlink' => self::NAMESPACE_XLINK,
     );
 
     /**
@@ -136,6 +138,7 @@
     protected $stack = array();
 
     protected $current; // Pointer in the tag hierarchy.
+    protected $rules;
     protected $doc;
 
     protected $frag;
@@ -145,15 +148,15 @@
     protected $insertMode = 0;
 
     /**
-     * Track if we are in an element that allows only inline child nodes
+     * Track if we are in an element that allows only inline child nodes.
+     *
      * @var string|null
      */
     protected $onlyInline;
 
     /**
      * Quirks mode is enabled by default.
-     * Any document that is missing the
-     * DT will be considered to be in quirks mode.
+     * Any document that is missing the DT will be considered to be in quirks mode.
      */
     protected $quirks = true;
 
@@ -174,24 +177,23 @@
             // $this->doc = \DOMImplementation::createDocument(NULL, 'html', $dt);
             $this->doc = $impl->createDocument(null, null, $dt);
         }
+
         $this->errors = array();
 
         $this->current = $this->doc; // ->documentElement;
 
         // Create a rules engine for tags.
-        $this->rules = new TreeBuildingRules($this->doc);
+        $this->rules = new TreeBuildingRules();
 
         $implicitNS = array();
         if (isset($this->options[self::OPT_IMPLICIT_NS])) {
             $implicitNS = $this->options[self::OPT_IMPLICIT_NS];
-        } elseif (isset($this->options["implicitNamespaces"])) {
-            $implicitNS = $this->options["implicitNamespaces"];
+        } elseif (isset($this->options['implicitNamespaces'])) {
+            $implicitNS = $this->options['implicitNamespaces'];
         }
 
         // Fill $nsStack with the defalut HTML5 namespaces, plus the "implicitNamespaces" array taken form $options
-        array_unshift($this->nsStack, $implicitNS + array(
-            '' => self::NAMESPACE_HTML
-        ) + $this->implicitNamespaces);
+        array_unshift($this->nsStack, $implicitNS + array('' => self::NAMESPACE_HTML) + $this->implicitNamespaces);
 
         if ($isFragment) {
             $this->insertMode = static::IM_IN_BODY;
@@ -216,7 +218,7 @@
      *
      * @see http://www.w3.org/TR/2012/CR-html5-20121217/syntax.html#concept-frag-parse-context
      *
-     * @return \DOMFragmentDocumentFragment
+     * @return \DOMDocumentFragment
      */
     public function fragment()
     {
@@ -228,8 +230,10 @@
      *
      * This is used for handling Processor Instructions as they are
      * inserted. If omitted, PI's are inserted directly into the DOM tree.
+     *
+     * @param InstructionProcessor $proc
      */
-    public function setInstructionProcessor(\Masterminds\HTML5\InstructionProcessor $proc)
+    public function setInstructionProcessor(InstructionProcessor $proc)
     {
         $this->processor = $proc;
     }
@@ -241,7 +245,7 @@
         $this->quirks = $quirks;
 
         if ($this->insertMode > static::IM_INITIAL) {
-            $this->parseError("Illegal placement of DOCTYPE tag. Ignoring: " . $name);
+            $this->parseError('Illegal placement of DOCTYPE tag. Ignoring: ' . $name);
 
             return;
         }
@@ -255,27 +259,32 @@
      * @todo - XMLNS namespace handling (we need to parse, even if it's not valid)
      *       - XLink, MathML and SVG namespace handling
      *       - Omission rules: 8.1.2.4 Optional tags
+     *
+     * @param string $name
+     * @param array  $attributes
+     * @param bool   $selfClosing
+     *
+     * @return int
      */
     public function startTag($name, $attributes = array(), $selfClosing = false)
     {
-        // fprintf(STDOUT, $name);
         $lname = $this->normalizeTagName($name);
 
         // Make sure we have an html element.
-        if (! $this->doc->documentElement && $name !== 'html' && ! $this->frag) {
+        if (!$this->doc->documentElement && 'html' !== $name && !$this->frag) {
             $this->startTag('html');
         }
 
         // Set quirks mode if we're at IM_INITIAL with no doctype.
-        if ($this->insertMode == static::IM_INITIAL) {
+        if ($this->insertMode === static::IM_INITIAL) {
             $this->quirks = true;
-            $this->parseError("No DOCTYPE specified.");
+            $this->parseError('No DOCTYPE specified.');
         }
 
         // SPECIAL TAG HANDLING:
         // Spec says do this, and "don't ask."
         // find the spec where this is defined... looks problematic
-        if ($name == 'image' && !($this->insertMode === static::IM_IN_SVG || $this->insertMode === static::IM_IN_MATHML)) {
+        if ('image' === $name && !($this->insertMode === static::IM_IN_SVG || $this->insertMode === static::IM_IN_MATHML)) {
             $name = 'img';
         }
 
@@ -291,7 +300,7 @@
                 break;
             case 'head':
                 if ($this->insertMode > static::IM_BEFORE_HEAD) {
-                    $this->parseError("Unexpected head tag outside of head context.");
+                    $this->parseError('Unexpected head tag outside of head context.');
                 } else {
                     $this->insertMode = static::IM_IN_HEAD;
                 }
@@ -306,14 +315,14 @@
                 $this->insertMode = static::IM_IN_MATHML;
                 break;
             case 'noscript':
-                if ($this->insertMode == static::IM_IN_HEAD) {
+                if ($this->insertMode === static::IM_IN_HEAD) {
                     $this->insertMode = static::IM_IN_HEAD_NOSCRIPT;
                 }
                 break;
         }
 
         // Special case handling for SVG.
-        if ($this->insertMode == static::IM_IN_SVG) {
+        if ($this->insertMode === static::IM_IN_SVG) {
             $lname = Elements::normalizeSvgElement($lname);
         }
 
@@ -321,62 +330,58 @@
         // when we found a tag thats appears inside $nsRoots, we have to switch the defalut namespace
         if (isset($this->nsRoots[$lname]) && $this->nsStack[0][''] !== $this->nsRoots[$lname]) {
             array_unshift($this->nsStack, array(
-                '' => $this->nsRoots[$lname]
+                '' => $this->nsRoots[$lname],
             ) + $this->nsStack[0]);
-            $pushes ++;
+            ++$pushes;
         }
         $needsWorkaround = false;
-        if (isset($this->options["xmlNamespaces"]) && $this->options["xmlNamespaces"]) {
+        if (isset($this->options['xmlNamespaces']) && $this->options['xmlNamespaces']) {
             // when xmlNamespaces is true a and we found a 'xmlns' or 'xmlns:*' attribute, we should add a new item to the $nsStack
             foreach ($attributes as $aName => $aVal) {
-                if ($aName === 'xmlns') {
+                if ('xmlns' === $aName) {
                     $needsWorkaround = $aVal;
                     array_unshift($this->nsStack, array(
-                        '' => $aVal
+                        '' => $aVal,
                     ) + $this->nsStack[0]);
-                    $pushes ++;
-                } elseif ((($pos = strpos($aName, ':')) ? substr($aName, 0, $pos) : '') === 'xmlns') {
+                    ++$pushes;
+                } elseif ('xmlns' === (($pos = strpos($aName, ':')) ? substr($aName, 0, $pos) : '')) {
                     array_unshift($this->nsStack, array(
-                        substr($aName, $pos + 1) => $aVal
+                        substr($aName, $pos + 1) => $aVal,
                     ) + $this->nsStack[0]);
-                    $pushes ++;
+                    ++$pushes;
                 }
             }
         }
 
         if ($this->onlyInline && Elements::isA($lname, Elements::BLOCK_TAG)) {
-        	$this->autoclose($this->onlyInline);
-        	$this->onlyInline = null;
+            $this->autoclose($this->onlyInline);
+            $this->onlyInline = null;
         }
 
         try {
             $prefix = ($pos = strpos($lname, ':')) ? substr($lname, 0, $pos) : '';
 
-
-            if ($needsWorkaround!==false) {
-
-                $xml = "<$lname xmlns=\"$needsWorkaround\" ".(strlen($prefix) && isset($this->nsStack[0][$prefix])?("xmlns:$prefix=\"".$this->nsStack[0][$prefix]."\""):"")."/>";
+            if (false !== $needsWorkaround) {
+                $xml = "<$lname xmlns=\"$needsWorkaround\" " . (strlen($prefix) && isset($this->nsStack[0][$prefix]) ? ("xmlns:$prefix=\"" . $this->nsStack[0][$prefix] . '"') : '') . '/>';
 
                 $frag = new \DOMDocument('1.0', 'UTF-8');
                 $frag->loadXML($xml);
 
                 $ele = $this->doc->importNode($frag->documentElement, true);
-
             } else {
-                if (!isset($this->nsStack[0][$prefix]) || ($prefix === "" && isset($this->options[self::OPT_DISABLE_HTML_NS]) && $this->options[self::OPT_DISABLE_HTML_NS])) {
+                if (!isset($this->nsStack[0][$prefix]) || ('' === $prefix && isset($this->options[self::OPT_DISABLE_HTML_NS]) && $this->options[self::OPT_DISABLE_HTML_NS])) {
                     $ele = $this->doc->createElement($lname);
                 } else {
                     $ele = $this->doc->createElementNS($this->nsStack[0][$prefix], $lname);
                 }
             }
-
         } catch (\DOMException $e) {
             $this->parseError("Illegal tag name: <$lname>. Replaced with <invalid>.");
             $ele = $this->doc->createElement('invalid');
         }
 
         if (Elements::isA($lname, Elements::BLOCK_ONLY_INLINE)) {
-        	$this->onlyInline = $lname;
+            $this->onlyInline = $lname;
         }
 
         // When we add some namespacess, we have to track them. Later, when "endElement" is invoked, we have to remove them.
@@ -395,23 +400,23 @@
 
         foreach ($attributes as $aName => $aVal) {
             // xmlns attributes can't be set
-            if ($aName === 'xmlns') {
+            if ('xmlns' === $aName) {
                 continue;
             }
 
-            if ($this->insertMode == static::IM_IN_SVG) {
+            if ($this->insertMode === static::IM_IN_SVG) {
                 $aName = Elements::normalizeSvgAttribute($aName);
-            } elseif ($this->insertMode == static::IM_IN_MATHML) {
+            } elseif ($this->insertMode === static::IM_IN_MATHML) {
                 $aName = Elements::normalizeMathMlAttribute($aName);
             }
 
             try {
                 $prefix = ($pos = strpos($aName, ':')) ? substr($aName, 0, $pos) : false;
 
-                if ($prefix==='xmlns') {
-                    $ele->setAttributeNs(self::NAMESPACE_XMLNS, $aName, $aVal);
-                } elseif ($prefix!==false && isset($this->nsStack[0][$prefix])) {
-                    $ele->setAttributeNs($this->nsStack[0][$prefix], $aName, $aVal);
+                if ('xmlns' === $prefix) {
+                    $ele->setAttributeNS(self::NAMESPACE_XMLNS, $aName, $aVal);
+                } elseif (false !== $prefix && isset($this->nsStack[0][$prefix])) {
+                    $ele->setAttributeNS($this->nsStack[0][$prefix], $aName, $aVal);
                 } else {
                     $ele->setAttribute($aName, $aVal);
                 }
@@ -421,27 +426,33 @@
             }
 
             // This is necessary on a non-DTD schema, like HTML5.
-            if ($aName == 'id') {
+            if ('id' === $aName) {
                 $ele->setIdAttribute('id', true);
             }
         }
 
-        // Some elements have special processing rules. Handle those separately.
-        if ($this->rules->hasRules($name) && $this->frag !== $this->current) {
+        if ($this->frag !== $this->current && $this->rules->hasRules($name)) {
+            // Some elements have special processing rules. Handle those separately.
             $this->current = $this->rules->evaluate($ele, $this->current);
-        }         // Otherwise, it's a standard element.
-        else {
+        } else {
+            // Otherwise, it's a standard element.
             $this->current->appendChild($ele);
 
-            // XXX: Need to handle self-closing tags and unary tags.
-            if (! Elements::isA($name, Elements::VOID_TAG)) {
+            if (!Elements::isA($name, Elements::VOID_TAG)) {
                 $this->current = $ele;
             }
+
+            // Self-closing tags should only be respected on foreign elements
+            // (and are implied on void elements)
+            // See: https://www.w3.org/TR/html5/syntax.html#start-tags
+            if (Elements::isHtml5Element($name)) {
+                $selfClosing = false;
+            }
         }
 
         // This is sort of a last-ditch attempt to correct for cases where no head/body
         // elements are provided.
-        if ($this->insertMode <= static::IM_BEFORE_HEAD && $name != 'head' && $name != 'html') {
+        if ($this->insertMode <= static::IM_BEFORE_HEAD && 'head' !== $name && 'html' !== $name) {
             $this->insertMode = static::IM_IN_BODY;
         }
 
@@ -449,10 +460,15 @@
         // but we have to remove the namespaces pushed to $nsStack.
         if ($pushes > 0 && Elements::isA($name, Elements::VOID_TAG)) {
             // remove the namespaced definded by current node
-            for ($i = 0; $i < $pushes; $i ++) {
+            for ($i = 0; $i < $pushes; ++$i) {
                 array_shift($this->nsStack);
             }
         }
+
+        if ($selfClosing) {
+            $this->endTag($name);
+        }
+
         // Return the element mask, which the tokenizer can then use to set
         // various processing rules.
         return Elements::element($name);
@@ -473,7 +489,7 @@
                 'html',
                 'br',
                 'head',
-                'title'
+                'title',
             ))) {
                 $this->startTag('html');
                 $this->endTag($name);
@@ -483,13 +499,13 @@
             }
 
             // Ignore the tag.
-            $this->parseError("Illegal closing tag at global scope.");
+            $this->parseError('Illegal closing tag at global scope.');
 
             return;
         }
 
         // Special case handling for SVG.
-        if ($this->insertMode == static::IM_IN_SVG) {
+        if ($this->insertMode === static::IM_IN_SVG) {
             $lname = Elements::normalizeSvgElement($lname);
         }
 
@@ -500,39 +516,33 @@
             $cid = spl_object_hash($this->current);
         }
 
-        // XXX: Not sure whether we need this anymore.
-        // if ($name != $lname) {
-        // return $this->quirksTreeResolver($lname);
-        // }
-
         // XXX: HTML has no parent. What do we do, though,
         // if this element appears in the wrong place?
-        if ($lname == 'html') {
+        if ('html' === $lname) {
             return;
         }
 
         // remove the namespaced definded by current node
         if (isset($this->pushes[$cid])) {
-            for ($i = 0; $i < $this->pushes[$cid][0]; $i ++) {
+            for ($i = 0; $i < $this->pushes[$cid][0]; ++$i) {
                 array_shift($this->nsStack);
             }
             unset($this->pushes[$cid]);
         }
 
-        if (! $this->autoclose($lname)) {
+        if (!$this->autoclose($lname)) {
             $this->parseError('Could not find closing tag for ' . $lname);
         }
 
-        // switch ($this->insertMode) {
         switch ($lname) {
-            case "head":
+            case 'head':
                 $this->insertMode = static::IM_AFTER_HEAD;
                 break;
-            case "body":
+            case 'body':
                 $this->insertMode = static::IM_AFTER_BODY;
                 break;
-            case "svg":
-            case "mathml":
+            case 'svg':
+            case 'mathml':
                 $this->insertMode = static::IM_IN_BODY;
                 break;
         }
@@ -554,9 +564,9 @@
             // practical as most documents contain these characters. Other text is not
             // expected here so recording a parse error is necessary.
             $dataTmp = trim($data, " \t\n\r\f");
-            if (! empty($dataTmp)) {
+            if (!empty($dataTmp)) {
                 // fprintf(STDOUT, "Unexpected insert mode: %d", $this->insertMode);
-                $this->parseError("Unexpected text. Ignoring: " . $dataTmp);
+                $this->parseError('Unexpected text. Ignoring: ' . $dataTmp);
             }
 
             return;
@@ -573,7 +583,7 @@
 
     public function parseError($msg, $line = 0, $col = 0)
     {
-        $this->errors[] = sprintf("Line %d, Col %d: %s", $line, $col, $msg);
+        $this->errors[] = sprintf('Line %d, Col %d: %s', $line, $col, $msg);
     }
 
     public function getErrors()
@@ -590,15 +600,14 @@
     public function processingInstruction($name, $data = null)
     {
         // XXX: Ignore initial XML declaration, per the spec.
-        if ($this->insertMode == static::IM_INITIAL && 'xml' == strtolower($name)) {
+        if ($this->insertMode === static::IM_INITIAL && 'xml' === strtolower($name)) {
             return;
         }
 
-        // Important: The processor may modify the current DOM tree however
-        // it sees fit.
-        if (isset($this->processor)) {
+        // Important: The processor may modify the current DOM tree however it sees fit.
+        if ($this->processor instanceof InstructionProcessor) {
             $res = $this->processor->process($this->current, $name, $data);
-            if (! empty($res)) {
+            if (!empty($res)) {
                 $this->current = $res;
             }
 
@@ -617,42 +626,46 @@
 
     /**
      * Apply normalization rules to a tag name.
-     *
      * See sections 2.9 and 8.1.2.
      *
-     * @param string $name
-     *            The tag name.
+     * @param string $tagName
+     *
      * @return string The normalized tag name.
      */
-    protected function normalizeTagName($name)
+    protected function normalizeTagName($tagName)
     {
         /*
          * Section 2.9 suggests that we should not do this. if (strpos($name, ':') !== false) { // We know from the grammar that there must be at least one other // char besides :, since : is not a legal tag start. $parts = explode(':', $name); return array_pop($parts); }
          */
-        return $name;
+        return $tagName;
     }
 
     protected function quirksTreeResolver($name)
     {
-        throw new \Exception("Not implemented.");
+        throw new \Exception('Not implemented.');
     }
 
     /**
      * Automatically climb the tree and close the closest node with the matching $tag.
+     *
+     * @param string $tagName
+     *
+     * @return bool
      */
-    protected function autoclose($tag)
+    protected function autoclose($tagName)
     {
         $working = $this->current;
         do {
-            if ($working->nodeType != XML_ELEMENT_NODE) {
+            if (XML_ELEMENT_NODE !== $working->nodeType) {
                 return false;
             }
-            if ($working->tagName == $tag) {
+            if ($working->tagName === $tagName) {
                 $this->current = $working->parentNode;
 
                 return true;
             }
         } while ($working = $working->parentNode);
+
         return false;
     }
 
@@ -661,12 +674,16 @@
      *
      * If $this->current or anything above $this->current matches the given tag
      * name, this returns true.
+     *
+     * @param string $tagName
+     *
+     * @return bool
      */
-    protected function isAncestor($tagname)
+    protected function isAncestor($tagName)
     {
         $candidate = $this->current;
-        while ($candidate->nodeType === XML_ELEMENT_NODE) {
-            if ($candidate->tagName == $tagname) {
+        while (XML_ELEMENT_NODE === $candidate->nodeType) {
+            if ($candidate->tagName === $tagName) {
                 return true;
             }
             $candidate = $candidate->parentNode;
@@ -677,9 +694,13 @@
 
     /**
      * Returns true if the immediate parent element is of the given tagname.
+     *
+     * @param string $tagName
+     *
+     * @return bool
      */
-    protected function isParent($tagname)
+    protected function isParent($tagName)
     {
-        return $this->current->tagName == $tagname;
+        return $this->current->tagName === $tagName;
     }
 }