comparison vendor/masterminds/html5/src/HTML5/Parser/DOMTreeBuilder.php @ 17:129ea1e6d783

Update, including to Drupal core 8.6.10
author Chris Cannam
date Thu, 28 Feb 2019 13:21:36 +0000
parents 4c8ae668cc8c
children
comparison
equal deleted inserted replaced
16:c2387f117808 17:129ea1e6d783
1 <?php 1 <?php
2
2 namespace Masterminds\HTML5\Parser; 3 namespace Masterminds\HTML5\Parser;
3 4
4 use Masterminds\HTML5\Elements; 5 use Masterminds\HTML5\Elements;
6 use Masterminds\HTML5\InstructionProcessor;
5 7
6 /** 8 /**
7 * Create an HTML5 DOM tree from events. 9 * Create an HTML5 DOM tree from events.
8 * 10 *
9 * This attempts to create a DOM from events emitted by a parser. This 11 * This attempts to create a DOM from events emitted by a parser. This
22 * parser. 24 * parser.
23 */ 25 */
24 class DOMTreeBuilder implements EventHandler 26 class DOMTreeBuilder implements EventHandler
25 { 27 {
26 /** 28 /**
27 * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0 29 * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0.
28 */ 30 */
29 const NAMESPACE_HTML = 'http://www.w3.org/1999/xhtml'; 31 const NAMESPACE_HTML = 'http://www.w3.org/1999/xhtml';
30 32
31 const NAMESPACE_MATHML = 'http://www.w3.org/1998/Math/MathML'; 33 const NAMESPACE_MATHML = 'http://www.w3.org/1998/Math/MathML';
32 34
43 const OPT_TARGET_DOC = 'target_document'; 45 const OPT_TARGET_DOC = 'target_document';
44 46
45 const OPT_IMPLICIT_NS = 'implicit_namespaces'; 47 const OPT_IMPLICIT_NS = 'implicit_namespaces';
46 48
47 /** 49 /**
48 * Holds the HTML5 element names that causes a namespace switch 50 * Holds the HTML5 element names that causes a namespace switch.
49 * 51 *
50 * @var array 52 * @var array
51 */ 53 */
52 protected $nsRoots = array( 54 protected $nsRoots = array(
53 'html' => self::NAMESPACE_HTML, 55 'html' => self::NAMESPACE_HTML,
54 'svg' => self::NAMESPACE_SVG, 56 'svg' => self::NAMESPACE_SVG,
55 'math' => self::NAMESPACE_MATHML 57 'math' => self::NAMESPACE_MATHML,
56 ); 58 );
57 59
58 /** 60 /**
59 * Holds the always available namespaces (which does not require the XMLNS declaration). 61 * Holds the always available namespaces (which does not require the XMLNS declaration).
60 * 62 *
61 * @var array 63 * @var array
62 */ 64 */
63 protected $implicitNamespaces = array( 65 protected $implicitNamespaces = array(
64 'xml' => self::NAMESPACE_XML, 66 'xml' => self::NAMESPACE_XML,
65 'xmlns' => self::NAMESPACE_XMLNS, 67 'xmlns' => self::NAMESPACE_XMLNS,
66 'xlink' => self::NAMESPACE_XLINK 68 'xlink' => self::NAMESPACE_XLINK,
67 ); 69 );
68 70
69 /** 71 /**
70 * Holds a stack of currently active namespaces. 72 * Holds a stack of currently active namespaces.
71 * 73 *
134 protected $options = array(); 136 protected $options = array();
135 137
136 protected $stack = array(); 138 protected $stack = array();
137 139
138 protected $current; // Pointer in the tag hierarchy. 140 protected $current; // Pointer in the tag hierarchy.
141 protected $rules;
139 protected $doc; 142 protected $doc;
140 143
141 protected $frag; 144 protected $frag;
142 145
143 protected $processor; 146 protected $processor;
144 147
145 protected $insertMode = 0; 148 protected $insertMode = 0;
146 149
147 /** 150 /**
148 * Track if we are in an element that allows only inline child nodes 151 * Track if we are in an element that allows only inline child nodes.
152 *
149 * @var string|null 153 * @var string|null
150 */ 154 */
151 protected $onlyInline; 155 protected $onlyInline;
152 156
153 /** 157 /**
154 * Quirks mode is enabled by default. 158 * Quirks mode is enabled by default.
155 * Any document that is missing the 159 * Any document that is missing the DT will be considered to be in quirks mode.
156 * DT will be considered to be in quirks mode.
157 */ 160 */
158 protected $quirks = true; 161 protected $quirks = true;
159 162
160 protected $errors = array(); 163 protected $errors = array();
161 164
172 // documents, and attempting to up-convert any older DTDs to HTML5. 175 // documents, and attempting to up-convert any older DTDs to HTML5.
173 $dt = $impl->createDocumentType('html'); 176 $dt = $impl->createDocumentType('html');
174 // $this->doc = \DOMImplementation::createDocument(NULL, 'html', $dt); 177 // $this->doc = \DOMImplementation::createDocument(NULL, 'html', $dt);
175 $this->doc = $impl->createDocument(null, null, $dt); 178 $this->doc = $impl->createDocument(null, null, $dt);
176 } 179 }
180
177 $this->errors = array(); 181 $this->errors = array();
178 182
179 $this->current = $this->doc; // ->documentElement; 183 $this->current = $this->doc; // ->documentElement;
180 184
181 // Create a rules engine for tags. 185 // Create a rules engine for tags.
182 $this->rules = new TreeBuildingRules($this->doc); 186 $this->rules = new TreeBuildingRules();
183 187
184 $implicitNS = array(); 188 $implicitNS = array();
185 if (isset($this->options[self::OPT_IMPLICIT_NS])) { 189 if (isset($this->options[self::OPT_IMPLICIT_NS])) {
186 $implicitNS = $this->options[self::OPT_IMPLICIT_NS]; 190 $implicitNS = $this->options[self::OPT_IMPLICIT_NS];
187 } elseif (isset($this->options["implicitNamespaces"])) { 191 } elseif (isset($this->options['implicitNamespaces'])) {
188 $implicitNS = $this->options["implicitNamespaces"]; 192 $implicitNS = $this->options['implicitNamespaces'];
189 } 193 }
190 194
191 // Fill $nsStack with the defalut HTML5 namespaces, plus the "implicitNamespaces" array taken form $options 195 // Fill $nsStack with the defalut HTML5 namespaces, plus the "implicitNamespaces" array taken form $options
192 array_unshift($this->nsStack, $implicitNS + array( 196 array_unshift($this->nsStack, $implicitNS + array('' => self::NAMESPACE_HTML) + $this->implicitNamespaces);
193 '' => self::NAMESPACE_HTML
194 ) + $this->implicitNamespaces);
195 197
196 if ($isFragment) { 198 if ($isFragment) {
197 $this->insertMode = static::IM_IN_BODY; 199 $this->insertMode = static::IM_IN_BODY;
198 $this->frag = $this->doc->createDocumentFragment(); 200 $this->frag = $this->doc->createDocumentFragment();
199 $this->current = $this->frag; 201 $this->current = $this->frag;
214 * This returns a DOMNodeList because a fragment may have zero or more 216 * This returns a DOMNodeList because a fragment may have zero or more
215 * DOMNodes at its root. 217 * DOMNodes at its root.
216 * 218 *
217 * @see http://www.w3.org/TR/2012/CR-html5-20121217/syntax.html#concept-frag-parse-context 219 * @see http://www.w3.org/TR/2012/CR-html5-20121217/syntax.html#concept-frag-parse-context
218 * 220 *
219 * @return \DOMFragmentDocumentFragment 221 * @return \DOMDocumentFragment
220 */ 222 */
221 public function fragment() 223 public function fragment()
222 { 224 {
223 return $this->frag; 225 return $this->frag;
224 } 226 }
226 /** 228 /**
227 * Provide an instruction processor. 229 * Provide an instruction processor.
228 * 230 *
229 * This is used for handling Processor Instructions as they are 231 * This is used for handling Processor Instructions as they are
230 * inserted. If omitted, PI's are inserted directly into the DOM tree. 232 * inserted. If omitted, PI's are inserted directly into the DOM tree.
231 */ 233 *
232 public function setInstructionProcessor(\Masterminds\HTML5\InstructionProcessor $proc) 234 * @param InstructionProcessor $proc
235 */
236 public function setInstructionProcessor(InstructionProcessor $proc)
233 { 237 {
234 $this->processor = $proc; 238 $this->processor = $proc;
235 } 239 }
236 240
237 public function doctype($name, $idType = 0, $id = null, $quirks = false) 241 public function doctype($name, $idType = 0, $id = null, $quirks = false)
239 // This is used solely for setting quirks mode. Currently we don't 243 // This is used solely for setting quirks mode. Currently we don't
240 // try to preserve the inbound DT. We convert it to HTML5. 244 // try to preserve the inbound DT. We convert it to HTML5.
241 $this->quirks = $quirks; 245 $this->quirks = $quirks;
242 246
243 if ($this->insertMode > static::IM_INITIAL) { 247 if ($this->insertMode > static::IM_INITIAL) {
244 $this->parseError("Illegal placement of DOCTYPE tag. Ignoring: " . $name); 248 $this->parseError('Illegal placement of DOCTYPE tag. Ignoring: ' . $name);
245 249
246 return; 250 return;
247 } 251 }
248 252
249 $this->insertMode = static::IM_BEFORE_HTML; 253 $this->insertMode = static::IM_BEFORE_HTML;
253 * Process the start tag. 257 * Process the start tag.
254 * 258 *
255 * @todo - XMLNS namespace handling (we need to parse, even if it's not valid) 259 * @todo - XMLNS namespace handling (we need to parse, even if it's not valid)
256 * - XLink, MathML and SVG namespace handling 260 * - XLink, MathML and SVG namespace handling
257 * - Omission rules: 8.1.2.4 Optional tags 261 * - Omission rules: 8.1.2.4 Optional tags
262 *
263 * @param string $name
264 * @param array $attributes
265 * @param bool $selfClosing
266 *
267 * @return int
258 */ 268 */
259 public function startTag($name, $attributes = array(), $selfClosing = false) 269 public function startTag($name, $attributes = array(), $selfClosing = false)
260 { 270 {
261 // fprintf(STDOUT, $name);
262 $lname = $this->normalizeTagName($name); 271 $lname = $this->normalizeTagName($name);
263 272
264 // Make sure we have an html element. 273 // Make sure we have an html element.
265 if (! $this->doc->documentElement && $name !== 'html' && ! $this->frag) { 274 if (!$this->doc->documentElement && 'html' !== $name && !$this->frag) {
266 $this->startTag('html'); 275 $this->startTag('html');
267 } 276 }
268 277
269 // Set quirks mode if we're at IM_INITIAL with no doctype. 278 // Set quirks mode if we're at IM_INITIAL with no doctype.
270 if ($this->insertMode == static::IM_INITIAL) { 279 if ($this->insertMode === static::IM_INITIAL) {
271 $this->quirks = true; 280 $this->quirks = true;
272 $this->parseError("No DOCTYPE specified."); 281 $this->parseError('No DOCTYPE specified.');
273 } 282 }
274 283
275 // SPECIAL TAG HANDLING: 284 // SPECIAL TAG HANDLING:
276 // Spec says do this, and "don't ask." 285 // Spec says do this, and "don't ask."
277 // find the spec where this is defined... looks problematic 286 // find the spec where this is defined... looks problematic
278 if ($name == 'image' && !($this->insertMode === static::IM_IN_SVG || $this->insertMode === static::IM_IN_MATHML)) { 287 if ('image' === $name && !($this->insertMode === static::IM_IN_SVG || $this->insertMode === static::IM_IN_MATHML)) {
279 $name = 'img'; 288 $name = 'img';
280 } 289 }
281 290
282 // Autoclose p tags where appropriate. 291 // Autoclose p tags where appropriate.
283 if ($this->insertMode >= static::IM_IN_BODY && Elements::isA($name, Elements::AUTOCLOSE_P)) { 292 if ($this->insertMode >= static::IM_IN_BODY && Elements::isA($name, Elements::AUTOCLOSE_P)) {
289 case 'html': 298 case 'html':
290 $this->insertMode = static::IM_BEFORE_HEAD; 299 $this->insertMode = static::IM_BEFORE_HEAD;
291 break; 300 break;
292 case 'head': 301 case 'head':
293 if ($this->insertMode > static::IM_BEFORE_HEAD) { 302 if ($this->insertMode > static::IM_BEFORE_HEAD) {
294 $this->parseError("Unexpected head tag outside of head context."); 303 $this->parseError('Unexpected head tag outside of head context.');
295 } else { 304 } else {
296 $this->insertMode = static::IM_IN_HEAD; 305 $this->insertMode = static::IM_IN_HEAD;
297 } 306 }
298 break; 307 break;
299 case 'body': 308 case 'body':
304 break; 313 break;
305 case 'math': 314 case 'math':
306 $this->insertMode = static::IM_IN_MATHML; 315 $this->insertMode = static::IM_IN_MATHML;
307 break; 316 break;
308 case 'noscript': 317 case 'noscript':
309 if ($this->insertMode == static::IM_IN_HEAD) { 318 if ($this->insertMode === static::IM_IN_HEAD) {
310 $this->insertMode = static::IM_IN_HEAD_NOSCRIPT; 319 $this->insertMode = static::IM_IN_HEAD_NOSCRIPT;
311 } 320 }
312 break; 321 break;
313 } 322 }
314 323
315 // Special case handling for SVG. 324 // Special case handling for SVG.
316 if ($this->insertMode == static::IM_IN_SVG) { 325 if ($this->insertMode === static::IM_IN_SVG) {
317 $lname = Elements::normalizeSvgElement($lname); 326 $lname = Elements::normalizeSvgElement($lname);
318 } 327 }
319 328
320 $pushes = 0; 329 $pushes = 0;
321 // when we found a tag thats appears inside $nsRoots, we have to switch the defalut namespace 330 // when we found a tag thats appears inside $nsRoots, we have to switch the defalut namespace
322 if (isset($this->nsRoots[$lname]) && $this->nsStack[0][''] !== $this->nsRoots[$lname]) { 331 if (isset($this->nsRoots[$lname]) && $this->nsStack[0][''] !== $this->nsRoots[$lname]) {
323 array_unshift($this->nsStack, array( 332 array_unshift($this->nsStack, array(
324 '' => $this->nsRoots[$lname] 333 '' => $this->nsRoots[$lname],
325 ) + $this->nsStack[0]); 334 ) + $this->nsStack[0]);
326 $pushes ++; 335 ++$pushes;
327 } 336 }
328 $needsWorkaround = false; 337 $needsWorkaround = false;
329 if (isset($this->options["xmlNamespaces"]) && $this->options["xmlNamespaces"]) { 338 if (isset($this->options['xmlNamespaces']) && $this->options['xmlNamespaces']) {
330 // when xmlNamespaces is true a and we found a 'xmlns' or 'xmlns:*' attribute, we should add a new item to the $nsStack 339 // when xmlNamespaces is true a and we found a 'xmlns' or 'xmlns:*' attribute, we should add a new item to the $nsStack
331 foreach ($attributes as $aName => $aVal) { 340 foreach ($attributes as $aName => $aVal) {
332 if ($aName === 'xmlns') { 341 if ('xmlns' === $aName) {
333 $needsWorkaround = $aVal; 342 $needsWorkaround = $aVal;
334 array_unshift($this->nsStack, array( 343 array_unshift($this->nsStack, array(
335 '' => $aVal 344 '' => $aVal,
336 ) + $this->nsStack[0]); 345 ) + $this->nsStack[0]);
337 $pushes ++; 346 ++$pushes;
338 } elseif ((($pos = strpos($aName, ':')) ? substr($aName, 0, $pos) : '') === 'xmlns') { 347 } elseif ('xmlns' === (($pos = strpos($aName, ':')) ? substr($aName, 0, $pos) : '')) {
339 array_unshift($this->nsStack, array( 348 array_unshift($this->nsStack, array(
340 substr($aName, $pos + 1) => $aVal 349 substr($aName, $pos + 1) => $aVal,
341 ) + $this->nsStack[0]); 350 ) + $this->nsStack[0]);
342 $pushes ++; 351 ++$pushes;
343 } 352 }
344 } 353 }
345 } 354 }
346 355
347 if ($this->onlyInline && Elements::isA($lname, Elements::BLOCK_TAG)) { 356 if ($this->onlyInline && Elements::isA($lname, Elements::BLOCK_TAG)) {
348 $this->autoclose($this->onlyInline); 357 $this->autoclose($this->onlyInline);
349 $this->onlyInline = null; 358 $this->onlyInline = null;
350 } 359 }
351 360
352 try { 361 try {
353 $prefix = ($pos = strpos($lname, ':')) ? substr($lname, 0, $pos) : ''; 362 $prefix = ($pos = strpos($lname, ':')) ? substr($lname, 0, $pos) : '';
354 363
355 364 if (false !== $needsWorkaround) {
356 if ($needsWorkaround!==false) { 365 $xml = "<$lname xmlns=\"$needsWorkaround\" " . (strlen($prefix) && isset($this->nsStack[0][$prefix]) ? ("xmlns:$prefix=\"" . $this->nsStack[0][$prefix] . '"') : '') . '/>';
357
358 $xml = "<$lname xmlns=\"$needsWorkaround\" ".(strlen($prefix) && isset($this->nsStack[0][$prefix])?("xmlns:$prefix=\"".$this->nsStack[0][$prefix]."\""):"")."/>";
359 366
360 $frag = new \DOMDocument('1.0', 'UTF-8'); 367 $frag = new \DOMDocument('1.0', 'UTF-8');
361 $frag->loadXML($xml); 368 $frag->loadXML($xml);
362 369
363 $ele = $this->doc->importNode($frag->documentElement, true); 370 $ele = $this->doc->importNode($frag->documentElement, true);
364
365 } else { 371 } else {
366 if (!isset($this->nsStack[0][$prefix]) || ($prefix === "" && isset($this->options[self::OPT_DISABLE_HTML_NS]) && $this->options[self::OPT_DISABLE_HTML_NS])) { 372 if (!isset($this->nsStack[0][$prefix]) || ('' === $prefix && isset($this->options[self::OPT_DISABLE_HTML_NS]) && $this->options[self::OPT_DISABLE_HTML_NS])) {
367 $ele = $this->doc->createElement($lname); 373 $ele = $this->doc->createElement($lname);
368 } else { 374 } else {
369 $ele = $this->doc->createElementNS($this->nsStack[0][$prefix], $lname); 375 $ele = $this->doc->createElementNS($this->nsStack[0][$prefix], $lname);
370 } 376 }
371 } 377 }
372
373 } catch (\DOMException $e) { 378 } catch (\DOMException $e) {
374 $this->parseError("Illegal tag name: <$lname>. Replaced with <invalid>."); 379 $this->parseError("Illegal tag name: <$lname>. Replaced with <invalid>.");
375 $ele = $this->doc->createElement('invalid'); 380 $ele = $this->doc->createElement('invalid');
376 } 381 }
377 382
378 if (Elements::isA($lname, Elements::BLOCK_ONLY_INLINE)) { 383 if (Elements::isA($lname, Elements::BLOCK_ONLY_INLINE)) {
379 $this->onlyInline = $lname; 384 $this->onlyInline = $lname;
380 } 385 }
381 386
382 // When we add some namespacess, we have to track them. Later, when "endElement" is invoked, we have to remove them. 387 // When we add some namespacess, we have to track them. Later, when "endElement" is invoked, we have to remove them.
383 // When we are on a void tag, we do not need to care about namesapce nesting. 388 // When we are on a void tag, we do not need to care about namesapce nesting.
384 if ($pushes > 0 && !Elements::isA($name, Elements::VOID_TAG)) { 389 if ($pushes > 0 && !Elements::isA($name, Elements::VOID_TAG)) {
393 } 398 }
394 } 399 }
395 400
396 foreach ($attributes as $aName => $aVal) { 401 foreach ($attributes as $aName => $aVal) {
397 // xmlns attributes can't be set 402 // xmlns attributes can't be set
398 if ($aName === 'xmlns') { 403 if ('xmlns' === $aName) {
399 continue; 404 continue;
400 } 405 }
401 406
402 if ($this->insertMode == static::IM_IN_SVG) { 407 if ($this->insertMode === static::IM_IN_SVG) {
403 $aName = Elements::normalizeSvgAttribute($aName); 408 $aName = Elements::normalizeSvgAttribute($aName);
404 } elseif ($this->insertMode == static::IM_IN_MATHML) { 409 } elseif ($this->insertMode === static::IM_IN_MATHML) {
405 $aName = Elements::normalizeMathMlAttribute($aName); 410 $aName = Elements::normalizeMathMlAttribute($aName);
406 } 411 }
407 412
408 try { 413 try {
409 $prefix = ($pos = strpos($aName, ':')) ? substr($aName, 0, $pos) : false; 414 $prefix = ($pos = strpos($aName, ':')) ? substr($aName, 0, $pos) : false;
410 415
411 if ($prefix==='xmlns') { 416 if ('xmlns' === $prefix) {
412 $ele->setAttributeNs(self::NAMESPACE_XMLNS, $aName, $aVal); 417 $ele->setAttributeNS(self::NAMESPACE_XMLNS, $aName, $aVal);
413 } elseif ($prefix!==false && isset($this->nsStack[0][$prefix])) { 418 } elseif (false !== $prefix && isset($this->nsStack[0][$prefix])) {
414 $ele->setAttributeNs($this->nsStack[0][$prefix], $aName, $aVal); 419 $ele->setAttributeNS($this->nsStack[0][$prefix], $aName, $aVal);
415 } else { 420 } else {
416 $ele->setAttribute($aName, $aVal); 421 $ele->setAttribute($aName, $aVal);
417 } 422 }
418 } catch (\DOMException $e) { 423 } catch (\DOMException $e) {
419 $this->parseError("Illegal attribute name for tag $name. Ignoring: $aName"); 424 $this->parseError("Illegal attribute name for tag $name. Ignoring: $aName");
420 continue; 425 continue;
421 } 426 }
422 427
423 // This is necessary on a non-DTD schema, like HTML5. 428 // This is necessary on a non-DTD schema, like HTML5.
424 if ($aName == 'id') { 429 if ('id' === $aName) {
425 $ele->setIdAttribute('id', true); 430 $ele->setIdAttribute('id', true);
426 } 431 }
427 } 432 }
428 433
429 // Some elements have special processing rules. Handle those separately. 434 if ($this->frag !== $this->current && $this->rules->hasRules($name)) {
430 if ($this->rules->hasRules($name) && $this->frag !== $this->current) { 435 // Some elements have special processing rules. Handle those separately.
431 $this->current = $this->rules->evaluate($ele, $this->current); 436 $this->current = $this->rules->evaluate($ele, $this->current);
432 } // Otherwise, it's a standard element. 437 } else {
433 else { 438 // Otherwise, it's a standard element.
434 $this->current->appendChild($ele); 439 $this->current->appendChild($ele);
435 440
436 // XXX: Need to handle self-closing tags and unary tags. 441 if (!Elements::isA($name, Elements::VOID_TAG)) {
437 if (! Elements::isA($name, Elements::VOID_TAG)) {
438 $this->current = $ele; 442 $this->current = $ele;
443 }
444
445 // Self-closing tags should only be respected on foreign elements
446 // (and are implied on void elements)
447 // See: https://www.w3.org/TR/html5/syntax.html#start-tags
448 if (Elements::isHtml5Element($name)) {
449 $selfClosing = false;
439 } 450 }
440 } 451 }
441 452
442 // This is sort of a last-ditch attempt to correct for cases where no head/body 453 // This is sort of a last-ditch attempt to correct for cases where no head/body
443 // elements are provided. 454 // elements are provided.
444 if ($this->insertMode <= static::IM_BEFORE_HEAD && $name != 'head' && $name != 'html') { 455 if ($this->insertMode <= static::IM_BEFORE_HEAD && 'head' !== $name && 'html' !== $name) {
445 $this->insertMode = static::IM_IN_BODY; 456 $this->insertMode = static::IM_IN_BODY;
446 } 457 }
447 458
448 // When we are on a void tag, we do not need to care about namesapce nesting, 459 // When we are on a void tag, we do not need to care about namesapce nesting,
449 // but we have to remove the namespaces pushed to $nsStack. 460 // but we have to remove the namespaces pushed to $nsStack.
450 if ($pushes > 0 && Elements::isA($name, Elements::VOID_TAG)) { 461 if ($pushes > 0 && Elements::isA($name, Elements::VOID_TAG)) {
451 // remove the namespaced definded by current node 462 // remove the namespaced definded by current node
452 for ($i = 0; $i < $pushes; $i ++) { 463 for ($i = 0; $i < $pushes; ++$i) {
453 array_shift($this->nsStack); 464 array_shift($this->nsStack);
454 } 465 }
455 } 466 }
467
468 if ($selfClosing) {
469 $this->endTag($name);
470 }
471
456 // Return the element mask, which the tokenizer can then use to set 472 // Return the element mask, which the tokenizer can then use to set
457 // various processing rules. 473 // various processing rules.
458 return Elements::element($name); 474 return Elements::element($name);
459 } 475 }
460 476
471 // 8.2.5.4.2 487 // 8.2.5.4.2
472 if (in_array($name, array( 488 if (in_array($name, array(
473 'html', 489 'html',
474 'br', 490 'br',
475 'head', 491 'head',
476 'title' 492 'title',
477 ))) { 493 ))) {
478 $this->startTag('html'); 494 $this->startTag('html');
479 $this->endTag($name); 495 $this->endTag($name);
480 $this->insertMode = static::IM_BEFORE_HEAD; 496 $this->insertMode = static::IM_BEFORE_HEAD;
481 497
482 return; 498 return;
483 } 499 }
484 500
485 // Ignore the tag. 501 // Ignore the tag.
486 $this->parseError("Illegal closing tag at global scope."); 502 $this->parseError('Illegal closing tag at global scope.');
487 503
488 return; 504 return;
489 } 505 }
490 506
491 // Special case handling for SVG. 507 // Special case handling for SVG.
492 if ($this->insertMode == static::IM_IN_SVG) { 508 if ($this->insertMode === static::IM_IN_SVG) {
493 $lname = Elements::normalizeSvgElement($lname); 509 $lname = Elements::normalizeSvgElement($lname);
494 } 510 }
495 511
496 // See https://github.com/facebook/hhvm/issues/2962 512 // See https://github.com/facebook/hhvm/issues/2962
497 if (defined('HHVM_VERSION') && ($cid = $this->current->getAttribute('html5-php-fake-id-attribute'))) { 513 if (defined('HHVM_VERSION') && ($cid = $this->current->getAttribute('html5-php-fake-id-attribute'))) {
498 $this->current->removeAttribute('html5-php-fake-id-attribute'); 514 $this->current->removeAttribute('html5-php-fake-id-attribute');
499 } else { 515 } else {
500 $cid = spl_object_hash($this->current); 516 $cid = spl_object_hash($this->current);
501 } 517 }
502 518
503 // XXX: Not sure whether we need this anymore.
504 // if ($name != $lname) {
505 // return $this->quirksTreeResolver($lname);
506 // }
507
508 // XXX: HTML has no parent. What do we do, though, 519 // XXX: HTML has no parent. What do we do, though,
509 // if this element appears in the wrong place? 520 // if this element appears in the wrong place?
510 if ($lname == 'html') { 521 if ('html' === $lname) {
511 return; 522 return;
512 } 523 }
513 524
514 // remove the namespaced definded by current node 525 // remove the namespaced definded by current node
515 if (isset($this->pushes[$cid])) { 526 if (isset($this->pushes[$cid])) {
516 for ($i = 0; $i < $this->pushes[$cid][0]; $i ++) { 527 for ($i = 0; $i < $this->pushes[$cid][0]; ++$i) {
517 array_shift($this->nsStack); 528 array_shift($this->nsStack);
518 } 529 }
519 unset($this->pushes[$cid]); 530 unset($this->pushes[$cid]);
520 } 531 }
521 532
522 if (! $this->autoclose($lname)) { 533 if (!$this->autoclose($lname)) {
523 $this->parseError('Could not find closing tag for ' . $lname); 534 $this->parseError('Could not find closing tag for ' . $lname);
524 } 535 }
525 536
526 // switch ($this->insertMode) {
527 switch ($lname) { 537 switch ($lname) {
528 case "head": 538 case 'head':
529 $this->insertMode = static::IM_AFTER_HEAD; 539 $this->insertMode = static::IM_AFTER_HEAD;
530 break; 540 break;
531 case "body": 541 case 'body':
532 $this->insertMode = static::IM_AFTER_BODY; 542 $this->insertMode = static::IM_AFTER_BODY;
533 break; 543 break;
534 case "svg": 544 case 'svg':
535 case "mathml": 545 case 'mathml':
536 $this->insertMode = static::IM_IN_BODY; 546 $this->insertMode = static::IM_IN_BODY;
537 break; 547 break;
538 } 548 }
539 } 549 }
540 550
552 // Per '8.2.5.4.3 The "before head" insertion mode' the characters 562 // Per '8.2.5.4.3 The "before head" insertion mode' the characters
553 // " \t\n\r\f" should be ignored but no mention of a parse error. This is 563 // " \t\n\r\f" should be ignored but no mention of a parse error. This is
554 // practical as most documents contain these characters. Other text is not 564 // practical as most documents contain these characters. Other text is not
555 // expected here so recording a parse error is necessary. 565 // expected here so recording a parse error is necessary.
556 $dataTmp = trim($data, " \t\n\r\f"); 566 $dataTmp = trim($data, " \t\n\r\f");
557 if (! empty($dataTmp)) { 567 if (!empty($dataTmp)) {
558 // fprintf(STDOUT, "Unexpected insert mode: %d", $this->insertMode); 568 // fprintf(STDOUT, "Unexpected insert mode: %d", $this->insertMode);
559 $this->parseError("Unexpected text. Ignoring: " . $dataTmp); 569 $this->parseError('Unexpected text. Ignoring: ' . $dataTmp);
560 } 570 }
561 571
562 return; 572 return;
563 } 573 }
564 // fprintf(STDOUT, "Appending text %s.", $data); 574 // fprintf(STDOUT, "Appending text %s.", $data);
571 // If the $current isn't the $root, do we need to do anything? 581 // If the $current isn't the $root, do we need to do anything?
572 } 582 }
573 583
574 public function parseError($msg, $line = 0, $col = 0) 584 public function parseError($msg, $line = 0, $col = 0)
575 { 585 {
576 $this->errors[] = sprintf("Line %d, Col %d: %s", $line, $col, $msg); 586 $this->errors[] = sprintf('Line %d, Col %d: %s', $line, $col, $msg);
577 } 587 }
578 588
579 public function getErrors() 589 public function getErrors()
580 { 590 {
581 return $this->errors; 591 return $this->errors;
588 } 598 }
589 599
590 public function processingInstruction($name, $data = null) 600 public function processingInstruction($name, $data = null)
591 { 601 {
592 // XXX: Ignore initial XML declaration, per the spec. 602 // XXX: Ignore initial XML declaration, per the spec.
593 if ($this->insertMode == static::IM_INITIAL && 'xml' == strtolower($name)) { 603 if ($this->insertMode === static::IM_INITIAL && 'xml' === strtolower($name)) {
594 return; 604 return;
595 } 605 }
596 606
597 // Important: The processor may modify the current DOM tree however 607 // Important: The processor may modify the current DOM tree however it sees fit.
598 // it sees fit. 608 if ($this->processor instanceof InstructionProcessor) {
599 if (isset($this->processor)) {
600 $res = $this->processor->process($this->current, $name, $data); 609 $res = $this->processor->process($this->current, $name, $data);
601 if (! empty($res)) { 610 if (!empty($res)) {
602 $this->current = $res; 611 $this->current = $res;
603 } 612 }
604 613
605 return; 614 return;
606 } 615 }
615 // UTILITIES 624 // UTILITIES
616 // ========================================================================== 625 // ==========================================================================
617 626
618 /** 627 /**
619 * Apply normalization rules to a tag name. 628 * Apply normalization rules to a tag name.
620 *
621 * See sections 2.9 and 8.1.2. 629 * See sections 2.9 and 8.1.2.
622 * 630 *
623 * @param string $name 631 * @param string $tagName
624 * The tag name. 632 *
625 * @return string The normalized tag name. 633 * @return string The normalized tag name.
626 */ 634 */
627 protected function normalizeTagName($name) 635 protected function normalizeTagName($tagName)
628 { 636 {
629 /* 637 /*
630 * Section 2.9 suggests that we should not do this. if (strpos($name, ':') !== false) { // We know from the grammar that there must be at least one other // char besides :, since : is not a legal tag start. $parts = explode(':', $name); return array_pop($parts); } 638 * Section 2.9 suggests that we should not do this. if (strpos($name, ':') !== false) { // We know from the grammar that there must be at least one other // char besides :, since : is not a legal tag start. $parts = explode(':', $name); return array_pop($parts); }
631 */ 639 */
632 return $name; 640 return $tagName;
633 } 641 }
634 642
635 protected function quirksTreeResolver($name) 643 protected function quirksTreeResolver($name)
636 { 644 {
637 throw new \Exception("Not implemented."); 645 throw new \Exception('Not implemented.');
638 } 646 }
639 647
640 /** 648 /**
641 * Automatically climb the tree and close the closest node with the matching $tag. 649 * Automatically climb the tree and close the closest node with the matching $tag.
642 */ 650 *
643 protected function autoclose($tag) 651 * @param string $tagName
652 *
653 * @return bool
654 */
655 protected function autoclose($tagName)
644 { 656 {
645 $working = $this->current; 657 $working = $this->current;
646 do { 658 do {
647 if ($working->nodeType != XML_ELEMENT_NODE) { 659 if (XML_ELEMENT_NODE !== $working->nodeType) {
648 return false; 660 return false;
649 } 661 }
650 if ($working->tagName == $tag) { 662 if ($working->tagName === $tagName) {
651 $this->current = $working->parentNode; 663 $this->current = $working->parentNode;
652 664
653 return true; 665 return true;
654 } 666 }
655 } while ($working = $working->parentNode); 667 } while ($working = $working->parentNode);
668
656 return false; 669 return false;
657 } 670 }
658 671
659 /** 672 /**
660 * Checks if the given tagname is an ancestor of the present candidate. 673 * Checks if the given tagname is an ancestor of the present candidate.
661 * 674 *
662 * If $this->current or anything above $this->current matches the given tag 675 * If $this->current or anything above $this->current matches the given tag
663 * name, this returns true. 676 * name, this returns true.
664 */ 677 *
665 protected function isAncestor($tagname) 678 * @param string $tagName
679 *
680 * @return bool
681 */
682 protected function isAncestor($tagName)
666 { 683 {
667 $candidate = $this->current; 684 $candidate = $this->current;
668 while ($candidate->nodeType === XML_ELEMENT_NODE) { 685 while (XML_ELEMENT_NODE === $candidate->nodeType) {
669 if ($candidate->tagName == $tagname) { 686 if ($candidate->tagName === $tagName) {
670 return true; 687 return true;
671 } 688 }
672 $candidate = $candidate->parentNode; 689 $candidate = $candidate->parentNode;
673 } 690 }
674 691
675 return false; 692 return false;
676 } 693 }
677 694
678 /** 695 /**
679 * Returns true if the immediate parent element is of the given tagname. 696 * Returns true if the immediate parent element is of the given tagname.
680 */ 697 *
681 protected function isParent($tagname) 698 * @param string $tagName
682 { 699 *
683 return $this->current->tagName == $tagname; 700 * @return bool
701 */
702 protected function isParent($tagName)
703 {
704 return $this->current->tagName === $tagName;
684 } 705 }
685 } 706 }