Mercurial > hg > isophonics-drupal-site
comparison vendor/masterminds/html5/src/HTML5/Parser/DOMTreeBuilder.php @ 17:129ea1e6d783
Update, including to Drupal core 8.6.10
author | Chris Cannam |
---|---|
date | Thu, 28 Feb 2019 13:21:36 +0000 |
parents | 4c8ae668cc8c |
children |
comparison
equal
deleted
inserted
replaced
16:c2387f117808 | 17:129ea1e6d783 |
---|---|
1 <?php | 1 <?php |
2 | |
2 namespace Masterminds\HTML5\Parser; | 3 namespace Masterminds\HTML5\Parser; |
3 | 4 |
4 use Masterminds\HTML5\Elements; | 5 use Masterminds\HTML5\Elements; |
6 use Masterminds\HTML5\InstructionProcessor; | |
5 | 7 |
6 /** | 8 /** |
7 * Create an HTML5 DOM tree from events. | 9 * Create an HTML5 DOM tree from events. |
8 * | 10 * |
9 * This attempts to create a DOM from events emitted by a parser. This | 11 * This attempts to create a DOM from events emitted by a parser. This |
22 * parser. | 24 * parser. |
23 */ | 25 */ |
24 class DOMTreeBuilder implements EventHandler | 26 class DOMTreeBuilder implements EventHandler |
25 { | 27 { |
26 /** | 28 /** |
27 * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0 | 29 * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0. |
28 */ | 30 */ |
29 const NAMESPACE_HTML = 'http://www.w3.org/1999/xhtml'; | 31 const NAMESPACE_HTML = 'http://www.w3.org/1999/xhtml'; |
30 | 32 |
31 const NAMESPACE_MATHML = 'http://www.w3.org/1998/Math/MathML'; | 33 const NAMESPACE_MATHML = 'http://www.w3.org/1998/Math/MathML'; |
32 | 34 |
43 const OPT_TARGET_DOC = 'target_document'; | 45 const OPT_TARGET_DOC = 'target_document'; |
44 | 46 |
45 const OPT_IMPLICIT_NS = 'implicit_namespaces'; | 47 const OPT_IMPLICIT_NS = 'implicit_namespaces'; |
46 | 48 |
47 /** | 49 /** |
48 * Holds the HTML5 element names that causes a namespace switch | 50 * Holds the HTML5 element names that causes a namespace switch. |
49 * | 51 * |
50 * @var array | 52 * @var array |
51 */ | 53 */ |
52 protected $nsRoots = array( | 54 protected $nsRoots = array( |
53 'html' => self::NAMESPACE_HTML, | 55 'html' => self::NAMESPACE_HTML, |
54 'svg' => self::NAMESPACE_SVG, | 56 'svg' => self::NAMESPACE_SVG, |
55 'math' => self::NAMESPACE_MATHML | 57 'math' => self::NAMESPACE_MATHML, |
56 ); | 58 ); |
57 | 59 |
58 /** | 60 /** |
59 * Holds the always available namespaces (which does not require the XMLNS declaration). | 61 * Holds the always available namespaces (which does not require the XMLNS declaration). |
60 * | 62 * |
61 * @var array | 63 * @var array |
62 */ | 64 */ |
63 protected $implicitNamespaces = array( | 65 protected $implicitNamespaces = array( |
64 'xml' => self::NAMESPACE_XML, | 66 'xml' => self::NAMESPACE_XML, |
65 'xmlns' => self::NAMESPACE_XMLNS, | 67 'xmlns' => self::NAMESPACE_XMLNS, |
66 'xlink' => self::NAMESPACE_XLINK | 68 'xlink' => self::NAMESPACE_XLINK, |
67 ); | 69 ); |
68 | 70 |
69 /** | 71 /** |
70 * Holds a stack of currently active namespaces. | 72 * Holds a stack of currently active namespaces. |
71 * | 73 * |
134 protected $options = array(); | 136 protected $options = array(); |
135 | 137 |
136 protected $stack = array(); | 138 protected $stack = array(); |
137 | 139 |
138 protected $current; // Pointer in the tag hierarchy. | 140 protected $current; // Pointer in the tag hierarchy. |
141 protected $rules; | |
139 protected $doc; | 142 protected $doc; |
140 | 143 |
141 protected $frag; | 144 protected $frag; |
142 | 145 |
143 protected $processor; | 146 protected $processor; |
144 | 147 |
145 protected $insertMode = 0; | 148 protected $insertMode = 0; |
146 | 149 |
147 /** | 150 /** |
148 * Track if we are in an element that allows only inline child nodes | 151 * Track if we are in an element that allows only inline child nodes. |
152 * | |
149 * @var string|null | 153 * @var string|null |
150 */ | 154 */ |
151 protected $onlyInline; | 155 protected $onlyInline; |
152 | 156 |
153 /** | 157 /** |
154 * Quirks mode is enabled by default. | 158 * Quirks mode is enabled by default. |
155 * Any document that is missing the | 159 * Any document that is missing the DT will be considered to be in quirks mode. |
156 * DT will be considered to be in quirks mode. | |
157 */ | 160 */ |
158 protected $quirks = true; | 161 protected $quirks = true; |
159 | 162 |
160 protected $errors = array(); | 163 protected $errors = array(); |
161 | 164 |
172 // documents, and attempting to up-convert any older DTDs to HTML5. | 175 // documents, and attempting to up-convert any older DTDs to HTML5. |
173 $dt = $impl->createDocumentType('html'); | 176 $dt = $impl->createDocumentType('html'); |
174 // $this->doc = \DOMImplementation::createDocument(NULL, 'html', $dt); | 177 // $this->doc = \DOMImplementation::createDocument(NULL, 'html', $dt); |
175 $this->doc = $impl->createDocument(null, null, $dt); | 178 $this->doc = $impl->createDocument(null, null, $dt); |
176 } | 179 } |
180 | |
177 $this->errors = array(); | 181 $this->errors = array(); |
178 | 182 |
179 $this->current = $this->doc; // ->documentElement; | 183 $this->current = $this->doc; // ->documentElement; |
180 | 184 |
181 // Create a rules engine for tags. | 185 // Create a rules engine for tags. |
182 $this->rules = new TreeBuildingRules($this->doc); | 186 $this->rules = new TreeBuildingRules(); |
183 | 187 |
184 $implicitNS = array(); | 188 $implicitNS = array(); |
185 if (isset($this->options[self::OPT_IMPLICIT_NS])) { | 189 if (isset($this->options[self::OPT_IMPLICIT_NS])) { |
186 $implicitNS = $this->options[self::OPT_IMPLICIT_NS]; | 190 $implicitNS = $this->options[self::OPT_IMPLICIT_NS]; |
187 } elseif (isset($this->options["implicitNamespaces"])) { | 191 } elseif (isset($this->options['implicitNamespaces'])) { |
188 $implicitNS = $this->options["implicitNamespaces"]; | 192 $implicitNS = $this->options['implicitNamespaces']; |
189 } | 193 } |
190 | 194 |
191 // Fill $nsStack with the default HTML5 namespaces, plus the "implicitNamespaces" array taken from $options | 195 // Fill $nsStack with the default HTML5 namespaces, plus the "implicitNamespaces" array taken from $options |
192 array_unshift($this->nsStack, $implicitNS + array( | 196 array_unshift($this->nsStack, $implicitNS + array('' => self::NAMESPACE_HTML) + $this->implicitNamespaces); |
193 '' => self::NAMESPACE_HTML | |
194 ) + $this->implicitNamespaces); | |
195 | 197 |
196 if ($isFragment) { | 198 if ($isFragment) { |
197 $this->insertMode = static::IM_IN_BODY; | 199 $this->insertMode = static::IM_IN_BODY; |
198 $this->frag = $this->doc->createDocumentFragment(); | 200 $this->frag = $this->doc->createDocumentFragment(); |
199 $this->current = $this->frag; | 201 $this->current = $this->frag; |
214 * This returns a DOMNodeList because a fragment may have zero or more | 216 * This returns a DOMNodeList because a fragment may have zero or more |
215 * DOMNodes at its root. | 217 * DOMNodes at its root. |
216 * | 218 * |
217 * @see http://www.w3.org/TR/2012/CR-html5-20121217/syntax.html#concept-frag-parse-context | 219 * @see http://www.w3.org/TR/2012/CR-html5-20121217/syntax.html#concept-frag-parse-context |
218 * | 220 * |
219 * @return \DOMFragmentDocumentFragment | 221 * @return \DOMDocumentFragment |
220 */ | 222 */ |
221 public function fragment() | 223 public function fragment() |
222 { | 224 { |
223 return $this->frag; | 225 return $this->frag; |
224 } | 226 } |
226 /** | 228 /** |
227 * Provide an instruction processor. | 229 * Provide an instruction processor. |
228 * | 230 * |
229 * This is used for handling Processor Instructions as they are | 231 * This is used for handling Processor Instructions as they are |
230 * inserted. If omitted, PI's are inserted directly into the DOM tree. | 232 * inserted. If omitted, PI's are inserted directly into the DOM tree. |
231 */ | 233 * |
232 public function setInstructionProcessor(\Masterminds\HTML5\InstructionProcessor $proc) | 234 * @param InstructionProcessor $proc |
235 */ | |
236 public function setInstructionProcessor(InstructionProcessor $proc) | |
233 { | 237 { |
234 $this->processor = $proc; | 238 $this->processor = $proc; |
235 } | 239 } |
236 | 240 |
237 public function doctype($name, $idType = 0, $id = null, $quirks = false) | 241 public function doctype($name, $idType = 0, $id = null, $quirks = false) |
239 // This is used solely for setting quirks mode. Currently we don't | 243 // This is used solely for setting quirks mode. Currently we don't |
240 // try to preserve the inbound DT. We convert it to HTML5. | 244 // try to preserve the inbound DT. We convert it to HTML5. |
241 $this->quirks = $quirks; | 245 $this->quirks = $quirks; |
242 | 246 |
243 if ($this->insertMode > static::IM_INITIAL) { | 247 if ($this->insertMode > static::IM_INITIAL) { |
244 $this->parseError("Illegal placement of DOCTYPE tag. Ignoring: " . $name); | 248 $this->parseError('Illegal placement of DOCTYPE tag. Ignoring: ' . $name); |
245 | 249 |
246 return; | 250 return; |
247 } | 251 } |
248 | 252 |
249 $this->insertMode = static::IM_BEFORE_HTML; | 253 $this->insertMode = static::IM_BEFORE_HTML; |
253 * Process the start tag. | 257 * Process the start tag. |
254 * | 258 * |
255 * @todo - XMLNS namespace handling (we need to parse, even if it's not valid) | 259 * @todo - XMLNS namespace handling (we need to parse, even if it's not valid) |
256 * - XLink, MathML and SVG namespace handling | 260 * - XLink, MathML and SVG namespace handling |
257 * - Omission rules: 8.1.2.4 Optional tags | 261 * - Omission rules: 8.1.2.4 Optional tags |
262 * | |
263 * @param string $name | |
264 * @param array $attributes | |
265 * @param bool $selfClosing | |
266 * | |
267 * @return int | |
258 */ | 268 */ |
259 public function startTag($name, $attributes = array(), $selfClosing = false) | 269 public function startTag($name, $attributes = array(), $selfClosing = false) |
260 { | 270 { |
261 // fprintf(STDOUT, $name); | |
262 $lname = $this->normalizeTagName($name); | 271 $lname = $this->normalizeTagName($name); |
263 | 272 |
264 // Make sure we have an html element. | 273 // Make sure we have an html element. |
265 if (! $this->doc->documentElement && $name !== 'html' && ! $this->frag) { | 274 if (!$this->doc->documentElement && 'html' !== $name && !$this->frag) { |
266 $this->startTag('html'); | 275 $this->startTag('html'); |
267 } | 276 } |
268 | 277 |
269 // Set quirks mode if we're at IM_INITIAL with no doctype. | 278 // Set quirks mode if we're at IM_INITIAL with no doctype. |
270 if ($this->insertMode == static::IM_INITIAL) { | 279 if ($this->insertMode === static::IM_INITIAL) { |
271 $this->quirks = true; | 280 $this->quirks = true; |
272 $this->parseError("No DOCTYPE specified."); | 281 $this->parseError('No DOCTYPE specified.'); |
273 } | 282 } |
274 | 283 |
275 // SPECIAL TAG HANDLING: | 284 // SPECIAL TAG HANDLING: |
276 // Spec says do this, and "don't ask." | 285 // Spec says do this, and "don't ask." |
277 // find the spec where this is defined... looks problematic | 286 // find the spec where this is defined... looks problematic |
278 if ($name == 'image' && !($this->insertMode === static::IM_IN_SVG || $this->insertMode === static::IM_IN_MATHML)) { | 287 if ('image' === $name && !($this->insertMode === static::IM_IN_SVG || $this->insertMode === static::IM_IN_MATHML)) { |
279 $name = 'img'; | 288 $name = 'img'; |
280 } | 289 } |
281 | 290 |
282 // Autoclose p tags where appropriate. | 291 // Autoclose p tags where appropriate. |
283 if ($this->insertMode >= static::IM_IN_BODY && Elements::isA($name, Elements::AUTOCLOSE_P)) { | 292 if ($this->insertMode >= static::IM_IN_BODY && Elements::isA($name, Elements::AUTOCLOSE_P)) { |
289 case 'html': | 298 case 'html': |
290 $this->insertMode = static::IM_BEFORE_HEAD; | 299 $this->insertMode = static::IM_BEFORE_HEAD; |
291 break; | 300 break; |
292 case 'head': | 301 case 'head': |
293 if ($this->insertMode > static::IM_BEFORE_HEAD) { | 302 if ($this->insertMode > static::IM_BEFORE_HEAD) { |
294 $this->parseError("Unexpected head tag outside of head context."); | 303 $this->parseError('Unexpected head tag outside of head context.'); |
295 } else { | 304 } else { |
296 $this->insertMode = static::IM_IN_HEAD; | 305 $this->insertMode = static::IM_IN_HEAD; |
297 } | 306 } |
298 break; | 307 break; |
299 case 'body': | 308 case 'body': |
304 break; | 313 break; |
305 case 'math': | 314 case 'math': |
306 $this->insertMode = static::IM_IN_MATHML; | 315 $this->insertMode = static::IM_IN_MATHML; |
307 break; | 316 break; |
308 case 'noscript': | 317 case 'noscript': |
309 if ($this->insertMode == static::IM_IN_HEAD) { | 318 if ($this->insertMode === static::IM_IN_HEAD) { |
310 $this->insertMode = static::IM_IN_HEAD_NOSCRIPT; | 319 $this->insertMode = static::IM_IN_HEAD_NOSCRIPT; |
311 } | 320 } |
312 break; | 321 break; |
313 } | 322 } |
314 | 323 |
315 // Special case handling for SVG. | 324 // Special case handling for SVG. |
316 if ($this->insertMode == static::IM_IN_SVG) { | 325 if ($this->insertMode === static::IM_IN_SVG) { |
317 $lname = Elements::normalizeSvgElement($lname); | 326 $lname = Elements::normalizeSvgElement($lname); |
318 } | 327 } |
319 | 328 |
320 $pushes = 0; | 329 $pushes = 0; |
321 // when we find a tag that appears inside $nsRoots, we have to switch the default namespace | 330 // when we find a tag that appears inside $nsRoots, we have to switch the default namespace |
322 if (isset($this->nsRoots[$lname]) && $this->nsStack[0][''] !== $this->nsRoots[$lname]) { | 331 if (isset($this->nsRoots[$lname]) && $this->nsStack[0][''] !== $this->nsRoots[$lname]) { |
323 array_unshift($this->nsStack, array( | 332 array_unshift($this->nsStack, array( |
324 '' => $this->nsRoots[$lname] | 333 '' => $this->nsRoots[$lname], |
325 ) + $this->nsStack[0]); | 334 ) + $this->nsStack[0]); |
326 $pushes ++; | 335 ++$pushes; |
327 } | 336 } |
328 $needsWorkaround = false; | 337 $needsWorkaround = false; |
329 if (isset($this->options["xmlNamespaces"]) && $this->options["xmlNamespaces"]) { | 338 if (isset($this->options['xmlNamespaces']) && $this->options['xmlNamespaces']) { |
330 // when xmlNamespaces is true and we find an 'xmlns' or 'xmlns:*' attribute, we should add a new item to the $nsStack | 339 // when xmlNamespaces is true and we find an 'xmlns' or 'xmlns:*' attribute, we should add a new item to the $nsStack |
331 foreach ($attributes as $aName => $aVal) { | 340 foreach ($attributes as $aName => $aVal) { |
332 if ($aName === 'xmlns') { | 341 if ('xmlns' === $aName) { |
333 $needsWorkaround = $aVal; | 342 $needsWorkaround = $aVal; |
334 array_unshift($this->nsStack, array( | 343 array_unshift($this->nsStack, array( |
335 '' => $aVal | 344 '' => $aVal, |
336 ) + $this->nsStack[0]); | 345 ) + $this->nsStack[0]); |
337 $pushes ++; | 346 ++$pushes; |
338 } elseif ((($pos = strpos($aName, ':')) ? substr($aName, 0, $pos) : '') === 'xmlns') { | 347 } elseif ('xmlns' === (($pos = strpos($aName, ':')) ? substr($aName, 0, $pos) : '')) { |
339 array_unshift($this->nsStack, array( | 348 array_unshift($this->nsStack, array( |
340 substr($aName, $pos + 1) => $aVal | 349 substr($aName, $pos + 1) => $aVal, |
341 ) + $this->nsStack[0]); | 350 ) + $this->nsStack[0]); |
342 $pushes ++; | 351 ++$pushes; |
343 } | 352 } |
344 } | 353 } |
345 } | 354 } |
346 | 355 |
347 if ($this->onlyInline && Elements::isA($lname, Elements::BLOCK_TAG)) { | 356 if ($this->onlyInline && Elements::isA($lname, Elements::BLOCK_TAG)) { |
348 $this->autoclose($this->onlyInline); | 357 $this->autoclose($this->onlyInline); |
349 $this->onlyInline = null; | 358 $this->onlyInline = null; |
350 } | 359 } |
351 | 360 |
352 try { | 361 try { |
353 $prefix = ($pos = strpos($lname, ':')) ? substr($lname, 0, $pos) : ''; | 362 $prefix = ($pos = strpos($lname, ':')) ? substr($lname, 0, $pos) : ''; |
354 | 363 |
355 | 364 if (false !== $needsWorkaround) { |
356 if ($needsWorkaround!==false) { | 365 $xml = "<$lname xmlns=\"$needsWorkaround\" " . (strlen($prefix) && isset($this->nsStack[0][$prefix]) ? ("xmlns:$prefix=\"" . $this->nsStack[0][$prefix] . '"') : '') . '/>'; |
357 | |
358 $xml = "<$lname xmlns=\"$needsWorkaround\" ".(strlen($prefix) && isset($this->nsStack[0][$prefix])?("xmlns:$prefix=\"".$this->nsStack[0][$prefix]."\""):"")."/>"; | |
359 | 366 |
360 $frag = new \DOMDocument('1.0', 'UTF-8'); | 367 $frag = new \DOMDocument('1.0', 'UTF-8'); |
361 $frag->loadXML($xml); | 368 $frag->loadXML($xml); |
362 | 369 |
363 $ele = $this->doc->importNode($frag->documentElement, true); | 370 $ele = $this->doc->importNode($frag->documentElement, true); |
364 | |
365 } else { | 371 } else { |
366 if (!isset($this->nsStack[0][$prefix]) || ($prefix === "" && isset($this->options[self::OPT_DISABLE_HTML_NS]) && $this->options[self::OPT_DISABLE_HTML_NS])) { | 372 if (!isset($this->nsStack[0][$prefix]) || ('' === $prefix && isset($this->options[self::OPT_DISABLE_HTML_NS]) && $this->options[self::OPT_DISABLE_HTML_NS])) { |
367 $ele = $this->doc->createElement($lname); | 373 $ele = $this->doc->createElement($lname); |
368 } else { | 374 } else { |
369 $ele = $this->doc->createElementNS($this->nsStack[0][$prefix], $lname); | 375 $ele = $this->doc->createElementNS($this->nsStack[0][$prefix], $lname); |
370 } | 376 } |
371 } | 377 } |
372 | |
373 } catch (\DOMException $e) { | 378 } catch (\DOMException $e) { |
374 $this->parseError("Illegal tag name: <$lname>. Replaced with <invalid>."); | 379 $this->parseError("Illegal tag name: <$lname>. Replaced with <invalid>."); |
375 $ele = $this->doc->createElement('invalid'); | 380 $ele = $this->doc->createElement('invalid'); |
376 } | 381 } |
377 | 382 |
378 if (Elements::isA($lname, Elements::BLOCK_ONLY_INLINE)) { | 383 if (Elements::isA($lname, Elements::BLOCK_ONLY_INLINE)) { |
379 $this->onlyInline = $lname; | 384 $this->onlyInline = $lname; |
380 } | 385 } |
381 | 386 |
382 // When we add some namespaces, we have to track them. Later, when "endElement" is invoked, we have to remove them. | 387 // When we add some namespaces, we have to track them. Later, when "endElement" is invoked, we have to remove them. |
383 // When we are on a void tag, we do not need to care about namespace nesting. | 388 // When we are on a void tag, we do not need to care about namespace nesting. |
384 if ($pushes > 0 && !Elements::isA($name, Elements::VOID_TAG)) { | 389 if ($pushes > 0 && !Elements::isA($name, Elements::VOID_TAG)) { |
393 } | 398 } |
394 } | 399 } |
395 | 400 |
396 foreach ($attributes as $aName => $aVal) { | 401 foreach ($attributes as $aName => $aVal) { |
397 // xmlns attributes can't be set | 402 // xmlns attributes can't be set |
398 if ($aName === 'xmlns') { | 403 if ('xmlns' === $aName) { |
399 continue; | 404 continue; |
400 } | 405 } |
401 | 406 |
402 if ($this->insertMode == static::IM_IN_SVG) { | 407 if ($this->insertMode === static::IM_IN_SVG) { |
403 $aName = Elements::normalizeSvgAttribute($aName); | 408 $aName = Elements::normalizeSvgAttribute($aName); |
404 } elseif ($this->insertMode == static::IM_IN_MATHML) { | 409 } elseif ($this->insertMode === static::IM_IN_MATHML) { |
405 $aName = Elements::normalizeMathMlAttribute($aName); | 410 $aName = Elements::normalizeMathMlAttribute($aName); |
406 } | 411 } |
407 | 412 |
408 try { | 413 try { |
409 $prefix = ($pos = strpos($aName, ':')) ? substr($aName, 0, $pos) : false; | 414 $prefix = ($pos = strpos($aName, ':')) ? substr($aName, 0, $pos) : false; |
410 | 415 |
411 if ($prefix==='xmlns') { | 416 if ('xmlns' === $prefix) { |
412 $ele->setAttributeNs(self::NAMESPACE_XMLNS, $aName, $aVal); | 417 $ele->setAttributeNS(self::NAMESPACE_XMLNS, $aName, $aVal); |
413 } elseif ($prefix!==false && isset($this->nsStack[0][$prefix])) { | 418 } elseif (false !== $prefix && isset($this->nsStack[0][$prefix])) { |
414 $ele->setAttributeNs($this->nsStack[0][$prefix], $aName, $aVal); | 419 $ele->setAttributeNS($this->nsStack[0][$prefix], $aName, $aVal); |
415 } else { | 420 } else { |
416 $ele->setAttribute($aName, $aVal); | 421 $ele->setAttribute($aName, $aVal); |
417 } | 422 } |
418 } catch (\DOMException $e) { | 423 } catch (\DOMException $e) { |
419 $this->parseError("Illegal attribute name for tag $name. Ignoring: $aName"); | 424 $this->parseError("Illegal attribute name for tag $name. Ignoring: $aName"); |
420 continue; | 425 continue; |
421 } | 426 } |
422 | 427 |
423 // This is necessary on a non-DTD schema, like HTML5. | 428 // This is necessary on a non-DTD schema, like HTML5. |
424 if ($aName == 'id') { | 429 if ('id' === $aName) { |
425 $ele->setIdAttribute('id', true); | 430 $ele->setIdAttribute('id', true); |
426 } | 431 } |
427 } | 432 } |
428 | 433 |
429 // Some elements have special processing rules. Handle those separately. | 434 if ($this->frag !== $this->current && $this->rules->hasRules($name)) { |
430 if ($this->rules->hasRules($name) && $this->frag !== $this->current) { | 435 // Some elements have special processing rules. Handle those separately. |
431 $this->current = $this->rules->evaluate($ele, $this->current); | 436 $this->current = $this->rules->evaluate($ele, $this->current); |
432 } // Otherwise, it's a standard element. | 437 } else { |
433 else { | 438 // Otherwise, it's a standard element. |
434 $this->current->appendChild($ele); | 439 $this->current->appendChild($ele); |
435 | 440 |
436 // XXX: Need to handle self-closing tags and unary tags. | 441 if (!Elements::isA($name, Elements::VOID_TAG)) { |
437 if (! Elements::isA($name, Elements::VOID_TAG)) { | |
438 $this->current = $ele; | 442 $this->current = $ele; |
443 } | |
444 | |
445 // Self-closing tags should only be respected on foreign elements | |
446 // (and are implied on void elements) | |
447 // See: https://www.w3.org/TR/html5/syntax.html#start-tags | |
448 if (Elements::isHtml5Element($name)) { | |
449 $selfClosing = false; | |
439 } | 450 } |
440 } | 451 } |
441 | 452 |
442 // This is sort of a last-ditch attempt to correct for cases where no head/body | 453 // This is sort of a last-ditch attempt to correct for cases where no head/body |
443 // elements are provided. | 454 // elements are provided. |
444 if ($this->insertMode <= static::IM_BEFORE_HEAD && $name != 'head' && $name != 'html') { | 455 if ($this->insertMode <= static::IM_BEFORE_HEAD && 'head' !== $name && 'html' !== $name) { |
445 $this->insertMode = static::IM_IN_BODY; | 456 $this->insertMode = static::IM_IN_BODY; |
446 } | 457 } |
447 | 458 |
448 // When we are on a void tag, we do not need to care about namespace nesting, | 459 // When we are on a void tag, we do not need to care about namespace nesting, |
449 // but we have to remove the namespaces pushed to $nsStack. | 460 // but we have to remove the namespaces pushed to $nsStack. |
450 if ($pushes > 0 && Elements::isA($name, Elements::VOID_TAG)) { | 461 if ($pushes > 0 && Elements::isA($name, Elements::VOID_TAG)) { |
451 // remove the namespaces defined by the current node | 462 // remove the namespaces defined by the current node |
452 for ($i = 0; $i < $pushes; $i ++) { | 463 for ($i = 0; $i < $pushes; ++$i) { |
453 array_shift($this->nsStack); | 464 array_shift($this->nsStack); |
454 } | 465 } |
455 } | 466 } |
467 | |
468 if ($selfClosing) { | |
469 $this->endTag($name); | |
470 } | |
471 | |
456 // Return the element mask, which the tokenizer can then use to set | 472 // Return the element mask, which the tokenizer can then use to set |
457 // various processing rules. | 473 // various processing rules. |
458 return Elements::element($name); | 474 return Elements::element($name); |
459 } | 475 } |
460 | 476 |
471 // 8.2.5.4.2 | 487 // 8.2.5.4.2 |
472 if (in_array($name, array( | 488 if (in_array($name, array( |
473 'html', | 489 'html', |
474 'br', | 490 'br', |
475 'head', | 491 'head', |
476 'title' | 492 'title', |
477 ))) { | 493 ))) { |
478 $this->startTag('html'); | 494 $this->startTag('html'); |
479 $this->endTag($name); | 495 $this->endTag($name); |
480 $this->insertMode = static::IM_BEFORE_HEAD; | 496 $this->insertMode = static::IM_BEFORE_HEAD; |
481 | 497 |
482 return; | 498 return; |
483 } | 499 } |
484 | 500 |
485 // Ignore the tag. | 501 // Ignore the tag. |
486 $this->parseError("Illegal closing tag at global scope."); | 502 $this->parseError('Illegal closing tag at global scope.'); |
487 | 503 |
488 return; | 504 return; |
489 } | 505 } |
490 | 506 |
491 // Special case handling for SVG. | 507 // Special case handling for SVG. |
492 if ($this->insertMode == static::IM_IN_SVG) { | 508 if ($this->insertMode === static::IM_IN_SVG) { |
493 $lname = Elements::normalizeSvgElement($lname); | 509 $lname = Elements::normalizeSvgElement($lname); |
494 } | 510 } |
495 | 511 |
496 // See https://github.com/facebook/hhvm/issues/2962 | 512 // See https://github.com/facebook/hhvm/issues/2962 |
497 if (defined('HHVM_VERSION') && ($cid = $this->current->getAttribute('html5-php-fake-id-attribute'))) { | 513 if (defined('HHVM_VERSION') && ($cid = $this->current->getAttribute('html5-php-fake-id-attribute'))) { |
498 $this->current->removeAttribute('html5-php-fake-id-attribute'); | 514 $this->current->removeAttribute('html5-php-fake-id-attribute'); |
499 } else { | 515 } else { |
500 $cid = spl_object_hash($this->current); | 516 $cid = spl_object_hash($this->current); |
501 } | 517 } |
502 | 518 |
503 // XXX: Not sure whether we need this anymore. | |
504 // if ($name != $lname) { | |
505 // return $this->quirksTreeResolver($lname); | |
506 // } | |
507 | |
508 // XXX: HTML has no parent. What do we do, though, | 519 // XXX: HTML has no parent. What do we do, though, |
509 // if this element appears in the wrong place? | 520 // if this element appears in the wrong place? |
510 if ($lname == 'html') { | 521 if ('html' === $lname) { |
511 return; | 522 return; |
512 } | 523 } |
513 | 524 |
514 // remove the namespaces defined by the current node | 525 // remove the namespaces defined by the current node |
515 if (isset($this->pushes[$cid])) { | 526 if (isset($this->pushes[$cid])) { |
516 for ($i = 0; $i < $this->pushes[$cid][0]; $i ++) { | 527 for ($i = 0; $i < $this->pushes[$cid][0]; ++$i) { |
517 array_shift($this->nsStack); | 528 array_shift($this->nsStack); |
518 } | 529 } |
519 unset($this->pushes[$cid]); | 530 unset($this->pushes[$cid]); |
520 } | 531 } |
521 | 532 |
522 if (! $this->autoclose($lname)) { | 533 if (!$this->autoclose($lname)) { |
523 $this->parseError('Could not find closing tag for ' . $lname); | 534 $this->parseError('Could not find closing tag for ' . $lname); |
524 } | 535 } |
525 | 536 |
526 // switch ($this->insertMode) { | |
527 switch ($lname) { | 537 switch ($lname) { |
528 case "head": | 538 case 'head': |
529 $this->insertMode = static::IM_AFTER_HEAD; | 539 $this->insertMode = static::IM_AFTER_HEAD; |
530 break; | 540 break; |
531 case "body": | 541 case 'body': |
532 $this->insertMode = static::IM_AFTER_BODY; | 542 $this->insertMode = static::IM_AFTER_BODY; |
533 break; | 543 break; |
534 case "svg": | 544 case 'svg': |
535 case "mathml": | 545 case 'mathml': |
536 $this->insertMode = static::IM_IN_BODY; | 546 $this->insertMode = static::IM_IN_BODY; |
537 break; | 547 break; |
538 } | 548 } |
539 } | 549 } |
540 | 550 |
552 // Per '8.2.5.4.3 The "before head" insertion mode' the characters | 562 // Per '8.2.5.4.3 The "before head" insertion mode' the characters |
553 // " \t\n\r\f" should be ignored but no mention of a parse error. This is | 563 // " \t\n\r\f" should be ignored but no mention of a parse error. This is |
554 // practical as most documents contain these characters. Other text is not | 564 // practical as most documents contain these characters. Other text is not |
555 // expected here so recording a parse error is necessary. | 565 // expected here so recording a parse error is necessary. |
556 $dataTmp = trim($data, " \t\n\r\f"); | 566 $dataTmp = trim($data, " \t\n\r\f"); |
557 if (! empty($dataTmp)) { | 567 if (!empty($dataTmp)) { |
558 // fprintf(STDOUT, "Unexpected insert mode: %d", $this->insertMode); | 568 // fprintf(STDOUT, "Unexpected insert mode: %d", $this->insertMode); |
559 $this->parseError("Unexpected text. Ignoring: " . $dataTmp); | 569 $this->parseError('Unexpected text. Ignoring: ' . $dataTmp); |
560 } | 570 } |
561 | 571 |
562 return; | 572 return; |
563 } | 573 } |
564 // fprintf(STDOUT, "Appending text %s.", $data); | 574 // fprintf(STDOUT, "Appending text %s.", $data); |
571 // If the $current isn't the $root, do we need to do anything? | 581 // If the $current isn't the $root, do we need to do anything? |
572 } | 582 } |
573 | 583 |
574 public function parseError($msg, $line = 0, $col = 0) | 584 public function parseError($msg, $line = 0, $col = 0) |
575 { | 585 { |
576 $this->errors[] = sprintf("Line %d, Col %d: %s", $line, $col, $msg); | 586 $this->errors[] = sprintf('Line %d, Col %d: %s', $line, $col, $msg); |
577 } | 587 } |
578 | 588 |
579 public function getErrors() | 589 public function getErrors() |
580 { | 590 { |
581 return $this->errors; | 591 return $this->errors; |
588 } | 598 } |
589 | 599 |
590 public function processingInstruction($name, $data = null) | 600 public function processingInstruction($name, $data = null) |
591 { | 601 { |
592 // XXX: Ignore initial XML declaration, per the spec. | 602 // XXX: Ignore initial XML declaration, per the spec. |
593 if ($this->insertMode == static::IM_INITIAL && 'xml' == strtolower($name)) { | 603 if ($this->insertMode === static::IM_INITIAL && 'xml' === strtolower($name)) { |
594 return; | 604 return; |
595 } | 605 } |
596 | 606 |
597 // Important: The processor may modify the current DOM tree however | 607 // Important: The processor may modify the current DOM tree however it sees fit. |
598 // it sees fit. | 608 if ($this->processor instanceof InstructionProcessor) { |
599 if (isset($this->processor)) { | |
600 $res = $this->processor->process($this->current, $name, $data); | 609 $res = $this->processor->process($this->current, $name, $data); |
601 if (! empty($res)) { | 610 if (!empty($res)) { |
602 $this->current = $res; | 611 $this->current = $res; |
603 } | 612 } |
604 | 613 |
605 return; | 614 return; |
606 } | 615 } |
615 // UTILITIES | 624 // UTILITIES |
616 // ========================================================================== | 625 // ========================================================================== |
617 | 626 |
618 /** | 627 /** |
619 * Apply normalization rules to a tag name. | 628 * Apply normalization rules to a tag name. |
620 * | |
621 * See sections 2.9 and 8.1.2. | 629 * See sections 2.9 and 8.1.2. |
622 * | 630 * |
623 * @param string $name | 631 * @param string $tagName |
624 * The tag name. | 632 * |
625 * @return string The normalized tag name. | 633 * @return string The normalized tag name. |
626 */ | 634 */ |
627 protected function normalizeTagName($name) | 635 protected function normalizeTagName($tagName) |
628 { | 636 { |
629 /* | 637 /* |
630 * Section 2.9 suggests that we should not do this. if (strpos($name, ':') !== false) { // We know from the grammar that there must be at least one other // char besides :, since : is not a legal tag start. $parts = explode(':', $name); return array_pop($parts); } | 638 * Section 2.9 suggests that we should not do this. if (strpos($name, ':') !== false) { // We know from the grammar that there must be at least one other // char besides :, since : is not a legal tag start. $parts = explode(':', $name); return array_pop($parts); } |
631 */ | 639 */ |
632 return $name; | 640 return $tagName; |
633 } | 641 } |
634 | 642 |
635 protected function quirksTreeResolver($name) | 643 protected function quirksTreeResolver($name) |
636 { | 644 { |
637 throw new \Exception("Not implemented."); | 645 throw new \Exception('Not implemented.'); |
638 } | 646 } |
639 | 647 |
640 /** | 648 /** |
641 * Automatically climb the tree and close the closest node with the matching $tag. | 649 * Automatically climb the tree and close the closest node with the matching $tag. |
642 */ | 650 * |
643 protected function autoclose($tag) | 651 * @param string $tagName |
652 * | |
653 * @return bool | |
654 */ | |
655 protected function autoclose($tagName) | |
644 { | 656 { |
645 $working = $this->current; | 657 $working = $this->current; |
646 do { | 658 do { |
647 if ($working->nodeType != XML_ELEMENT_NODE) { | 659 if (XML_ELEMENT_NODE !== $working->nodeType) { |
648 return false; | 660 return false; |
649 } | 661 } |
650 if ($working->tagName == $tag) { | 662 if ($working->tagName === $tagName) { |
651 $this->current = $working->parentNode; | 663 $this->current = $working->parentNode; |
652 | 664 |
653 return true; | 665 return true; |
654 } | 666 } |
655 } while ($working = $working->parentNode); | 667 } while ($working = $working->parentNode); |
668 | |
656 return false; | 669 return false; |
657 } | 670 } |
658 | 671 |
659 /** | 672 /** |
660 * Checks if the given tagname is an ancestor of the present candidate. | 673 * Checks if the given tagname is an ancestor of the present candidate. |
661 * | 674 * |
662 * If $this->current or anything above $this->current matches the given tag | 675 * If $this->current or anything above $this->current matches the given tag |
663 * name, this returns true. | 676 * name, this returns true. |
664 */ | 677 * |
665 protected function isAncestor($tagname) | 678 * @param string $tagName |
679 * | |
680 * @return bool | |
681 */ | |
682 protected function isAncestor($tagName) | |
666 { | 683 { |
667 $candidate = $this->current; | 684 $candidate = $this->current; |
668 while ($candidate->nodeType === XML_ELEMENT_NODE) { | 685 while (XML_ELEMENT_NODE === $candidate->nodeType) { |
669 if ($candidate->tagName == $tagname) { | 686 if ($candidate->tagName === $tagName) { |
670 return true; | 687 return true; |
671 } | 688 } |
672 $candidate = $candidate->parentNode; | 689 $candidate = $candidate->parentNode; |
673 } | 690 } |
674 | 691 |
675 return false; | 692 return false; |
676 } | 693 } |
677 | 694 |
678 /** | 695 /** |
679 * Returns true if the immediate parent element is of the given tagname. | 696 * Returns true if the immediate parent element is of the given tagname. |
680 */ | 697 * |
681 protected function isParent($tagname) | 698 * @param string $tagName |
682 { | 699 * |
683 return $this->current->tagName == $tagname; | 700 * @return bool |
701 */ | |
702 protected function isParent($tagName) | |
703 { | |
704 return $this->current->tagName === $tagName; | |
684 } | 705 } |
685 } | 706 } |