Mercurial > hg > isophonics-drupal-site
comparison vendor/masterminds/html5/src/HTML5/Serializer/OutputRules.php @ 17:129ea1e6d783
Update, including to Drupal core 8.6.10
author | Chris Cannam |
---|---|
date | Thu, 28 Feb 2019 13:21:36 +0000 |
parents | 4c8ae668cc8c |
children |
comparison
equal
deleted
inserted
replaced
16:c2387f117808 | 17:129ea1e6d783 |
---|---|
4 * The rules for generating output in the serializer. | 4 * The rules for generating output in the serializer. |
5 * | 5 * |
6 * These output rules are likely to generate output similar to the document that | 6 * These output rules are likely to generate output similar to the document that |
7 * was parsed. It is not intended to output exactly the document that was parsed. | 7 * was parsed. It is not intended to output exactly the document that was parsed. |
8 */ | 8 */ |
9 | |
9 namespace Masterminds\HTML5\Serializer; | 10 namespace Masterminds\HTML5\Serializer; |
10 | 11 |
11 use Masterminds\HTML5\Elements; | 12 use Masterminds\HTML5\Elements; |
12 | 13 |
13 /** | 14 /** |
14 * Generate the output html5 based on element rules. | 15 * Generate the output html5 based on element rules. |
15 */ | 16 */ |
16 class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface | 17 class OutputRules implements RulesInterface |
17 { | 18 { |
18 /** | 19 /** |
19 * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0 | 20 * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0. |
20 */ | 21 */ |
21 const NAMESPACE_HTML = 'http://www.w3.org/1999/xhtml'; | 22 const NAMESPACE_HTML = 'http://www.w3.org/1999/xhtml'; |
22 | 23 |
23 const NAMESPACE_MATHML = 'http://www.w3.org/1998/Math/MathML'; | 24 const NAMESPACE_MATHML = 'http://www.w3.org/1998/Math/MathML'; |
24 | 25 |
29 const NAMESPACE_XML = 'http://www.w3.org/XML/1998/namespace'; | 30 const NAMESPACE_XML = 'http://www.w3.org/XML/1998/namespace'; |
30 | 31 |
31 const NAMESPACE_XMLNS = 'http://www.w3.org/2000/xmlns/'; | 32 const NAMESPACE_XMLNS = 'http://www.w3.org/2000/xmlns/'; |
32 | 33 |
33 /** | 34 /** |
34 * Holds the HTML5 element names that causes a namespace switch | 35 * Holds the HTML5 element names that causes a namespace switch. |
35 * | 36 * |
36 * @var array | 37 * @var array |
37 */ | 38 */ |
38 protected $implicitNamespaces = array( | 39 protected $implicitNamespaces = array( |
39 self::NAMESPACE_HTML, | 40 self::NAMESPACE_HTML, |
48 const IM_IN_SVG = 2; | 49 const IM_IN_SVG = 2; |
49 | 50 |
50 const IM_IN_MATHML = 3; | 51 const IM_IN_MATHML = 3; |
51 | 52 |
52 /** | 53 /** |
53 * Used as cache to detect if is available ENT_HTML5 | 54 * Used as cache to detect if is available ENT_HTML5. |
54 * @var boolean | 55 * |
56 * @var bool | |
55 */ | 57 */ |
56 private $hasHTML5 = false; | 58 private $hasHTML5 = false; |
57 | 59 |
58 protected $traverser; | 60 protected $traverser; |
59 | 61 |
167 $this->out = $output; | 169 $this->out = $output; |
168 | 170 |
169 // If HHVM, see https://github.com/facebook/hhvm/issues/2727 | 171 // If HHVM, see https://github.com/facebook/hhvm/issues/2727 |
170 $this->hasHTML5 = defined('ENT_HTML5') && !defined('HHVM_VERSION'); | 172 $this->hasHTML5 = defined('ENT_HTML5') && !defined('HHVM_VERSION'); |
171 } | 173 } |
174 | |
172 public function addRule(array $rule) | 175 public function addRule(array $rule) |
173 { | 176 { |
174 $this->nonBooleanAttributes[] = $rule; | 177 $this->nonBooleanAttributes[] = $rule; |
175 } | 178 } |
176 | 179 |
177 public function setTraverser(\Masterminds\HTML5\Serializer\Traverser $traverser) | 180 public function setTraverser(Traverser $traverser) |
178 { | 181 { |
179 $this->traverser = $traverser; | 182 $this->traverser = $traverser; |
180 | 183 |
181 return $this; | 184 return $this; |
182 } | 185 } |
209 $name = $ele->localName; | 212 $name = $ele->localName; |
210 } | 213 } |
211 | 214 |
212 // If we are in SVG or MathML there is special handling. | 215 // If we are in SVG or MathML there is special handling. |
213 // Using if/elseif instead of switch because it's faster in PHP. | 216 // Using if/elseif instead of switch because it's faster in PHP. |
214 if ($name == 'svg') { | 217 if ('svg' == $name) { |
215 $this->outputMode = static::IM_IN_SVG; | 218 $this->outputMode = static::IM_IN_SVG; |
216 $name = Elements::normalizeSvgElement($name); | 219 $name = Elements::normalizeSvgElement($name); |
217 } elseif ($name == 'math') { | 220 } elseif ('math' == $name) { |
218 $this->outputMode = static::IM_IN_MATHML; | 221 $this->outputMode = static::IM_IN_MATHML; |
219 } | 222 } |
220 | 223 |
221 $this->openTag($ele); | 224 $this->openTag($ele); |
222 if (Elements::isA($name, Elements::TEXT_RAW)) { | 225 if (Elements::isA($name, Elements::TEXT_RAW)) { |
232 if ($ele->hasChildNodes()) { | 235 if ($ele->hasChildNodes()) { |
233 $this->traverser->children($ele->childNodes); | 236 $this->traverser->children($ele->childNodes); |
234 } | 237 } |
235 | 238 |
236 // Close out the SVG or MathML special handling. | 239 // Close out the SVG or MathML special handling. |
237 if ($name == 'svg' || $name == 'math') { | 240 if ('svg' == $name || 'math' == $name) { |
238 $this->outputMode = static::IM_IN_HTML; | 241 $this->outputMode = static::IM_IN_HTML; |
239 } | 242 } |
240 } | 243 } |
241 | 244 |
242 // If not unary, add a closing tag. | 245 // If not unary, add a closing tag. |
243 if (! Elements::isA($name, Elements::VOID_TAG)) { | 246 if (!Elements::isA($name, Elements::VOID_TAG)) { |
244 $this->closeTag($ele); | 247 $this->closeTag($ele); |
245 } | 248 } |
246 } | 249 } |
247 | 250 |
248 /** | 251 /** |
249 * Write a text node. | 252 * Write a text node. |
250 * | 253 * |
251 * @param \DOMText $ele | 254 * @param \DOMText $ele The text node to write. |
252 * The text node to write. | |
253 */ | 255 */ |
254 public function text($ele) | 256 public function text($ele) |
255 { | 257 { |
256 if (isset($ele->parentNode) && isset($ele->parentNode->tagName) && Elements::isA($ele->parentNode->localName, Elements::TEXT_RAW)) { | 258 if (isset($ele->parentNode) && isset($ele->parentNode->tagName) && Elements::isA($ele->parentNode->localName, Elements::TEXT_RAW)) { |
257 $this->wr($ele->data); | 259 $this->wr($ele->data); |
260 | |
258 return; | 261 return; |
259 } | 262 } |
260 | 263 |
261 // FIXME: This probably needs some flags set. | 264 // FIXME: This probably needs some flags set. |
262 $this->wr($this->enc($ele->data)); | 265 $this->wr($this->enc($ele->data)); |
281 ->wr($ele->target) | 284 ->wr($ele->target) |
282 ->wr(' ') | 285 ->wr(' ') |
283 ->wr($ele->data) | 286 ->wr($ele->data) |
284 ->wr('?>'); | 287 ->wr('?>'); |
285 } | 288 } |
286 /** | 289 |
287 * Write the namespace attributes | 290 /** |
288 * | 291 * Write the namespace attributes. |
289 * | 292 * |
290 * @param \DOMNode $ele | 293 * @param \DOMNode $ele The element being written. |
291 * The element being written. | |
292 */ | 294 */ |
293 protected function namespaceAttrs($ele) | 295 protected function namespaceAttrs($ele) |
294 { | 296 { |
295 if (!$this->xpath || $this->xpath->document !== $ele->ownerDocument){ | 297 if (!$this->xpath || $this->xpath->document !== $ele->ownerDocument) { |
296 $this->xpath = new \DOMXPath($ele->ownerDocument); | 298 $this->xpath = new \DOMXPath($ele->ownerDocument); |
297 } | 299 } |
298 | 300 |
299 foreach( $this->xpath->query('namespace::*[not(.=../../namespace::*)]', $ele ) as $nsNode ) { | 301 foreach ($this->xpath->query('namespace::*[not(.=../../namespace::*)]', $ele) as $nsNode) { |
300 if (!in_array($nsNode->nodeValue, $this->implicitNamespaces)) { | 302 if (!in_array($nsNode->nodeValue, $this->implicitNamespaces)) { |
301 $this->wr(' ')->wr($nsNode->nodeName)->wr('="')->wr($nsNode->nodeValue)->wr('"'); | 303 $this->wr(' ')->wr($nsNode->nodeName)->wr('="')->wr($nsNode->nodeValue)->wr('"'); |
302 } | 304 } |
303 } | 305 } |
304 } | 306 } |
307 * Write the opening tag. | 309 * Write the opening tag. |
308 * | 310 * |
309 * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the | 311 * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the |
310 * qualified name (8.3). | 312 * qualified name (8.3). |
311 * | 313 * |
312 * @param \DOMNode $ele | 314 * @param \DOMNode $ele The element being written. |
313 * The element being written. | |
314 */ | 315 */ |
315 protected function openTag($ele) | 316 protected function openTag($ele) |
316 { | 317 { |
317 $this->wr('<')->wr($this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName); | 318 $this->wr('<')->wr($this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName); |
318 | |
319 | 319 |
320 $this->attrs($ele); | 320 $this->attrs($ele); |
321 $this->namespaceAttrs($ele); | 321 $this->namespaceAttrs($ele); |
322 | |
323 | 322 |
324 if ($this->outputMode == static::IM_IN_HTML) { | 323 if ($this->outputMode == static::IM_IN_HTML) { |
325 $this->wr('>'); | 324 $this->wr('>'); |
326 } // If we are not in html mode we are in SVG, MathML, or XML embedded content. | 325 } // If we are not in html mode we are in SVG, MathML, or XML embedded content. |
327 else { | 326 else { |
335 } | 334 } |
336 | 335 |
337 protected function attrs($ele) | 336 protected function attrs($ele) |
338 { | 337 { |
339 // FIXME: Needs support for xml, xmlns, xlink, and namespaced elements. | 338 // FIXME: Needs support for xml, xmlns, xlink, and namespaced elements. |
340 if (! $ele->hasAttributes()) { | 339 if (!$ele->hasAttributes()) { |
341 return $this; | 340 return $this; |
342 } | 341 } |
343 | 342 |
344 // TODO: Currently, this always writes name="value", and does not do | 343 // TODO: Currently, this always writes name="value", and does not do |
345 // value-less attributes. | 344 // value-less attributes. |
346 $map = $ele->attributes; | 345 $map = $ele->attributes; |
347 $len = $map->length; | 346 $len = $map->length; |
348 for ($i = 0; $i < $len; ++ $i) { | 347 for ($i = 0; $i < $len; ++$i) { |
349 $node = $map->item($i); | 348 $node = $map->item($i); |
350 $val = $this->enc($node->value, true); | 349 $val = $this->enc($node->value, true); |
351 | 350 |
352 // XXX: The spec says that we need to ensure that anything in | 351 // XXX: The spec says that we need to ensure that anything in |
353 // the XML, XMLNS, or XLink NS's should use the canonical | 352 // the XML, XMLNS, or XLink NS's should use the canonical |
363 $name = Elements::normalizeMathMlAttribute($name); | 362 $name = Elements::normalizeMathMlAttribute($name); |
364 } | 363 } |
365 | 364 |
366 $this->wr(' ')->wr($name); | 365 $this->wr(' ')->wr($name); |
367 | 366 |
368 if ((isset($val) && $val !== '') || $this->nonBooleanAttribute($node)) { | 367 if ((isset($val) && '' !== $val) || $this->nonBooleanAttribute($node)) { |
369 $this->wr('="')->wr($val)->wr('"'); | 368 $this->wr('="')->wr($val)->wr('"'); |
370 } | 369 } |
371 } | 370 } |
372 } | 371 } |
373 | 372 |
374 | |
375 protected function nonBooleanAttribute(\DOMAttr $attr) | 373 protected function nonBooleanAttribute(\DOMAttr $attr) |
376 { | 374 { |
377 $ele = $attr->ownerElement; | 375 $ele = $attr->ownerElement; |
378 foreach($this->nonBooleanAttributes as $rule){ | 376 foreach ($this->nonBooleanAttributes as $rule) { |
379 | 377 if (isset($rule['nodeNamespace']) && $rule['nodeNamespace'] !== $ele->namespaceURI) { |
380 if(isset($rule['nodeNamespace']) && $rule['nodeNamespace']!==$ele->namespaceURI){ | |
381 continue; | 378 continue; |
382 } | 379 } |
383 if(isset($rule['attNamespace']) && $rule['attNamespace']!==$attr->namespaceURI){ | 380 if (isset($rule['attNamespace']) && $rule['attNamespace'] !== $attr->namespaceURI) { |
384 continue; | 381 continue; |
385 } | 382 } |
386 if(isset($rule['nodeName']) && !is_array($rule['nodeName']) && $rule['nodeName']!==$ele->localName){ | 383 if (isset($rule['nodeName']) && !is_array($rule['nodeName']) && $rule['nodeName'] !== $ele->localName) { |
387 continue; | 384 continue; |
388 } | 385 } |
389 if(isset($rule['nodeName']) && is_array($rule['nodeName']) && !in_array($ele->localName, $rule['nodeName'], true)){ | 386 if (isset($rule['nodeName']) && is_array($rule['nodeName']) && !in_array($ele->localName, $rule['nodeName'], true)) { |
390 continue; | 387 continue; |
391 } | 388 } |
392 if(isset($rule['attrName']) && !is_array($rule['attrName']) && $rule['attrName']!==$attr->localName){ | 389 if (isset($rule['attrName']) && !is_array($rule['attrName']) && $rule['attrName'] !== $attr->localName) { |
393 continue; | 390 continue; |
394 } | 391 } |
395 if(isset($rule['attrName']) && is_array($rule['attrName']) && !in_array($attr->localName, $rule['attrName'], true)){ | 392 if (isset($rule['attrName']) && is_array($rule['attrName']) && !in_array($attr->localName, $rule['attrName'], true)) { |
396 continue; | 393 continue; |
397 } | 394 } |
398 if(isset($rule['xpath'])){ | 395 if (isset($rule['xpath'])) { |
399 | |
400 $xp = $this->getXPath($attr); | 396 $xp = $this->getXPath($attr); |
401 if(isset($rule['prefixes'])){ | 397 if (isset($rule['prefixes'])) { |
402 foreach($rule['prefixes'] as $nsPrefix => $ns){ | 398 foreach ($rule['prefixes'] as $nsPrefix => $ns) { |
403 $xp->registerNamespace($nsPrefix, $ns); | 399 $xp->registerNamespace($nsPrefix, $ns); |
404 } | 400 } |
405 } | 401 } |
406 if(!$xp->evaluate($rule['xpath'], $attr)){ | 402 if (!$xp->evaluate($rule['xpath'], $attr)) { |
407 continue; | 403 continue; |
408 } | 404 } |
409 } | 405 } |
410 | 406 |
411 return true; | 407 return true; |
412 } | 408 } |
413 | 409 |
414 return false; | 410 return false; |
415 } | 411 } |
416 | 412 |
417 private function getXPath(\DOMNode $node){ | 413 private function getXPath(\DOMNode $node) |
418 if(!$this->xpath){ | 414 { |
415 if (!$this->xpath) { | |
419 $this->xpath = new \DOMXPath($node->ownerDocument); | 416 $this->xpath = new \DOMXPath($node->ownerDocument); |
420 } | 417 } |
418 | |
421 return $this->xpath; | 419 return $this->xpath; |
422 } | 420 } |
423 | 421 |
424 /** | 422 /** |
425 * Write the closing tag. | 423 * Write the closing tag. |
426 * | 424 * |
427 * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the | 425 * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the |
428 * qualified name (8.3). | 426 * qualified name (8.3). |
429 * | 427 * |
430 * @param \DOMNode $ele | 428 * @param \DOMNode $ele The element being written. |
431 * The element being written. | |
432 */ | 429 */ |
433 protected function closeTag($ele) | 430 protected function closeTag($ele) |
434 { | 431 { |
435 if ($this->outputMode == static::IM_IN_HTML || $ele->hasChildNodes()) { | 432 if ($this->outputMode == static::IM_IN_HTML || $ele->hasChildNodes()) { |
436 $this->wr('</')->wr($this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName)->wr('>'); | 433 $this->wr('</')->wr($this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName)->wr('>'); |
438 } | 435 } |
439 | 436 |
440 /** | 437 /** |
441 * Write to the output. | 438 * Write to the output. |
442 * | 439 * |
443 * @param string $text | 440 * @param string $text The string to put into the output |
444 * The string to put into the output. | 441 * |
445 * | 442 * @return $this |
446 * @return \Masterminds\HTML5\Serializer\Traverser $this so it can be used in chaining. | |
447 */ | 443 */ |
448 protected function wr($text) | 444 protected function wr($text) |
449 { | 445 { |
450 fwrite($this->out, $text); | 446 fwrite($this->out, $text); |
447 | |
451 return $this; | 448 return $this; |
452 } | 449 } |
453 | 450 |
454 /** | 451 /** |
455 * Write a new line character. | 452 * Write a new line character. |
456 * | 453 * |
457 * @return \Masterminds\HTML5\Serializer\Traverser $this so it can be used in chaining. | 454 * @return $this |
458 */ | 455 */ |
459 protected function nl() | 456 protected function nl() |
460 { | 457 { |
461 fwrite($this->out, PHP_EOL); | 458 fwrite($this->out, PHP_EOL); |
459 | |
462 return $this; | 460 return $this; |
463 } | 461 } |
464 | 462 |
465 /** | 463 /** |
466 * Encode text. | 464 * Encode text. |
482 * | 480 * |
483 * Note, PHP 5.4+ has better html5 encoding. | 481 * Note, PHP 5.4+ has better html5 encoding. |
484 * | 482 * |
485 * @todo Use the Entities class in php 5.3 to have html5 entities. | 483 * @todo Use the Entities class in php 5.3 to have html5 entities. |
486 * | 484 * |
487 * @param string $text | 485 * @param string $text Text to encode. |
488 * text to encode. | 486 * @param bool $attribute True if we are encoding an attrubute, false otherwise. |
489 * @param boolean $attribute | |
490 * True if we are encoding an attrubute, false otherwise | |
491 * | 487 * |
492 * @return string The encoded text. | 488 * @return string The encoded text. |
493 */ | 489 */ |
494 protected function enc($text, $attribute = false) | 490 protected function enc($text, $attribute = false) |
495 { | 491 { |
496 | |
497 // Escape the text rather than convert to named character references. | 492 // Escape the text rather than convert to named character references. |
498 if (! $this->encode) { | 493 if (!$this->encode) { |
499 return $this->escape($text, $attribute); | 494 return $this->escape($text, $attribute); |
500 } | 495 } |
501 | 496 |
502 // If we are in PHP 5.4+ we can use the native html5 entity functionality to | 497 // If we are in PHP 5.4+ we can use the native html5 entity functionality to |
503 // convert the named character references. | 498 // convert the named character references. |
505 if ($this->hasHTML5) { | 500 if ($this->hasHTML5) { |
506 return htmlentities($text, ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES, 'UTF-8', false); | 501 return htmlentities($text, ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES, 'UTF-8', false); |
507 } // If a version earlier than 5.4 html5 entities are not entirely handled. | 502 } // If a version earlier than 5.4 html5 entities are not entirely handled. |
508 // This manually handles them. | 503 // This manually handles them. |
509 else { | 504 else { |
510 return strtr($text, \Masterminds\HTML5\Serializer\HTML5Entities::$map); | 505 return strtr($text, HTML5Entities::$map); |
511 } | 506 } |
512 } | 507 } |
513 | 508 |
514 /** | 509 /** |
515 * Escape test. | 510 * Escape test. |
523 * &quot;, and when not in attribute mode the < and > should be converted to | 518 * &quot;, and when not in attribute mode the < and > should be converted to |
524 * &lt; and &gt;. | 519 * &lt; and &gt;. |
525 * | 520 * |
526 * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#escapingString | 521 * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#escapingString |
527 * | 522 * |
528 * @param string $text | 523 * @param string $text Text to escape. |
529 * text to escape. | 524 * @param bool $attribute True if we are escaping an attrubute, false otherwise. |
530 * @param boolean $attribute | |
531 * True if we are escaping an attrubute, false otherwise | |
532 */ | 525 */ |
533 protected function escape($text, $attribute = false) | 526 protected function escape($text, $attribute = false) |
534 { | 527 { |
535 | |
536 // Not using htmlspecialchars because, while it does escaping, it doesn't | 528 // Not using htmlspecialchars because, while it does escaping, it doesn't |
537 // match the requirements of section 8.5. For example, it doesn't handle | 529 // match the requirements of section 8.5. For example, it doesn't handle |
538 // non-breaking spaces. | 530 // non-breaking spaces. |
539 if ($attribute) { | 531 if ($attribute) { |
540 $replace = array( | 532 $replace = array( |
541 '"' => '&quot;', | 533 '"' => '&quot;', |
542 '&' => '&amp;', | 534 '&' => '&amp;', |
543 "\xc2\xa0" => '&nbsp;' | 535 "\xc2\xa0" => '&nbsp;', |
544 ); | 536 ); |
545 } else { | 537 } else { |
546 $replace = array( | 538 $replace = array( |
547 '<' => '&lt;', | 539 '<' => '&lt;', |
548 '>' => '&gt;', | 540 '>' => '&gt;', |
549 '&' => '&amp;', | 541 '&' => '&amp;', |
550 "\xc2\xa0" => '&nbsp;' | 542 "\xc2\xa0" => '&nbsp;', |
551 ); | 543 ); |
552 } | 544 } |
553 | 545 |
554 return strtr($text, $replace); | 546 return strtr($text, $replace); |
555 } | 547 } |