comparison vendor/masterminds/html5/src/HTML5/Serializer/OutputRules.php @ 17:129ea1e6d783

Update, including to Drupal core 8.6.10
author Chris Cannam
date Thu, 28 Feb 2019 13:21:36 +0000
parents 4c8ae668cc8c
children
comparison
equal deleted inserted replaced
16:c2387f117808 17:129ea1e6d783
4 * The rules for generating output in the serializer. 4 * The rules for generating output in the serializer.
5 * 5 *
6 * These output rules are likely to generate output similar to the document that 6 * These output rules are likely to generate output similar to the document that
7 * was parsed. It is not intended to output exactly the document that was parsed. 7 * was parsed. It is not intended to output exactly the document that was parsed.
8 */ 8 */
9
9 namespace Masterminds\HTML5\Serializer; 10 namespace Masterminds\HTML5\Serializer;
10 11
11 use Masterminds\HTML5\Elements; 12 use Masterminds\HTML5\Elements;
12 13
13 /** 14 /**
14 * Generate the output html5 based on element rules. 15 * Generate the output html5 based on element rules.
15 */ 16 */
16 class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface 17 class OutputRules implements RulesInterface
17 { 18 {
18 /** 19 /**
19 * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0 20 * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0.
20 */ 21 */
21 const NAMESPACE_HTML = 'http://www.w3.org/1999/xhtml'; 22 const NAMESPACE_HTML = 'http://www.w3.org/1999/xhtml';
22 23
23 const NAMESPACE_MATHML = 'http://www.w3.org/1998/Math/MathML'; 24 const NAMESPACE_MATHML = 'http://www.w3.org/1998/Math/MathML';
24 25
29 const NAMESPACE_XML = 'http://www.w3.org/XML/1998/namespace'; 30 const NAMESPACE_XML = 'http://www.w3.org/XML/1998/namespace';
30 31
31 const NAMESPACE_XMLNS = 'http://www.w3.org/2000/xmlns/'; 32 const NAMESPACE_XMLNS = 'http://www.w3.org/2000/xmlns/';
32 33
33 /** 34 /**
34 * Holds the HTML5 element names that causes a namespace switch 35 * Holds the HTML5 element names that causes a namespace switch.
35 * 36 *
36 * @var array 37 * @var array
37 */ 38 */
38 protected $implicitNamespaces = array( 39 protected $implicitNamespaces = array(
39 self::NAMESPACE_HTML, 40 self::NAMESPACE_HTML,
48 const IM_IN_SVG = 2; 49 const IM_IN_SVG = 2;
49 50
50 const IM_IN_MATHML = 3; 51 const IM_IN_MATHML = 3;
51 52
52 /** 53 /**
53 * Used as cache to detect if is available ENT_HTML5 54 * Used as cache to detect if is available ENT_HTML5.
54 * @var boolean 55 *
56 * @var bool
55 */ 57 */
56 private $hasHTML5 = false; 58 private $hasHTML5 = false;
57 59
58 protected $traverser; 60 protected $traverser;
59 61
167 $this->out = $output; 169 $this->out = $output;
168 170
169 // If HHVM, see https://github.com/facebook/hhvm/issues/2727 171 // If HHVM, see https://github.com/facebook/hhvm/issues/2727
170 $this->hasHTML5 = defined('ENT_HTML5') && !defined('HHVM_VERSION'); 172 $this->hasHTML5 = defined('ENT_HTML5') && !defined('HHVM_VERSION');
171 } 173 }
174
172 public function addRule(array $rule) 175 public function addRule(array $rule)
173 { 176 {
174 $this->nonBooleanAttributes[] = $rule; 177 $this->nonBooleanAttributes[] = $rule;
175 } 178 }
176 179
177 public function setTraverser(\Masterminds\HTML5\Serializer\Traverser $traverser) 180 public function setTraverser(Traverser $traverser)
178 { 181 {
179 $this->traverser = $traverser; 182 $this->traverser = $traverser;
180 183
181 return $this; 184 return $this;
182 } 185 }
209 $name = $ele->localName; 212 $name = $ele->localName;
210 } 213 }
211 214
212 // If we are in SVG or MathML there is special handling. 215 // If we are in SVG or MathML there is special handling.
213 // Using if/elseif instead of switch because it's faster in PHP. 216 // Using if/elseif instead of switch because it's faster in PHP.
214 if ($name == 'svg') { 217 if ('svg' == $name) {
215 $this->outputMode = static::IM_IN_SVG; 218 $this->outputMode = static::IM_IN_SVG;
216 $name = Elements::normalizeSvgElement($name); 219 $name = Elements::normalizeSvgElement($name);
217 } elseif ($name == 'math') { 220 } elseif ('math' == $name) {
218 $this->outputMode = static::IM_IN_MATHML; 221 $this->outputMode = static::IM_IN_MATHML;
219 } 222 }
220 223
221 $this->openTag($ele); 224 $this->openTag($ele);
222 if (Elements::isA($name, Elements::TEXT_RAW)) { 225 if (Elements::isA($name, Elements::TEXT_RAW)) {
232 if ($ele->hasChildNodes()) { 235 if ($ele->hasChildNodes()) {
233 $this->traverser->children($ele->childNodes); 236 $this->traverser->children($ele->childNodes);
234 } 237 }
235 238
236 // Close out the SVG or MathML special handling. 239 // Close out the SVG or MathML special handling.
237 if ($name == 'svg' || $name == 'math') { 240 if ('svg' == $name || 'math' == $name) {
238 $this->outputMode = static::IM_IN_HTML; 241 $this->outputMode = static::IM_IN_HTML;
239 } 242 }
240 } 243 }
241 244
242 // If not unary, add a closing tag. 245 // If not unary, add a closing tag.
243 if (! Elements::isA($name, Elements::VOID_TAG)) { 246 if (!Elements::isA($name, Elements::VOID_TAG)) {
244 $this->closeTag($ele); 247 $this->closeTag($ele);
245 } 248 }
246 } 249 }
247 250
248 /** 251 /**
249 * Write a text node. 252 * Write a text node.
250 * 253 *
251 * @param \DOMText $ele 254 * @param \DOMText $ele The text node to write.
252 * The text node to write.
253 */ 255 */
254 public function text($ele) 256 public function text($ele)
255 { 257 {
256 if (isset($ele->parentNode) && isset($ele->parentNode->tagName) && Elements::isA($ele->parentNode->localName, Elements::TEXT_RAW)) { 258 if (isset($ele->parentNode) && isset($ele->parentNode->tagName) && Elements::isA($ele->parentNode->localName, Elements::TEXT_RAW)) {
257 $this->wr($ele->data); 259 $this->wr($ele->data);
260
258 return; 261 return;
259 } 262 }
260 263
261 // FIXME: This probably needs some flags set. 264 // FIXME: This probably needs some flags set.
262 $this->wr($this->enc($ele->data)); 265 $this->wr($this->enc($ele->data));
281 ->wr($ele->target) 284 ->wr($ele->target)
282 ->wr(' ') 285 ->wr(' ')
283 ->wr($ele->data) 286 ->wr($ele->data)
284 ->wr('?>'); 287 ->wr('?>');
285 } 288 }
286 /** 289
287 * Write the namespace attributes 290 /**
288 * 291 * Write the namespace attributes.
289 * 292 *
290 * @param \DOMNode $ele 293 * @param \DOMNode $ele The element being written.
291 * The element being written.
292 */ 294 */
293 protected function namespaceAttrs($ele) 295 protected function namespaceAttrs($ele)
294 { 296 {
295 if (!$this->xpath || $this->xpath->document !== $ele->ownerDocument){ 297 if (!$this->xpath || $this->xpath->document !== $ele->ownerDocument) {
296 $this->xpath = new \DOMXPath($ele->ownerDocument); 298 $this->xpath = new \DOMXPath($ele->ownerDocument);
297 } 299 }
298 300
299 foreach( $this->xpath->query('namespace::*[not(.=../../namespace::*)]', $ele ) as $nsNode ) { 301 foreach ($this->xpath->query('namespace::*[not(.=../../namespace::*)]', $ele) as $nsNode) {
300 if (!in_array($nsNode->nodeValue, $this->implicitNamespaces)) { 302 if (!in_array($nsNode->nodeValue, $this->implicitNamespaces)) {
301 $this->wr(' ')->wr($nsNode->nodeName)->wr('="')->wr($nsNode->nodeValue)->wr('"'); 303 $this->wr(' ')->wr($nsNode->nodeName)->wr('="')->wr($nsNode->nodeValue)->wr('"');
302 } 304 }
303 } 305 }
304 } 306 }
307 * Write the opening tag. 309 * Write the opening tag.
308 * 310 *
309 * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the 311 * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
310 * qualified name (8.3). 312 * qualified name (8.3).
311 * 313 *
312 * @param \DOMNode $ele 314 * @param \DOMNode $ele The element being written.
313 * The element being written.
314 */ 315 */
315 protected function openTag($ele) 316 protected function openTag($ele)
316 { 317 {
317 $this->wr('<')->wr($this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName); 318 $this->wr('<')->wr($this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName);
318
319 319
320 $this->attrs($ele); 320 $this->attrs($ele);
321 $this->namespaceAttrs($ele); 321 $this->namespaceAttrs($ele);
322
323 322
324 if ($this->outputMode == static::IM_IN_HTML) { 323 if ($this->outputMode == static::IM_IN_HTML) {
325 $this->wr('>'); 324 $this->wr('>');
326 } // If we are not in html mode we are in SVG, MathML, or XML embedded content. 325 } // If we are not in html mode we are in SVG, MathML, or XML embedded content.
327 else { 326 else {
335 } 334 }
336 335
337 protected function attrs($ele) 336 protected function attrs($ele)
338 { 337 {
339 // FIXME: Needs support for xml, xmlns, xlink, and namespaced elements. 338 // FIXME: Needs support for xml, xmlns, xlink, and namespaced elements.
340 if (! $ele->hasAttributes()) { 339 if (!$ele->hasAttributes()) {
341 return $this; 340 return $this;
342 } 341 }
343 342
344 // TODO: Currently, this always writes name="value", and does not do 343 // TODO: Currently, this always writes name="value", and does not do
345 // value-less attributes. 344 // value-less attributes.
346 $map = $ele->attributes; 345 $map = $ele->attributes;
347 $len = $map->length; 346 $len = $map->length;
348 for ($i = 0; $i < $len; ++ $i) { 347 for ($i = 0; $i < $len; ++$i) {
349 $node = $map->item($i); 348 $node = $map->item($i);
350 $val = $this->enc($node->value, true); 349 $val = $this->enc($node->value, true);
351 350
352 // XXX: The spec says that we need to ensure that anything in 351 // XXX: The spec says that we need to ensure that anything in
353 // the XML, XMLNS, or XLink NS's should use the canonical 352 // the XML, XMLNS, or XLink NS's should use the canonical
363 $name = Elements::normalizeMathMlAttribute($name); 362 $name = Elements::normalizeMathMlAttribute($name);
364 } 363 }
365 364
366 $this->wr(' ')->wr($name); 365 $this->wr(' ')->wr($name);
367 366
368 if ((isset($val) && $val !== '') || $this->nonBooleanAttribute($node)) { 367 if ((isset($val) && '' !== $val) || $this->nonBooleanAttribute($node)) {
369 $this->wr('="')->wr($val)->wr('"'); 368 $this->wr('="')->wr($val)->wr('"');
370 } 369 }
371 } 370 }
372 } 371 }
373 372
374
375 protected function nonBooleanAttribute(\DOMAttr $attr) 373 protected function nonBooleanAttribute(\DOMAttr $attr)
376 { 374 {
377 $ele = $attr->ownerElement; 375 $ele = $attr->ownerElement;
378 foreach($this->nonBooleanAttributes as $rule){ 376 foreach ($this->nonBooleanAttributes as $rule) {
379 377 if (isset($rule['nodeNamespace']) && $rule['nodeNamespace'] !== $ele->namespaceURI) {
380 if(isset($rule['nodeNamespace']) && $rule['nodeNamespace']!==$ele->namespaceURI){
381 continue; 378 continue;
382 } 379 }
383 if(isset($rule['attNamespace']) && $rule['attNamespace']!==$attr->namespaceURI){ 380 if (isset($rule['attNamespace']) && $rule['attNamespace'] !== $attr->namespaceURI) {
384 continue; 381 continue;
385 } 382 }
386 if(isset($rule['nodeName']) && !is_array($rule['nodeName']) && $rule['nodeName']!==$ele->localName){ 383 if (isset($rule['nodeName']) && !is_array($rule['nodeName']) && $rule['nodeName'] !== $ele->localName) {
387 continue; 384 continue;
388 } 385 }
389 if(isset($rule['nodeName']) && is_array($rule['nodeName']) && !in_array($ele->localName, $rule['nodeName'], true)){ 386 if (isset($rule['nodeName']) && is_array($rule['nodeName']) && !in_array($ele->localName, $rule['nodeName'], true)) {
390 continue; 387 continue;
391 } 388 }
392 if(isset($rule['attrName']) && !is_array($rule['attrName']) && $rule['attrName']!==$attr->localName){ 389 if (isset($rule['attrName']) && !is_array($rule['attrName']) && $rule['attrName'] !== $attr->localName) {
393 continue; 390 continue;
394 } 391 }
395 if(isset($rule['attrName']) && is_array($rule['attrName']) && !in_array($attr->localName, $rule['attrName'], true)){ 392 if (isset($rule['attrName']) && is_array($rule['attrName']) && !in_array($attr->localName, $rule['attrName'], true)) {
396 continue; 393 continue;
397 } 394 }
398 if(isset($rule['xpath'])){ 395 if (isset($rule['xpath'])) {
399
400 $xp = $this->getXPath($attr); 396 $xp = $this->getXPath($attr);
401 if(isset($rule['prefixes'])){ 397 if (isset($rule['prefixes'])) {
402 foreach($rule['prefixes'] as $nsPrefix => $ns){ 398 foreach ($rule['prefixes'] as $nsPrefix => $ns) {
403 $xp->registerNamespace($nsPrefix, $ns); 399 $xp->registerNamespace($nsPrefix, $ns);
404 } 400 }
405 } 401 }
406 if(!$xp->evaluate($rule['xpath'], $attr)){ 402 if (!$xp->evaluate($rule['xpath'], $attr)) {
407 continue; 403 continue;
408 } 404 }
409 } 405 }
410 406
411 return true; 407 return true;
412 } 408 }
413 409
414 return false; 410 return false;
415 } 411 }
416 412
417 private function getXPath(\DOMNode $node){ 413 private function getXPath(\DOMNode $node)
418 if(!$this->xpath){ 414 {
415 if (!$this->xpath) {
419 $this->xpath = new \DOMXPath($node->ownerDocument); 416 $this->xpath = new \DOMXPath($node->ownerDocument);
420 } 417 }
418
421 return $this->xpath; 419 return $this->xpath;
422 } 420 }
423 421
424 /** 422 /**
425 * Write the closing tag. 423 * Write the closing tag.
426 * 424 *
427 * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the 425 * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
428 * qualified name (8.3). 426 * qualified name (8.3).
429 * 427 *
430 * @param \DOMNode $ele 428 * @param \DOMNode $ele The element being written.
431 * The element being written.
432 */ 429 */
433 protected function closeTag($ele) 430 protected function closeTag($ele)
434 { 431 {
435 if ($this->outputMode == static::IM_IN_HTML || $ele->hasChildNodes()) { 432 if ($this->outputMode == static::IM_IN_HTML || $ele->hasChildNodes()) {
436 $this->wr('</')->wr($this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName)->wr('>'); 433 $this->wr('</')->wr($this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName)->wr('>');
438 } 435 }
439 436
440 /** 437 /**
441 * Write to the output. 438 * Write to the output.
442 * 439 *
443 * @param string $text 440 * @param string $text The string to put into the output
444 * The string to put into the output. 441 *
445 * 442 * @return $this
446 * @return \Masterminds\HTML5\Serializer\Traverser $this so it can be used in chaining.
447 */ 443 */
448 protected function wr($text) 444 protected function wr($text)
449 { 445 {
450 fwrite($this->out, $text); 446 fwrite($this->out, $text);
447
451 return $this; 448 return $this;
452 } 449 }
453 450
454 /** 451 /**
455 * Write a new line character. 452 * Write a new line character.
456 * 453 *
457 * @return \Masterminds\HTML5\Serializer\Traverser $this so it can be used in chaining. 454 * @return $this
458 */ 455 */
459 protected function nl() 456 protected function nl()
460 { 457 {
461 fwrite($this->out, PHP_EOL); 458 fwrite($this->out, PHP_EOL);
459
462 return $this; 460 return $this;
463 } 461 }
464 462
465 /** 463 /**
466 * Encode text. 464 * Encode text.
482 * 480 *
483 * Note, PHP 5.4+ has better html5 encoding. 481 * Note, PHP 5.4+ has better html5 encoding.
484 * 482 *
485 * @todo Use the Entities class in php 5.3 to have html5 entities. 483 * @todo Use the Entities class in php 5.3 to have html5 entities.
486 * 484 *
487 * @param string $text 485 * @param string $text Text to encode.
488 * text to encode. 486 * @param bool $attribute True if we are encoding an attrubute, false otherwise.
489 * @param boolean $attribute
490 * True if we are encoding an attrubute, false otherwise
491 * 487 *
492 * @return string The encoded text. 488 * @return string The encoded text.
493 */ 489 */
494 protected function enc($text, $attribute = false) 490 protected function enc($text, $attribute = false)
495 { 491 {
496
497 // Escape the text rather than convert to named character references. 492 // Escape the text rather than convert to named character references.
498 if (! $this->encode) { 493 if (!$this->encode) {
499 return $this->escape($text, $attribute); 494 return $this->escape($text, $attribute);
500 } 495 }
501 496
502 // If we are in PHP 5.4+ we can use the native html5 entity functionality to 497 // If we are in PHP 5.4+ we can use the native html5 entity functionality to
503 // convert the named character references. 498 // convert the named character references.
505 if ($this->hasHTML5) { 500 if ($this->hasHTML5) {
506 return htmlentities($text, ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES, 'UTF-8', false); 501 return htmlentities($text, ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES, 'UTF-8', false);
507 } // If a version earlier than 5.4 html5 entities are not entirely handled. 502 } // If a version earlier than 5.4 html5 entities are not entirely handled.
508 // This manually handles them. 503 // This manually handles them.
509 else { 504 else {
510 return strtr($text, \Masterminds\HTML5\Serializer\HTML5Entities::$map); 505 return strtr($text, HTML5Entities::$map);
511 } 506 }
512 } 507 }
513 508
514 /** 509 /**
515 * Escape test. 510 * Escape test.
523 * &quot;, and when not in attribute mode the < and > should be converted to 518 * &quot;, and when not in attribute mode the < and > should be converted to
524 * &lt; and &gt;. 519 * &lt; and &gt;.
525 * 520 *
526 * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#escapingString 521 * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#escapingString
527 * 522 *
528 * @param string $text 523 * @param string $text Text to escape.
529 * text to escape. 524 * @param bool $attribute True if we are escaping an attrubute, false otherwise.
530 * @param boolean $attribute
531 * True if we are escaping an attrubute, false otherwise
532 */ 525 */
533 protected function escape($text, $attribute = false) 526 protected function escape($text, $attribute = false)
534 { 527 {
535
536 // Not using htmlspecialchars because, while it does escaping, it doesn't 528 // Not using htmlspecialchars because, while it does escaping, it doesn't
537 // match the requirements of section 8.5. For example, it doesn't handle 529 // match the requirements of section 8.5. For example, it doesn't handle
538 // non-breaking spaces. 530 // non-breaking spaces.
539 if ($attribute) { 531 if ($attribute) {
540 $replace = array( 532 $replace = array(
541 '"' => '&quot;', 533 '"' => '&quot;',
542 '&' => '&amp;', 534 '&' => '&amp;',
543 "\xc2\xa0" => '&nbsp;' 535 "\xc2\xa0" => '&nbsp;',
544 ); 536 );
545 } else { 537 } else {
546 $replace = array( 538 $replace = array(
547 '<' => '&lt;', 539 '<' => '&lt;',
548 '>' => '&gt;', 540 '>' => '&gt;',
549 '&' => '&amp;', 541 '&' => '&amp;',
550 "\xc2\xa0" => '&nbsp;' 542 "\xc2\xa0" => '&nbsp;',
551 ); 543 );
552 } 544 }
553 545
554 return strtr($text, $replace); 546 return strtr($text, $replace);
555 } 547 }