Chris@0
|
1 <?php
|
Chris@0
|
2 /**
|
Chris@0
|
3 * @file
|
Chris@0
|
4 * The rules for generating output in the serializer.
|
Chris@0
|
5 *
|
Chris@0
|
6 * These output rules are likely to generate output similar to the document that
|
Chris@0
|
7 * was parsed. It is not intended to output exactly the document that was parsed.
|
Chris@0
|
8 */
|
Chris@0
|
9 namespace Masterminds\HTML5\Serializer;
|
Chris@0
|
10
|
Chris@0
|
11 use Masterminds\HTML5\Elements;
|
Chris@0
|
12
|
Chris@0
|
/**
 * Generate the output html5 based on element rules.
 *
 * The traverser walks the DOM and calls back into this class for every node;
 * each callback writes the serialized form of that node to the output stream
 * handed to the constructor.
 */
class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
{
    /**
     * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0
     */
    const NAMESPACE_HTML = 'http://www.w3.org/1999/xhtml';

    const NAMESPACE_MATHML = 'http://www.w3.org/1998/Math/MathML';

    const NAMESPACE_SVG = 'http://www.w3.org/2000/svg';

    const NAMESPACE_XLINK = 'http://www.w3.org/1999/xlink';

    const NAMESPACE_XML = 'http://www.w3.org/XML/1998/namespace';

    const NAMESPACE_XMLNS = 'http://www.w3.org/2000/xmlns/';

    /**
     * Namespace URIs that are never written out as xmlns attributes.
     *
     * namespaceAttrs() skips every namespace node whose URI is in this list.
     *
     * @var array
     */
    protected $implicitNamespaces = array(
        self::NAMESPACE_HTML,
        self::NAMESPACE_SVG,
        self::NAMESPACE_MATHML,
        self::NAMESPACE_XML,
        self::NAMESPACE_XMLNS,
    );

    const IM_IN_HTML = 1;

    const IM_IN_SVG = 2;

    const IM_IN_MATHML = 3;

    /**
     * Used as cache to detect if is available ENT_HTML5
     * @var boolean
     */
    private $hasHTML5 = false;

    /**
     * The traverser driving this rule set (see setTraverser()).
     */
    protected $traverser;

    /**
     * When true, enc() converts text to named character references instead
     * of applying the minimal escaping done by escape().
     */
    protected $encode = false;

    /**
     * The stream resource all output is written to.
     */
    protected $out;

    /**
     * One of the IM_IN_* constants: the kind of content currently being written.
     */
    protected $outputMode;

    /**
     * Lazily created XPath helper, bound to the document being serialized.
     */
    private $xpath;

    /**
     * Rules describing attributes that must always be written as name="value",
     * even when the value is empty (see nonBooleanAttribute()).
     *
     * @var array
     */
    protected $nonBooleanAttributes = array(
        /*
        array(
            'nodeNamespace'=>'http://www.w3.org/1999/xhtml',
            'attrNamespace'=>'http://www.w3.org/1999/xhtml',

            'nodeName'=>'img', 'nodeName'=>array('img', 'a'),
            'attrName'=>'alt', 'attrName'=>array('title', 'alt'),
        ),
        */
        array(
            'nodeNamespace' => 'http://www.w3.org/1999/xhtml',
            'attrName' => array('href',
                'hreflang',
                'http-equiv',
                'icon',
                'id',
                'keytype',
                'kind',
                'label',
                'lang',
                'language',
                'list',
                'maxlength',
                'media',
                'method',
                'name',
                'placeholder',
                'rel',
                'rows',
                'rowspan',
                'sandbox',
                'spellcheck',
                'scope',
                'seamless',
                'shape',
                'size',
                'sizes',
                'span',
                'src',
                'srcdoc',
                'srclang',
                'srcset',
                'start',
                'step',
                'style',
                'summary',
                'tabindex',
                'target',
                'title',
                'type',
                'value',
                'width',
                'border',
                'charset',
                'cite',
                'class',
                'code',
                'codebase',
                'color',
                'cols',
                'colspan',
                'content',
                'coords',
                'data',
                'datetime',
                'default',
                'dir',
                'dirname',
                'enctype',
                'for',
                'form',
                'formaction',
                'headers',
                'height',
                'accept',
                'accept-charset',
                'accesskey',
                'action',
                'align',
                'alt',
                'bgcolor',
            ),
        ),
        array(
            'nodeNamespace' => 'http://www.w3.org/1999/xhtml',
            'xpath' => 'starts-with(local-name(), \'data-\')',
        ),
    );

    const DOCTYPE = '<!DOCTYPE html>';

    /**
     * @param resource $output
     *   The stream to write the serialized document to.
     * @param array $options
     *   Recognized key: 'encode_entities' - when truthy, text is converted to
     *   named character references instead of being minimally escaped.
     */
    public function __construct($output, $options = array())
    {
        if (isset($options['encode_entities'])) {
            $this->encode = $options['encode_entities'];
        }

        $this->outputMode = static::IM_IN_HTML;
        $this->out = $output;

        // If HHVM, see https://github.com/facebook/hhvm/issues/2727
        $this->hasHTML5 = defined('ENT_HTML5') && !defined('HHVM_VERSION');
    }

    /**
     * Register an additional non-boolean attribute rule.
     *
     * @param array $rule
     *   A rule in the same shape as the entries of $nonBooleanAttributes.
     */
    public function addRule(array $rule)
    {
        $this->nonBooleanAttributes[] = $rule;
    }

    /**
     * Set the traverser used to walk child nodes.
     *
     * @param \Masterminds\HTML5\Serializer\Traverser $traverser
     *
     * @return \Masterminds\HTML5\Serializer\OutputRules $this so it can be used in chaining.
     */
    public function setTraverser(\Masterminds\HTML5\Serializer\Traverser $traverser)
    {
        $this->traverser = $traverser;

        return $this;
    }

    /**
     * Write a whole document: the doctype followed by every top-level node.
     *
     * @param \DOMDocument $dom
     *   The document to write. Nothing but the doctype is written when it has
     *   no document element.
     */
    public function document($dom)
    {
        $this->doctype();
        if ($dom->documentElement) {
            foreach ($dom->childNodes as $node) {
                $this->traverser->node($node);
            }
            $this->nl();
        }
    }

    /**
     * Write the doctype line.
     */
    protected function doctype()
    {
        $this->wr(static::DOCTYPE);
        $this->nl();
    }

    /**
     * Write an element: opening tag, content, and (unless void) closing tag.
     *
     * @param \DOMElement $ele
     *   The element to write.
     */
    public function element($ele)
    {
        $name = $ele->tagName;

        // Per spec:
        // If the element has a declared namespace in the HTML, MathML or
        // SVG namespaces, we use the lname instead of the tagName.
        if ($this->traverser->isLocalElement($ele)) {
            $name = $ele->localName;
        }

        // Remember the mode we came from so it can be restored once this
        // element is completely written. Resetting to HTML unconditionally
        // would be wrong for svg/math nested inside other foreign content.
        $previousMode = $this->outputMode;
        $switchedMode = false;

        // If we are in SVG or MathML there is special handling.
        // Using if/elseif instead of switch because it's faster in PHP.
        if ($name === 'svg') {
            $this->outputMode = static::IM_IN_SVG;
            $switchedMode = true;
            $name = Elements::normalizeSvgElement($name);
        } elseif ($name === 'math') {
            $this->outputMode = static::IM_IN_MATHML;
            $switchedMode = true;
        }

        $this->openTag($ele);
        if (Elements::isA($name, Elements::TEXT_RAW)) {
            // Raw text elements: character data is written verbatim, unescaped.
            foreach ($ele->childNodes as $child) {
                if ($child instanceof \DOMCharacterData) {
                    $this->wr($child->data);
                } elseif ($child instanceof \DOMElement) {
                    $this->element($child);
                }
            }
        } elseif ($ele->hasChildNodes()) {
            // Handle children.
            $this->traverser->children($ele->childNodes);
        }

        // If not unary, add a closing tag.
        if (! Elements::isA($name, Elements::VOID_TAG)) {
            $this->closeTag($ele);
        }

        // Close out the SVG or MathML special handling. This must happen
        // after closeTag(): that method consults the output mode to decide
        // whether an empty element was already written as self-closing.
        if ($switchedMode) {
            $this->outputMode = $previousMode;
        }
    }

    /**
     * Write a text node.
     *
     * @param \DOMText $ele
     *   The text node to write.
     */
    public function text($ele)
    {
        // Text directly inside a raw text element is written without escaping.
        if (isset($ele->parentNode) && isset($ele->parentNode->tagName) && Elements::isA($ele->parentNode->localName, Elements::TEXT_RAW)) {
            $this->wr($ele->data);
            return;
        }

        // FIXME: This probably needs some flags set.
        $this->wr($this->enc($ele->data));
    }

    /**
     * Write a CDATA section.
     *
     * @param \DOMCdataSection $ele
     *   The CDATA node to write.
     */
    public function cdata($ele)
    {
        // This encodes CDATA.
        $this->wr($ele->ownerDocument->saveXML($ele));
    }

    /**
     * Write a comment.
     *
     * @param \DOMComment $ele
     *   The comment node to write.
     */
    public function comment($ele)
    {
        // These produce identical output.
        // $this->wr('<!--')->wr($ele->data)->wr('-->');
        $this->wr($ele->ownerDocument->saveXML($ele));
    }

    /**
     * Write a processing instruction as <?target data?>.
     *
     * @param \DOMProcessingInstruction $ele
     *   The processing instruction to write.
     */
    public function processorInstruction($ele)
    {
        $this->wr('<?')
            ->wr($ele->target)
            ->wr(' ')
            ->wr($ele->data)
            ->wr('?>');
    }

    /**
     * Write the namespace attributes
     *
     * Only namespaces that are newly declared on this element (i.e. not
     * already in scope on its parent) and that are not implicit are written.
     *
     * @param \DOMNode $ele
     *   The element being written.
     */
    protected function namespaceAttrs($ele)
    {
        $xp = $this->getXPath($ele);

        foreach ($xp->query('namespace::*[not(.=../../namespace::*)]', $ele) as $nsNode) {
            if (in_array($nsNode->nodeValue, $this->implicitNamespaces, true)) {
                continue;
            }

            // The URI ends up inside a double-quoted attribute value, so it
            // has to be escaped like any other attribute value.
            $this->wr(' ')->wr($nsNode->nodeName)->wr('="')->wr($this->enc($nsNode->nodeValue, true))->wr('"');
        }
    }

    /**
     * Write the opening tag.
     *
     * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
     * qualified name (8.3).
     *
     * @param \DOMNode $ele
     *   The element being written.
     */
    protected function openTag($ele)
    {
        $this->wr('<')->wr($this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName);

        $this->attrs($ele);
        $this->namespaceAttrs($ele);

        // In HTML mode the tag is always closed with '>'. Otherwise we are in
        // SVG, MathML, or XML embedded content, where an element without
        // children is written as self closing.
        if ($this->outputMode == static::IM_IN_HTML || $ele->hasChildNodes()) {
            $this->wr('>');
        } else {
            $this->wr(' />');
        }
    }

    /**
     * Write the attributes of an element.
     *
     * An attribute is written as a bare name only when its value is empty and
     * no non-boolean rule matches it; otherwise as name="value".
     *
     * @param \DOMNode $ele
     *   The element whose attributes are written.
     *
     * @return \Masterminds\HTML5\Serializer\OutputRules $this so it can be used in chaining.
     */
    protected function attrs($ele)
    {
        // FIXME: Needs support for xml, xmlns, xlink, and namespaced elements.
        if (! $ele->hasAttributes()) {
            return $this;
        }

        $map = $ele->attributes;
        $len = $map->length;
        for ($i = 0; $i < $len; ++ $i) {
            $node = $map->item($i);
            $val = $this->enc($node->value, true);

            // XXX: The spec says that we need to ensure that anything in
            // the XML, XMLNS, or XLink NS's should use the canonical
            // prefix. It seems that DOM does this for us already, but there
            // may be exceptions.
            $name = $node->nodeName;

            // Special handling for attributes in SVG and MathML.
            // Using if/elseif instead of switch because it's faster in PHP.
            if ($this->outputMode == static::IM_IN_SVG) {
                $name = Elements::normalizeSvgAttribute($name);
            } elseif ($this->outputMode == static::IM_IN_MATHML) {
                $name = Elements::normalizeMathMlAttribute($name);
            }

            $this->wr(' ')->wr($name);

            if ($val !== '' || $this->nonBooleanAttribute($node)) {
                $this->wr('="')->wr($val)->wr('"');
            }
        }

        return $this;
    }

    /**
     * Check whether an attribute matches one of the non-boolean rules.
     *
     * A rule matches when every condition it declares holds. Supported keys:
     * 'nodeNamespace', 'attNamespace', 'nodeName' (string or array),
     * 'attrName' (string or array), 'xpath' (evaluated with the attribute as
     * context node) and 'prefixes' (prefix => namespace URI map registered
     * before the xpath is evaluated).
     *
     * NOTE(review): the key read here is 'attNamespace', while the example in
     * $nonBooleanAttributes spells it 'attrNamespace'. The behavior is kept
     * as is, because honouring the other spelling could change which existing
     * caller rules match.
     *
     * @param \DOMAttr $attr
     *   The attribute to check.
     *
     * @return boolean True if the attribute must always be written with a value.
     */
    protected function nonBooleanAttribute(\DOMAttr $attr)
    {
        $ele = $attr->ownerElement;
        foreach ($this->nonBooleanAttributes as $rule) {
            if (isset($rule['nodeNamespace']) && $rule['nodeNamespace'] !== $ele->namespaceURI) {
                continue;
            }
            if (isset($rule['attNamespace']) && $rule['attNamespace'] !== $attr->namespaceURI) {
                continue;
            }
            // A scalar name is treated as a one-element list.
            if (isset($rule['nodeName']) && !in_array($ele->localName, (array) $rule['nodeName'], true)) {
                continue;
            }
            if (isset($rule['attrName']) && !in_array($attr->localName, (array) $rule['attrName'], true)) {
                continue;
            }
            if (isset($rule['xpath'])) {
                $xp = $this->getXPath($attr);
                if (isset($rule['prefixes'])) {
                    foreach ($rule['prefixes'] as $nsPrefix => $ns) {
                        $xp->registerNamespace($nsPrefix, $ns);
                    }
                }
                if (!$xp->evaluate($rule['xpath'], $attr)) {
                    continue;
                }
            }

            return true;
        }

        return false;
    }

    /**
     * Get an XPath helper bound to the document that owns the given node.
     *
     * The helper is cached and rebuilt whenever a node from a different
     * document is passed in.
     *
     * @param \DOMNode $node
     *
     * @return \DOMXPath
     */
    private function getXPath(\DOMNode $node)
    {
        if (!$this->xpath || $this->xpath->document !== $node->ownerDocument) {
            $this->xpath = new \DOMXPath($node->ownerDocument);
        }

        return $this->xpath;
    }

    /**
     * Write the closing tag.
     *
     * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
     * qualified name (8.3).
     *
     * Outside of HTML mode an element without children has already been
     * written as self closing by openTag(), so nothing is written for it.
     *
     * @param \DOMNode $ele
     *   The element being written.
     */
    protected function closeTag($ele)
    {
        if ($this->outputMode == static::IM_IN_HTML || $ele->hasChildNodes()) {
            $this->wr('</')->wr($this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName)->wr('>');
        }
    }

    /**
     * Write to the output.
     *
     * @param string $text
     *   The string to put into the output.
     *
     * @return \Masterminds\HTML5\Serializer\OutputRules $this so it can be used in chaining.
     */
    protected function wr($text)
    {
        fwrite($this->out, $text);
        return $this;
    }

    /**
     * Write a new line character.
     *
     * @return \Masterminds\HTML5\Serializer\OutputRules $this so it can be used in chaining.
     */
    protected function nl()
    {
        fwrite($this->out, PHP_EOL);
        return $this;
    }

    /**
     * Encode text.
     *
     * When encode is set to false, the default value, the text passed in is
     * escaped per section 8.3 of the html5 spec. For details on how text is
     * escaped see the escape() method.
     *
     * When encoding is set to true the text is converted to named character
     * references where appropriate. Section 8.1.4 Character references of the
     * html5 spec refers to using named character references. This is useful for
     * characters that can't otherwise legally be used in the text.
     *
     * The named character references are listed in section 8.5.
     *
     * True encoding will turn all named character references into their
     * entities. This includes such characters as +.# and many other common
     * ones. By default encoding here will just escape &'<>".
     *
     * Note, PHP 5.4+ has better html5 encoding.
     *
     * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#named-character-references
     *
     * @todo Use the Entities class in php 5.3 to have html5 entities.
     *
     * @param string $text
     *   text to encode.
     * @param boolean $attribute
     *   True if we are encoding an attribute, false otherwise
     *
     * @return string The encoded text.
     */
    protected function enc($text, $attribute = false)
    {
        // Escape the text rather than convert to named character references.
        if (! $this->encode) {
            return $this->escape($text, $attribute);
        }

        // If we are in PHP 5.4+ we can use the native html5 entity functionality to
        // convert the named character references.
        if ($this->hasHTML5) {
            return htmlentities($text, ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES, 'UTF-8', false);
        }

        // If a version earlier than 5.4 html5 entities are not entirely handled.
        // This manually handles them.
        return strtr($text, \Masterminds\HTML5\Serializer\HTML5Entities::$map);
    }

    /**
     * Escape text.
     *
     * According to the html5 spec section 8.3 Serializing HTML fragments, text
     * within tags that are not style, script, xmp, iframe, noembed, and noframes
     * need to be properly escaped.
     *
     * The & should be converted to &amp;, no breaking space unicode characters
     * converted to &nbsp;, when in attribute mode the " should be converted to
     * &quot;, and when not in attribute mode the < and > should be converted to
     * &lt; and &gt;.
     *
     * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#escapingString
     *
     * @param string $text
     *   text to escape.
     * @param boolean $attribute
     *   True if we are escaping an attribute, false otherwise
     *
     * @return string The escaped text.
     */
    protected function escape($text, $attribute = false)
    {
        // Not using htmlspecialchars because, while it does escaping, it doesn't
        // match the requirements of section 8.5. For example, it doesn't handle
        // non-breaking spaces.
        if ($attribute) {
            $replace = array(
                '"' => '&quot;',
                '&' => '&amp;',
                "\xc2\xa0" => '&nbsp;',
            );
        } else {
            $replace = array(
                '<' => '&lt;',
                '>' => '&gt;',
                '&' => '&amp;',
                "\xc2\xa0" => '&nbsp;',
            );
        }

        // strtr() with an array never rescans its own replacements, so the
        // '&' introduced by an entity is not escaped a second time.
        return strtr($text, $replace);
    }
}
|