Chris@0
|
1 <?php
|
Chris@0
|
2 /**
|
Chris@0
|
3 * @file
|
Chris@0
|
4 * The rules for generating output in the serializer.
|
Chris@0
|
5 *
|
Chris@0
|
6 * These output rules are likely to generate output similar to the document that
|
Chris@0
|
7 * was parsed. It is not intended to output exactly the document that was parsed.
|
Chris@0
|
8 */
|
Chris@17
|
9
|
Chris@0
|
10 namespace Masterminds\HTML5\Serializer;
|
Chris@0
|
11
|
Chris@0
|
12 use Masterminds\HTML5\Elements;
|
Chris@0
|
13
|
Chris@0
|
14 /**
|
Chris@0
|
15 * Generate the output html5 based on element rules.
|
Chris@0
|
16 */
|
Chris@17
|
17 class OutputRules implements RulesInterface
|
Chris@0
|
18 {
|
Chris@0
|
19 /**
|
Chris@17
|
20 * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0.
|
Chris@0
|
21 */
|
Chris@0
|
22 const NAMESPACE_HTML = 'http://www.w3.org/1999/xhtml';
|
Chris@0
|
23
|
Chris@0
|
24 const NAMESPACE_MATHML = 'http://www.w3.org/1998/Math/MathML';
|
Chris@0
|
25
|
Chris@0
|
26 const NAMESPACE_SVG = 'http://www.w3.org/2000/svg';
|
Chris@0
|
27
|
Chris@0
|
28 const NAMESPACE_XLINK = 'http://www.w3.org/1999/xlink';
|
Chris@0
|
29
|
Chris@0
|
30 const NAMESPACE_XML = 'http://www.w3.org/XML/1998/namespace';
|
Chris@0
|
31
|
Chris@0
|
32 const NAMESPACE_XMLNS = 'http://www.w3.org/2000/xmlns/';
|
Chris@0
|
33
|
Chris@0
|
34 /**
|
Chris@17
|
35 * Holds the namespace URIs that are implicit in HTML5 output and are therefore never written out as xmlns attributes.
|
Chris@0
|
36 *
|
Chris@0
|
37 * @var array
|
Chris@0
|
38 */
|
Chris@0
|
39 protected $implicitNamespaces = array(
|
Chris@0
|
40 self::NAMESPACE_HTML,
|
Chris@0
|
41 self::NAMESPACE_SVG,
|
Chris@0
|
42 self::NAMESPACE_MATHML,
|
Chris@0
|
43 self::NAMESPACE_XML,
|
Chris@0
|
44 self::NAMESPACE_XMLNS,
|
Chris@0
|
45 );
|
Chris@0
|
46
|
Chris@0
|
47 const IM_IN_HTML = 1;
|
Chris@0
|
48
|
Chris@0
|
49 const IM_IN_SVG = 2;
|
Chris@0
|
50
|
Chris@0
|
51 const IM_IN_MATHML = 3;
|
Chris@0
|
52
|
Chris@0
|
53 /**
|
Chris@17
|
54 * Caches whether the ENT_HTML5 constant is available and usable.
|
Chris@17
|
55 *
|
Chris@17
|
56 * @var bool
|
Chris@0
|
57 */
|
Chris@0
|
58 private $hasHTML5 = false;
|
Chris@0
|
59
|
Chris@0
|
60 protected $traverser;
|
Chris@0
|
61
|
Chris@0
|
62 protected $encode = false;
|
Chris@0
|
63
|
Chris@0
|
64 protected $out;
|
Chris@0
|
65
|
Chris@0
|
66 protected $outputMode;
|
Chris@0
|
67
|
Chris@0
|
68 private $xpath;
|
Chris@0
|
69
|
Chris@0
|
70 protected $nonBooleanAttributes = array(
|
Chris@0
|
71 /*
|
Chris@0
|
72 array(
|
Chris@0
|
73 'nodeNamespace'=>'http://www.w3.org/1999/xhtml',
|
Chris@0
|
74 'attrNamespace'=>'http://www.w3.org/1999/xhtml',
|
Chris@0
|
75
|
Chris@0
|
76 'nodeName'=>'img', 'nodeName'=>array('img', 'a'),
|
Chris@0
|
77 'attrName'=>'alt', 'attrName'=>array('title', 'alt'),
|
Chris@0
|
78 ),
|
Chris@0
|
79 */
|
Chris@0
|
80 array(
|
Chris@0
|
81 'nodeNamespace' => 'http://www.w3.org/1999/xhtml',
|
Chris@0
|
82 'attrName' => array('href',
|
Chris@0
|
83 'hreflang',
|
Chris@0
|
84 'http-equiv',
|
Chris@0
|
85 'icon',
|
Chris@0
|
86 'id',
|
Chris@0
|
87 'keytype',
|
Chris@0
|
88 'kind',
|
Chris@0
|
89 'label',
|
Chris@0
|
90 'lang',
|
Chris@0
|
91 'language',
|
Chris@0
|
92 'list',
|
Chris@0
|
93 'maxlength',
|
Chris@0
|
94 'media',
|
Chris@0
|
95 'method',
|
Chris@0
|
96 'name',
|
Chris@0
|
97 'placeholder',
|
Chris@0
|
98 'rel',
|
Chris@0
|
99 'rows',
|
Chris@0
|
100 'rowspan',
|
Chris@0
|
101 'sandbox',
|
Chris@0
|
102 'spellcheck',
|
Chris@0
|
103 'scope',
|
Chris@0
|
104 'seamless',
|
Chris@0
|
105 'shape',
|
Chris@0
|
106 'size',
|
Chris@0
|
107 'sizes',
|
Chris@0
|
108 'span',
|
Chris@0
|
109 'src',
|
Chris@0
|
110 'srcdoc',
|
Chris@0
|
111 'srclang',
|
Chris@0
|
112 'srcset',
|
Chris@0
|
113 'start',
|
Chris@0
|
114 'step',
|
Chris@0
|
115 'style',
|
Chris@0
|
116 'summary',
|
Chris@0
|
117 'tabindex',
|
Chris@0
|
118 'target',
|
Chris@0
|
119 'title',
|
Chris@0
|
120 'type',
|
Chris@0
|
121 'value',
|
Chris@0
|
122 'width',
|
Chris@0
|
123 'border',
|
Chris@0
|
124 'charset',
|
Chris@0
|
125 'cite',
|
Chris@0
|
126 'class',
|
Chris@0
|
127 'code',
|
Chris@0
|
128 'codebase',
|
Chris@0
|
129 'color',
|
Chris@0
|
130 'cols',
|
Chris@0
|
131 'colspan',
|
Chris@0
|
132 'content',
|
Chris@0
|
133 'coords',
|
Chris@0
|
134 'data',
|
Chris@0
|
135 'datetime',
|
Chris@0
|
136 'default',
|
Chris@0
|
137 'dir',
|
Chris@0
|
138 'dirname',
|
Chris@0
|
139 'enctype',
|
Chris@0
|
140 'for',
|
Chris@0
|
141 'form',
|
Chris@0
|
142 'formaction',
|
Chris@0
|
143 'headers',
|
Chris@0
|
144 'height',
|
Chris@0
|
145 'accept',
|
Chris@0
|
146 'accept-charset',
|
Chris@0
|
147 'accesskey',
|
Chris@0
|
148 'action',
|
Chris@0
|
149 'align',
|
Chris@0
|
150 'alt',
|
Chris@0
|
151 'bgcolor',
|
Chris@0
|
152 ),
|
Chris@0
|
153 ),
|
Chris@0
|
154 array(
|
Chris@0
|
155 'nodeNamespace' => 'http://www.w3.org/1999/xhtml',
|
Chris@0
|
156 'xpath' => 'starts-with(local-name(), \'data-\')',
|
Chris@0
|
157 ),
|
Chris@0
|
158 );
|
Chris@0
|
159
|
Chris@0
|
160 const DOCTYPE = '<!DOCTYPE html>';
|
Chris@0
|
161
|
Chris@0
|
/**
 * Prepare the rules for writing to an output stream.
 *
 * @param resource $output  Handle that wr() and nl() later pass to fwrite().
 * @param array    $options Optional settings. Only 'encode_entities' is read
 *                          here; when set it replaces the default (false).
 */
public function __construct($output, $options = array())
{
    $this->out = $output;

    // Serialization always starts in plain HTML mode.
    $this->outputMode = static::IM_IN_HTML;

    if (isset($options['encode_entities'])) {
        $this->encode = $options['encode_entities'];
    }

    // ENT_HTML5 is not relied upon under HHVM,
    // see https://github.com/facebook/hhvm/issues/2727
    $runningOnHhvm = defined('HHVM_VERSION');
    $this->hasHTML5 = defined('ENT_HTML5') && !$runningOnHhvm;
}
|
Chris@17
|
174
|
Chris@0
|
/**
 * Register an additional rule describing attributes that are not boolean.
 *
 * The rule is appended to the list consulted by nonBooleanAttribute().
 *
 * @param array $rule The rule to append.
 */
public function addRule(array $rule)
{
    array_push($this->nonBooleanAttributes, $rule);
}
|
Chris@0
|
179
|
Chris@17
|
/**
 * Attach the traverser that walks the DOM on behalf of these rules.
 *
 * The traverser is called back from document() and element() to descend
 * into child nodes.
 *
 * @param Traverser $traverser The traverser to use.
 *
 * @return $this
 */
public function setTraverser(Traverser $traverser)
{
    $self = $this;
    $self->traverser = $traverser;

    return $self;
}
|
Chris@0
|
186
|
Chris@0
|
/**
 * Write a whole document: the doctype followed by every top-level node.
 *
 * When the document has no document element only the doctype is written.
 *
 * @param \DOMDocument $dom The document to write (only ->documentElement
 *                          and ->childNodes are read).
 */
public function document($dom)
{
    $this->doctype();

    if (!$dom->documentElement) {
        return;
    }

    foreach ($dom->childNodes as $topLevelNode) {
        $this->traverser->node($topLevelNode);
    }

    $this->nl();
}
|
Chris@0
|
197
|
Chris@0
|
/**
 * Write the doctype declaration followed by a line break.
 */
protected function doctype()
{
    $this->wr(static::DOCTYPE)->nl();
}
|
Chris@0
|
203
|
Chris@0
|
/**
 * Write an element: opening tag, content and (unless void) closing tag.
 *
 * Entering an "svg" or "math" element switches the output mode so that
 * attributes are normalized and empty elements are self-closed. The mode
 * that was active before the element is restored once the element has been
 * written completely, so nested foreign content does not drop the enclosing
 * element back into HTML mode, and an empty svg/math element is emitted as a
 * single self-closing tag.
 *
 * @param \DOMElement $ele The element to write.
 */
public function element($ele)
{
    $name = $ele->tagName;

    // Per spec:
    // If the element has a declared namespace in the HTML, MathML or
    // SVG namespaces, we use the lname instead of the tagName.
    if ($this->traverser->isLocalElement($ele)) {
        $name = $ele->localName;
    }

    // Remember the mode of the surrounding content; it is restored at the
    // end instead of unconditionally falling back to HTML.
    $previousMode = $this->outputMode;

    // If we are in SVG or MathML there is special handling.
    if ('svg' === $name) {
        $this->outputMode = static::IM_IN_SVG;
        $name = Elements::normalizeSvgElement($name);
    } elseif ('math' === $name) {
        $this->outputMode = static::IM_IN_MATHML;
    }

    $this->openTag($ele);

    if (Elements::isA($name, Elements::TEXT_RAW)) {
        // Raw text elements: character data is written verbatim.
        foreach ($ele->childNodes as $child) {
            if ($child instanceof \DOMCharacterData) {
                $this->wr($child->data);
            } elseif ($child instanceof \DOMElement) {
                $this->element($child);
            }
        }
    } elseif ($ele->hasChildNodes()) {
        $this->traverser->children($ele->childNodes);
    }

    // If not unary, add a closing tag. This runs while the element's own
    // mode is still active so closeTag() can tell that an empty foreign
    // element was already self-closed by openTag().
    if (!Elements::isA($name, Elements::VOID_TAG)) {
        $this->closeTag($ele);
    }

    $this->outputMode = $previousMode;
}
|
Chris@0
|
250
|
Chris@0
|
/**
 * Write a text node.
 *
 * Text whose parent is a raw text element (Elements::TEXT_RAW) is written
 * unchanged; all other text goes through enc() first.
 *
 * @param \DOMText $ele The text node to write.
 */
public function text($ele)
{
    $insideRawText = false;
    if (isset($ele->parentNode, $ele->parentNode->tagName)) {
        $insideRawText = Elements::isA($ele->parentNode->localName, Elements::TEXT_RAW);
    }

    if ($insideRawText) {
        $this->wr($ele->data);

        return;
    }

    // FIXME: This probably needs some flags set.
    $encoded = $this->enc($ele->data);
    $this->wr($encoded);
}
|
Chris@0
|
267
|
Chris@0
|
/**
 * Write a CDATA section.
 *
 * The node is serialized by its owner document's saveXML().
 *
 * @param \DOMNode $ele The CDATA node to write.
 */
public function cdata($ele)
{
    $document = $ele->ownerDocument;
    $serialized = $document->saveXML($ele);

    $this->wr($serialized);
}
|
Chris@0
|
273
|
Chris@0
|
/**
 * Write a comment node.
 *
 * The node is serialized by its owner document's saveXML(); per the original
 * author's note this yields the same output as writing the comment
 * delimiters around $ele->data by hand.
 *
 * @param \DOMNode $ele The comment node to write.
 */
public function comment($ele)
{
    $document = $ele->ownerDocument;
    $serialized = $document->saveXML($ele);

    $this->wr($serialized);
}
|
Chris@0
|
280
|
Chris@0
|
/**
 * Write a processing instruction.
 *
 * The output is the opening delimiter, the target, a single space, the data
 * and the closing delimiter. The space is written even when the data is
 * empty.
 *
 * @param \DOMNode $ele The instruction to write (only ->target and ->data
 *                      are read).
 */
public function processorInstruction($ele)
{
    $this->wr('<?');
    $this->wr($ele->target);
    $this->wr(' ');
    $this->wr($ele->data);
    $this->wr('?>');
}
|
Chris@17
|
289
|
Chris@0
|
/**
 * Write the namespace attributes.
 *
 * Every namespace node selected by the XPath query below is written as an
 * attribute, unless its URI is one of the implicit namespaces. The URI is
 * passed through escape() in attribute mode because it ends up inside a
 * double-quoted attribute value.
 *
 * @param \DOMNode $ele The element being written.
 */
protected function namespaceAttrs($ele)
{
    // The cached XPath object is only valid for the document it was built for.
    if (!$this->xpath || $this->xpath->document !== $ele->ownerDocument) {
        $this->xpath = new \DOMXPath($ele->ownerDocument);
    }

    $namespaceNodes = $this->xpath->query('namespace::*[not(.=../../namespace::*)]', $ele);
    foreach ($namespaceNodes as $nsNode) {
        if (in_array($nsNode->nodeValue, $this->implicitNamespaces, true)) {
            continue;
        }

        $this->wr(' ')
            ->wr($nsNode->nodeName)
            ->wr('="')
            ->wr($this->escape($nsNode->nodeValue, true))
            ->wr('"');
    }
}
|
Chris@0
|
307
|
Chris@0
|
/**
 * Write the opening tag.
 *
 * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
 * qualified name (8.3).
 *
 * In HTML mode the tag always ends with ">". In any other mode (SVG, MathML
 * or XML embedded content) an element without children is written as a
 * self-closing tag instead.
 *
 * @param \DOMNode $ele The element being written.
 */
protected function openTag($ele)
{
    $this->wr('<');
    $tagName = $this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName;
    $this->wr($tagName);

    $this->attrs($ele);
    $this->namespaceAttrs($ele);

    $selfClosing = $this->outputMode != static::IM_IN_HTML && !$ele->hasChildNodes();
    $this->wr($selfClosing ? ' />' : '>');
}
|
Chris@0
|
335
|
Chris@0
|
/**
 * Write the attributes of an element.
 *
 * Each attribute is written as a space followed by its name. The quoted
 * value is added when the encoded value is not empty, or when
 * nonBooleanAttribute() reports that the attribute must always carry a
 * value; otherwise the attribute is written value-less.
 *
 * @param \DOMNode $ele The element whose attributes are written.
 *
 * @return $this
 */
protected function attrs($ele)
{
    // FIXME: Needs support for xml, xmlns, xlink, and namespaced elements.
    if (!$ele->hasAttributes()) {
        return $this;
    }

    foreach ($ele->attributes as $node) {
        $val = $this->enc($node->value, true);

        // XXX: The spec says that we need to ensure that anything in
        // the XML, XMLNS, or XLink NS's should use the canonical
        // prefix. It seems that DOM does this for us already, but there
        // may be exceptions.
        $name = $node->nodeName;

        // Special handling for attributes in SVG and MathML.
        if ($this->outputMode == static::IM_IN_SVG) {
            $name = Elements::normalizeSvgAttribute($name);
        } elseif ($this->outputMode == static::IM_IN_MATHML) {
            $name = Elements::normalizeMathMlAttribute($name);
        }

        $this->wr(' ')->wr($name);

        if ('' !== $val || $this->nonBooleanAttribute($node)) {
            $this->wr('="')->wr($val)->wr('"');
        }
    }

    // Same return value as the early exit above.
    return $this;
}
|
Chris@0
|
372
|
Chris@0
|
/**
 * Decide whether an attribute must always be written with a value.
 *
 * The attribute is checked against every registered rule; the first rule
 * whose present conditions all match makes the result true. A rule may
 * contain any of these keys:
 *  - 'nodeNamespace': namespace URI of the owning element.
 *  - 'attrNamespace': namespace URI of the attribute. The historical
 *    spelling 'attNamespace' is still honoured and wins when both are given.
 *  - 'nodeName': local name of the owning element, a string or a list.
 *  - 'attrName': local name of the attribute, a string or a list.
 *  - 'xpath': expression evaluated with the attribute as context node;
 *    'prefixes' (prefix => namespace URI) are registered beforehand.
 *
 * @param \DOMAttr $attr The attribute to check.
 *
 * @return bool True when a rule matches, false otherwise.
 */
protected function nonBooleanAttribute(\DOMAttr $attr)
{
    $ele = $attr->ownerElement;

    foreach ($this->nonBooleanAttributes as $rule) {
        if (isset($rule['nodeNamespace']) && $rule['nodeNamespace'] !== $ele->namespaceURI) {
            continue;
        }

        // Accept the documented key as well as the legacy misspelling.
        $attrNamespace = null;
        if (isset($rule['attNamespace'])) {
            $attrNamespace = $rule['attNamespace'];
        } elseif (isset($rule['attrNamespace'])) {
            $attrNamespace = $rule['attrNamespace'];
        }
        if (null !== $attrNamespace && $attrNamespace !== $attr->namespaceURI) {
            continue;
        }

        // Name conditions accept either a single name or a list of names.
        $actualNames = array(
            'nodeName' => $ele->localName,
            'attrName' => $attr->localName,
        );
        foreach ($actualNames as $key => $actual) {
            if (!isset($rule[$key])) {
                continue;
            }
            $expected = $rule[$key];
            $matches = is_array($expected) ? in_array($actual, $expected, true) : $expected === $actual;
            if (!$matches) {
                continue 2;
            }
        }

        if (isset($rule['xpath'])) {
            $xp = $this->getXPath($attr);
            if (isset($rule['prefixes'])) {
                foreach ($rule['prefixes'] as $nsPrefix => $ns) {
                    $xp->registerNamespace($nsPrefix, $ns);
                }
            }
            if (!$xp->evaluate($rule['xpath'], $attr)) {
                continue;
            }
        }

        return true;
    }

    return false;
}
|
Chris@0
|
412
|
Chris@17
|
/**
 * Return an XPath evaluator bound to the document that owns the given node.
 *
 * The evaluator is cached. It is rebuilt whenever the cached instance
 * belongs to a different document, which is the same check namespaceAttrs()
 * performs before it queries.
 *
 * @param \DOMNode $node A node of the document to evaluate against.
 *
 * @return \DOMXPath
 */
private function getXPath(\DOMNode $node)
{
    if (!$this->xpath || $this->xpath->document !== $node->ownerDocument) {
        $this->xpath = new \DOMXPath($node->ownerDocument);
    }

    return $this->xpath;
}
|
Chris@0
|
421
|
Chris@0
|
/**
 * Write the closing tag.
 *
 * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
 * qualified name (8.3).
 *
 * Nothing is written outside HTML mode for an element without children,
 * because openTag() already emitted it as a self-closing tag.
 *
 * @param \DOMNode $ele The element being written.
 */
protected function closeTag($ele)
{
    if ($this->outputMode != static::IM_IN_HTML && !$ele->hasChildNodes()) {
        return;
    }

    $this->wr('</');
    $tagName = $this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName;
    $this->wr($tagName)->wr('>');
}
|
Chris@0
|
436
|
Chris@0
|
/**
 * Write to the output.
 *
 * The text is handed to fwrite() on the output handle unchanged.
 *
 * @param string $text The string to put into the output
 *
 * @return $this
 */
protected function wr($text)
{
    $stream = $this->out;
    fwrite($stream, $text);

    return $this;
}
|
Chris@0
|
450
|
Chris@0
|
/**
 * Write a new line character.
 *
 * The platform line ending (PHP_EOL) is written to the output handle.
 *
 * @return $this
 */
protected function nl()
{
    $stream = $this->out;
    fwrite($stream, PHP_EOL);

    return $this;
}
|
Chris@0
|
462
|
Chris@0
|
/**
 * Encode text.
 *
 * When encode is set to false, the default value, the text passed in is
 * escaped per section 8.3 of the html5 spec. For details on how text is
 * escaped see the escape() method.
 *
 * When encoding is set to true the text is converted to named character
 * references where appropriate. Section 8.1.4 Character references of the
 * html5 spec refers to using named character references. This is useful for
 * characters that can't otherwise legally be used in the text. The named
 * character references are listed in section 8.5.
 *
 * With ENT_HTML5 available the conversion is done by htmlentities();
 * otherwise the HTML5Entities map is applied with strtr(). The $attribute
 * flag only matters on the escaping path.
 *
 * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#named-character-references
 *
 * @todo Use the Entities class in php 5.3 to have html5 entities.
 *
 * @param string $text      Text to encode.
 * @param bool   $attribute True if we are encoding an attribute, false otherwise.
 *
 * @return string The encoded text.
 */
protected function enc($text, $attribute = false)
{
    if ($this->encode) {
        // Without native html5 entity support the entities are mapped manually.
        if (!$this->hasHTML5) {
            return strtr($text, HTML5Entities::$map);
        }

        return htmlentities($text, ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES, 'UTF-8', false);
    }

    // Escape the text rather than convert to named character references.
    return $this->escape($text, $attribute);
}
|
Chris@0
|
508
|
Chris@0
|
/**
 * Escape text.
 *
 * According to the html5 spec section 8.3 Serializing HTML fragments, text
 * within tags that are not style, script, xmp, iframe, noembed, and noframes
 * need to be properly escaped.
 *
 * The & should be converted to &amp;, no breaking space unicode characters
 * converted to &nbsp;, when in attribute mode the " should be converted to
 * &quot;, and when not in attribute mode the < and > should be converted to
 * &lt; and &gt;.
 *
 * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#escapingString
 *
 * @param string $text      Text to escape.
 * @param bool   $attribute True if we are escaping an attribute, false otherwise.
 *
 * @return string The escaped text.
 */
protected function escape($text, $attribute = false)
{
    // Not using htmlspecialchars because, while it does escaping, it doesn't
    // match the requirements of the spec section above. For example, it
    // doesn't handle non-breaking spaces.
    if ($attribute) {
        $replace = array(
            '"' => '&quot;',
            '&' => '&amp;',
            "\xc2\xa0" => '&nbsp;',
        );
    } else {
        $replace = array(
            '<' => '&lt;',
            '>' => '&gt;',
            '&' => '&amp;',
            "\xc2\xa0" => '&nbsp;',
        );
    }

    // strtr() with a map never re-scans its own replacements, so the "&" of
    // an inserted entity is not escaped a second time.
    return strtr($text, $replace);
}
|
Chris@0
|
548 }
|