Chris@0
|
1 <?php
|
Chris@0
|
2
|
Chris@0
|
3 namespace Drupal\Component\Utility;
|
Chris@0
|
4
|
Chris@0
|
/**
 * Provides DOMDocument helpers for parsing and serializing HTML strings.
 *
 * @ingroup utility
 */
class Html {

  /**
   * An array of previously cleaned HTML classes.
   *
   * Keyed by the raw class string, valued by the cleaned class name.
   *
   * @var array
   */
  protected static $classes = [];

  /**
   * An array of the initial IDs used in one request.
   *
   * @var array
   */
  protected static $seenIdsInit;

  /**
   * An array of IDs, including incremented versions when an ID is duplicated.
   *
   * Keyed by cleaned ID, valued by the number of times it has been handed out.
   *
   * @var array
   */
  protected static $seenIds;

  /**
   * Stores whether the current request was sent via AJAX.
   *
   * @var bool
   */
  protected static $isAjax = FALSE;

  /**
   * All attributes that may contain URIs.
   *
   * - The attributes 'code' and 'codebase' are omitted, because they only exist
   *   for the <applet> tag. The time of Java applets has passed.
   * - The attribute 'icon' is omitted, because no browser implements the
   *   <command> tag anymore.
   *   See https://developer.mozilla.org/en-US/docs/Web/HTML/Element/command.
   * - The 'manifest' attribute is omitted because it only exists for the <html>
   *   tag. That tag only makes sense in a HTML-served-as-HTML context, in which
   *   case relative URLs are guaranteed to work.
   *
   * @see https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes
   * @see https://stackoverflow.com/questions/2725156/complete-list-of-html-tag-attributes-which-have-a-url-value
   *
   * @var string[]
   */
  protected static $uriAttributes = ['href', 'poster', 'src', 'cite', 'data', 'action', 'formaction', 'srcset', 'about'];

  /**
   * Prepares a string for use as a valid class name.
   *
   * Do not pass one string containing multiple classes as they will be
   * incorrectly concatenated with dashes, i.e. "one two" will become "one-two".
   *
   * Results are memoized per raw input string for the lifetime of the process.
   *
   * @param mixed $class
   *   The class name to clean. It can be a string or anything that can be cast
   *   to string.
   *
   * @return string
   *   The cleaned class name.
   */
  public static function getClass($class) {
    $class = (string) $class;
    if (!isset(static::$classes[$class])) {
      static::$classes[$class] = static::cleanCssIdentifier(mb_strtolower($class));
    }
    return static::$classes[$class];
  }

  /**
   * Prepares a string for use as a CSS identifier (element, class, or ID name).
   *
   * Link below shows the syntax for valid CSS identifiers (including element
   * names, classes, and IDs in selectors).
   *
   * @see http://www.w3.org/TR/CSS21/syndata.html#characters
   *
   * @param string $identifier
   *   The identifier to clean.
   * @param array $filter
   *   An array of string replacements to use on the identifier.
   *
   * @return string
   *   The cleaned identifier.
   */
  public static function cleanCssIdentifier($identifier, array $filter = [
    ' ' => '-',
    '_' => '-',
    '/' => '-',
    '[' => '-',
    ']' => '',
  ]) {
    // We could also use strtr() here but it's much slower than str_replace().
    // In order to keep '__' to stay '__' we first replace it with a different
    // placeholder after checking that it is not defined as a filter.
    $double_underscore_replacements = 0;
    if (!isset($filter['__'])) {
      $identifier = str_replace('__', '##', $identifier, $double_underscore_replacements);
    }
    $identifier = str_replace(array_keys($filter), array_values($filter), $identifier);
    // Replace temporary placeholder '##' with '__' only if the original
    // $identifier contained '__'.
    if ($double_underscore_replacements > 0) {
      $identifier = str_replace('##', '__', $identifier);
    }

    // Valid characters in a CSS identifier are:
    // - the hyphen (U+002D)
    // - 0-9 (U+0030 - U+0039)
    // - A-Z (U+0041 - U+005A)
    // - the underscore (U+005F)
    // - a-z (U+0061 - U+007A)
    // - ISO 10646 characters U+00A1 and higher
    // We strip out any character not in the above list.
    $identifier = preg_replace('/[^\x{002D}\x{0030}-\x{0039}\x{0041}-\x{005A}\x{005F}\x{0061}-\x{007A}\x{00A1}-\x{FFFF}]/u', '', $identifier);
    // Identifiers cannot start with a digit, two hyphens, or a hyphen followed
    // by a digit. A leading digit is replaced by '_'; a leading '--' or
    // '-<digit>' pair is replaced by '__'.
    $identifier = preg_replace([
      '/^[0-9]/',
      '/^(-[0-9])|^(--)/',
    ], ['_', '__'], $identifier);
    return $identifier;
  }

  /**
   * Sets if this request is an Ajax request.
   *
   * @param bool $is_ajax
   *   TRUE if this request is an Ajax request, FALSE otherwise.
   */
  public static function setIsAjax($is_ajax) {
    static::$isAjax = $is_ajax;
  }

  /**
   * Prepares a string for use as a valid HTML ID and guarantees uniqueness.
   *
   * This function ensures that each passed HTML ID value only exists once on
   * the page. By tracking the already returned ids, this function enables
   * forms, blocks, and other content to be output multiple times on the same
   * page, without breaking (X)HTML validation.
   *
   * For already existing IDs, a counter is appended to the ID string.
   * Therefore, JavaScript and CSS code should not rely on any value that was
   * generated by this function and instead should rely on manually added CSS
   * classes or similarly reliable constructs.
   *
   * Two consecutive hyphens separate the counter from the original ID. To
   * manage uniqueness across multiple Ajax requests on the same page, Ajax
   * requests POST an array of all IDs currently present on the page, which are
   * used to prime this function's cache upon first invocation.
   *
   * To allow reverse-parsing of IDs submitted via Ajax, any multiple
   * consecutive hyphens in the originally passed $id are replaced with a
   * single hyphen.
   *
   * @param string $id
   *   The ID to clean.
   *
   * @return string
   *   The cleaned ID.
   */
  public static function getUniqueId($id) {
    // If this is an Ajax request, then content returned by this page request
    // will be merged with content already on the base page. The HTML IDs must
    // be unique for the fully merged content. Therefore use unique IDs.
    if (static::$isAjax) {
      return static::getId($id) . '--' . Crypt::randomBytesBase64(8);
    }

    // @todo Remove all that code once we switch over to random IDs only,
    // see https://www.drupal.org/node/1090592.
    if (!isset(static::$seenIdsInit)) {
      static::$seenIdsInit = [];
    }
    if (!isset(static::$seenIds)) {
      static::$seenIds = static::$seenIdsInit;
    }

    $id = static::getId($id);

    // Ensure IDs are unique by appending a counter after the first occurrence.
    // The counter needs to be appended with a delimiter that does not exist in
    // the base ID. Requiring a unique delimiter helps ensure that we really do
    // return unique IDs and also helps us re-create the $seen_ids array during
    // Ajax requests.
    if (isset(static::$seenIds[$id])) {
      $id = $id . '--' . ++static::$seenIds[$id];
    }
    else {
      static::$seenIds[$id] = 1;
    }
    return $id;
  }

  /**
   * Prepares a string for use as a valid HTML ID.
   *
   * Only use this function when you want to intentionally skip the uniqueness
   * guarantee of self::getUniqueId().
   *
   * @param string $id
   *   The ID to clean.
   *
   * @return string
   *   The cleaned ID.
   *
   * @see self::getUniqueId()
   */
  public static function getId($id) {
    $id = str_replace([' ', '_', '[', ']'], ['-', '-', '-', ''], mb_strtolower($id));

    // As defined in http://www.w3.org/TR/html4/types.html#type-name, HTML IDs can
    // only contain letters, digits ([0-9]), hyphens ("-"), underscores ("_"),
    // colons (":"), and periods ("."). We strip out any character not in that
    // list. Note that the CSS spec doesn't allow colons or periods in identifiers
    // (http://www.w3.org/TR/CSS21/syndata.html#characters), so we strip those two
    // characters as well.
    $id = preg_replace('/[^A-Za-z0-9\-_]/', '', $id);

    // Removing multiple consecutive hyphens.
    $id = preg_replace('/\-+/', '-', $id);
    return $id;
  }

  /**
   * Resets the list of seen IDs.
   */
  public static function resetSeenIds() {
    static::$seenIds = NULL;
  }

  /**
   * Normalizes an HTML snippet.
   *
   * This function is essentially \DOMDocument::normalizeDocument(), but
   * operates on an HTML string instead of a \DOMDocument.
   *
   * @param string $html
   *   The HTML string to normalize.
   *
   * @return string
   *   The normalized HTML string.
   */
  public static function normalize($html) {
    $document = static::load($html);
    return static::serialize($document);
  }

  /**
   * Parses an HTML snippet and returns it as a DOM object.
   *
   * This function loads the body part of a partial (X)HTML document and returns
   * a full \DOMDocument object that represents this document.
   *
   * Use \Drupal\Component\Utility\Html::serialize() to serialize this
   * \DOMDocument back to a string.
   *
   * @param string $html
   *   The partial (X)HTML snippet to load. Invalid markup will be corrected on
   *   import.
   *
   * @return \DOMDocument
   *   A \DOMDocument that represents the loaded (X)HTML snippet.
   */
  public static function load($html) {
    $document = <<<EOD
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head>
<body>!html</body>
</html>
EOD;
    // PHP's \DOMDocument serialization adds extra whitespace when the markup
    // of the wrapping document contains newlines, so ensure we remove all
    // newlines before injecting the actual HTML body to be processed.
    // strtr() with an array does not rescan replaced text, so newlines inside
    // $html itself are left alone.
    $document = strtr($document, ["\n" => '', '!html' => $html]);

    $dom = new \DOMDocument();
    // Ignore warnings during HTML soup loading.
    @$dom->loadHTML($document);

    return $dom;
  }

  /**
   * Converts the body of a \DOMDocument back to an HTML snippet.
   *
   * The function serializes the body part of a \DOMDocument back to an (X)HTML
   * snippet. The resulting (X)HTML snippet will be properly formatted to be
   * compatible with HTML user agents.
   *
   * @param \DOMDocument $document
   *   A \DOMDocument object to serialize, only the tags below the first <body>
   *   node will be converted.
   *
   * @return string
   *   A valid (X)HTML snippet, as a string. Empty if the document has no
   *   <body> element.
   */
  public static function serialize(\DOMDocument $document) {
    $body_node = $document->getElementsByTagName('body')->item(0);
    $html = '';

    if ($body_node !== NULL) {
      foreach ($body_node->getElementsByTagName('script') as $node) {
        static::escapeCdataElement($node);
      }
      foreach ($body_node->getElementsByTagName('style') as $node) {
        static::escapeCdataElement($node, '/*', '*/');
      }
      foreach ($body_node->childNodes as $node) {
        $html .= $document->saveXML($node);
      }
    }
    return $html;
  }

  /**
   * Adds comments around a <!CDATA section in a \DOMNode.
   *
   * \DOMDocument::loadHTML() in \Drupal\Component\Utility\Html::load() makes
   * CDATA sections from the contents of inline script and style tags. This can
   * cause HTML4 browsers to throw exceptions.
   *
   * This function attempts to solve the problem by creating a
   * \DOMDocumentFragment to comment the CDATA tag.
   *
   * @param \DOMNode $node
   *   The element potentially containing a CDATA node.
   * @param string $comment_start
   *   (optional) A string to use as a comment start marker to escape the CDATA
   *   declaration. Defaults to '//'.
   * @param string $comment_end
   *   (optional) A string to use as a comment end marker to escape the CDATA
   *   declaration. Defaults to an empty string.
   */
  public static function escapeCdataElement(\DOMNode $node, $comment_start = '//', $comment_end = '') {
    foreach ($node->childNodes as $child_node) {
      if ($child_node instanceof \DOMCdataSection) {
        $embed_prefix = "\n<!--{$comment_start}--><![CDATA[{$comment_start} ><!--{$comment_end}\n";
        $embed_suffix = "\n{$comment_start}--><!]]>{$comment_end}\n";

        // Prevent invalid cdata escaping as this would throw a DOM error.
        // This is the same behavior as found in libxml2.
        // Related W3C standard: http://www.w3.org/TR/REC-xml/#dt-cdsection
        // Fix explanation: http://wikipedia.org/wiki/CDATA#Nesting
        $data = str_replace(']]>', ']]]]><![CDATA[>', $child_node->data);

        // Append the wrapped copy first, then drop the original CDATA node.
        $fragment = $node->ownerDocument->createDocumentFragment();
        $fragment->appendXML($embed_prefix . $data . $embed_suffix);
        $node->appendChild($fragment);
        $node->removeChild($child_node);
      }
    }
  }

  /**
   * Decodes all HTML entities including numerical ones to regular UTF-8 bytes.
   *
   * Double-escaped entities will only be decoded once ("&amp;lt;" becomes
   * "&lt;", not "<"). Be careful when using this function, as it will revert
   * previous sanitization efforts (&lt;script&gt; will become <script>).
   *
   * This method is not the opposite of Html::escape(). For example, this method
   * will convert "&eacute;" to "é", whereas Html::escape() will not convert "é"
   * to "&eacute;".
   *
   * @param string $text
   *   The text to decode entities in.
   *
   * @return string
   *   The input $text, with all HTML entities decoded once.
   *
   * @see html_entity_decode()
   * @see \Drupal\Component\Utility\Html::escape()
   */
  public static function decodeEntities($text) {
    return html_entity_decode($text, ENT_QUOTES, 'UTF-8');
  }

  /**
   * Escapes text by converting special characters to HTML entities.
   *
   * This method escapes HTML for sanitization purposes by replacing the
   * following special characters with their HTML entity equivalents:
   * - & (ampersand) becomes &amp;
   * - " (double quote) becomes &quot;
   * - ' (single quote) becomes &#039;
   * - < (less than) becomes &lt;
   * - > (greater than) becomes &gt;
   * Special characters that have already been escaped will be double-escaped
   * (for example, "&lt;" becomes "&amp;lt;"), and invalid UTF-8 encoding
   * will be converted to the Unicode replacement character ("�").
   *
   * This method is not the opposite of Html::decodeEntities(). For example,
   * this method will not encode "é" to "&eacute;", whereas
   * Html::decodeEntities() will convert all HTML entities to UTF-8 bytes,
   * including "&eacute;" and "&lt;" to "é" and "<".
   *
   * When constructing @link theme_render render arrays @endlink passing the
   * output of Html::escape() to '#markup' is not recommended. Use the
   * '#plain_text' key instead and the renderer will autoescape the text.
   *
   * @param string $text
   *   The input text.
   *
   * @return string
   *   The text with all HTML special characters converted.
   *
   * @see htmlspecialchars()
   * @see \Drupal\Component\Utility\Html::decodeEntities()
   *
   * @ingroup sanitization
   */
  public static function escape($text) {
    return htmlspecialchars($text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
  }

  /**
   * Converts all root-relative URLs to absolute URLs.
   *
   * Does not change any existing protocol-relative or absolute URLs. Does not
   * change other relative URLs because they would result in different absolute
   * URLs depending on the current path. For example: when the same content
   * containing such a relative URL (for example 'image.png'), is served from
   * its canonical URL (for example 'http://example.com/some-article') or from
   * a listing or feed (for example 'http://example.com/all-articles') their
   * "current path" differs, resulting in different absolute URLs:
   * 'http://example.com/some-article/image.png' versus
   * 'http://example.com/all-articles/image.png'. Only one can be correct.
   * Therefore relative URLs that are not root-relative cannot be safely
   * transformed and should generally be avoided.
   *
   * Necessary for HTML that is served outside of a website, for example, RSS
   * and e-mail.
   *
   * @param string $html
   *   The partial (X)HTML snippet to load. Invalid markup will be corrected on
   *   import.
   * @param string $scheme_and_host
   *   The root URL, which has a URI scheme, host and optional port.
   *
   * @return string
   *   The updated (X)HTML snippet.
   */
  public static function transformRootRelativeUrlsToAbsolute($html, $scheme_and_host) {
    assert(empty(array_diff(array_keys(parse_url($scheme_and_host)), ["scheme", "host", "port"])), '$scheme_and_host contains scheme, host and port at most.');
    assert(isset(parse_url($scheme_and_host)["scheme"]), '$scheme_and_host is absolute and hence has a scheme.');
    assert(isset(parse_url($scheme_and_host)["host"]), '$scheme_and_host is absolute and hence has a host.');

    $html_dom = Html::load($html);
    $xpath = new \DOMXpath($html_dom);

    // "srcset" holds a comma-separated list of image candidate strings, each
    // of which starts with its own URL, so every candidate is handled
    // individually. This runs exactly once, and before the single-URI pass
    // below, so that pass never prefixes an already processed srcset value.
    // @see https://html.spec.whatwg.org/multipage/embedded-content.html#attr-img-srcset
    // @see https://html.spec.whatwg.org/multipage/embedded-content.html#image-candidate-string
    foreach ($xpath->query("//*[@srcset]") as $node) {
      $image_candidate_strings = array_map('trim', explode(',', $node->getAttribute('srcset')));
      foreach ($image_candidate_strings as $key => $image_candidate_string) {
        // Root-relative means: starts with '/', but not with '//'. strncmp()
        // is safe on empty and one-character candidates (an empty srcset or a
        // trailing comma), where direct string offsets would raise warnings.
        if (strncmp($image_candidate_string, '/', 1) === 0 && strncmp($image_candidate_string, '//', 2) !== 0) {
          $image_candidate_strings[$key] = $scheme_and_host . $image_candidate_string;
        }
      }
      $node->setAttribute('srcset', implode(', ', $image_candidate_strings));
    }

    // Update all root-relative URLs to absolute URLs in attributes that hold a
    // single URI.
    foreach (static::$uriAttributes as $attr) {
      foreach ($xpath->query("//*[starts-with(@$attr, '/') and not(starts-with(@$attr, '//'))]") as $node) {
        $node->setAttribute($attr, $scheme_and_host . $node->getAttribute($attr));
      }
    }
    return Html::serialize($html_dom);
  }

}
|