comparison vendor/masterminds/html5/src/HTML5.php @ 4:a9cd425dd02b

Update, including to Drupal core 8.6.10
author Chris Cannam
date Thu, 28 Feb 2019 13:11:55 +0000
parents c75dbcec494b
children 12f9dff5fda9
comparison
equal deleted inserted replaced
3:307d7a7fd348 4:a9cd425dd02b
1 <?php 1 <?php
2
2 namespace Masterminds; 3 namespace Masterminds;
3 4
4 use Masterminds\HTML5\Parser\FileInputStream;
5 use Masterminds\HTML5\Parser\StringInputStream;
6 use Masterminds\HTML5\Parser\DOMTreeBuilder; 5 use Masterminds\HTML5\Parser\DOMTreeBuilder;
7 use Masterminds\HTML5\Parser\Scanner; 6 use Masterminds\HTML5\Parser\Scanner;
8 use Masterminds\HTML5\Parser\Tokenizer; 7 use Masterminds\HTML5\Parser\Tokenizer;
9 use Masterminds\HTML5\Serializer\OutputRules; 8 use Masterminds\HTML5\Serializer\OutputRules;
10 use Masterminds\HTML5\Serializer\Traverser; 9 use Masterminds\HTML5\Serializer\Traverser;
11 10
12 /** 11 /**
13 * This class offers convenience methods for parsing and serializing HTML5. 12 * This class offers convenience methods for parsing and serializing HTML5.
14 * It is roughly designed to mirror the \DOMDocument class that is 13 * It is roughly designed to mirror the \DOMDocument native class.
15 * provided with most versions of PHP.
16 *
17 * EXPERIMENTAL. This may change or be completely replaced.
18 */ 14 */
19 class HTML5 15 class HTML5
20 { 16 {
21
22 /** 17 /**
23 * Global options for the parser and serializer. 18 * Global options for the parser and serializer.
24 * 19 *
25 * @var array 20 * @var array
26 */ 21 */
27 protected $options = array( 22 private $defaultOptions = array(
28 // If the serializer should encode all entities. 23 // Whether the serializer should aggressively encode all characters as entities.
29 'encode_entities' => false 24 'encode_entities' => false,
25
26 // Prevents the parser from automatically assigning the HTML5 namespace to the DOM document.
27 'disable_html_ns' => false,
30 ); 28 );
31 29
32 protected $errors = array(); 30 protected $errors = array();
33 31
34 public function __construct(array $options = array()) 32 public function __construct(array $defaultOptions = array())
35 { 33 {
36 $this->options = array_merge($this->options, $options); 34 $this->defaultOptions = array_merge($this->defaultOptions, $defaultOptions);
37 } 35 }
38 36
39 /** 37 /**
40 * Get the default options. 38 * Get the current default options.
41 * 39 *
42 * @return array The default options. 40 * @return array
43 */ 41 */
44 public function getOptions() 42 public function getOptions()
45 { 43 {
46 return $this->options; 44 return $this->defaultOptions;
47 } 45 }
48 46
49 /** 47 /**
50 * Load and parse an HTML file. 48 * Load and parse an HTML file.
51 * 49 *
54 * 3. Note that in these cases, not all of the old data will be 52 * 3. Note that in these cases, not all of the old data will be
55 * preserved. For example, XHTML's XML declaration will be removed. 53 * preserved. For example, XHTML's XML declaration will be removed.
56 * 54 *
57 * The rules governing parsing are set out in the HTML 5 spec. 55 * The rules governing parsing are set out in the HTML 5 spec.
58 * 56 *
59 * @param string $file 57 * @param string|resource $file The path to the file to parse. If this is a resource, it is
60 * The path to the file to parse. If this is a resource, it is 58 * assumed to be an open stream whose pointer is set to the first
61 * assumed to be an open stream whose pointer is set to the first 59 * byte of input.
62 * byte of input. 60 * @param array $options Configuration options when parsing the HTML.
63 * @param array $options 61 *
64 * Configuration options when parsing the HTML
65 * @return \DOMDocument A DOM document. This object type is defined by the libxml 62 * @return \DOMDocument A DOM document. This object type is defined by the libxml
66 * library, and should have been included with your version of PHP. 63 * library, and should have been included with your version of PHP.
67 */ 64 */
68 public function load($file, array $options = array()) 65 public function load($file, array $options = array())
69 { 66 {
70 // Handle the case where file is a resource. 67 // Handle the case where file is a resource.
71 if (is_resource($file)) { 68 if (is_resource($file)) {
72 // FIXME: We need a StreamInputStream class. 69 return $this->parse(stream_get_contents($file), $options);
73 return $this->loadHTML(stream_get_contents($file), $options);
74 } 70 }
75 71
76 $input = new FileInputStream($file); 72 return $this->parse(file_get_contents($file), $options);
77
78 return $this->parse($input, $options);
79 } 73 }
80 74
81 /** 75 /**
82 * Parse a HTML Document from a string. 76 * Parse a HTML Document from a string.
83 * 77 *
84 * Take a string of HTML 5 (or earlier) and parse it into a 78 * Take a string of HTML 5 (or earlier) and parse it into a
85 * DOMDocument. 79 * DOMDocument.
86 * 80 *
87 * @param string $string 81 * @param string $string A html5 document as a string.
88 * A html5 document as a string. 82 * @param array $options Configuration options when parsing the HTML.
89 * @param array $options 83 *
90 * Configuration options when parsing the HTML
91 * @return \DOMDocument A DOM document. DOM is part of libxml, which is included with 84 * @return \DOMDocument A DOM document. DOM is part of libxml, which is included with
92 * almost all distributions of PHP. 85 * almost all distributions of PHP.
93 */ 86 */
94 public function loadHTML($string, array $options = array()) 87 public function loadHTML($string, array $options = array())
95 { 88 {
96 $input = new StringInputStream($string); 89 return $this->parse($string, $options);
97
98 return $this->parse($input, $options);
99 } 90 }
100 91
101 /** 92 /**
102 * Convenience function to load an HTML file. 93 * Convenience function to load an HTML file.
103 * 94 *
104 * This is here to provide backwards compatibility with the 95 * This is here to provide backwards compatibility with the
105 * PHP DOM implementation. It simply calls load(). 96 * PHP DOM implementation. It simply calls load().
106 * 97 *
107 * @param string $file 98 * @param string $file The path to the file to parse. If this is a resource, it is
108 * The path to the file to parse. If this is a resource, it is 99 * assumed to be an open stream whose pointer is set to the first
109 * assumed to be an open stream whose pointer is set to the first 100 * byte of input.
110 * byte of input. 101 * @param array $options Configuration options when parsing the HTML.
111 * @param array $options
112 * Configuration options when parsing the HTML
113 * 102 *
114 * @return \DOMDocument A DOM document. This object type is defined by the libxml 103 * @return \DOMDocument A DOM document. This object type is defined by the libxml
115 * library, and should have been included with your version of PHP. 104 * library, and should have been included with your version of PHP.
116 */ 105 */
117 public function loadHTMLFile($file, array $options = array()) 106 public function loadHTMLFile($file, array $options = array())
118 { 107 {
119 return $this->load($file, $options); 108 return $this->load($file, $options);
120 } 109 }
121 110
122 /** 111 /**
123 * Parse a HTML fragment from a string. 112 * Parse a HTML fragment from a string.
124 * 113 *
125 * @param string $string 114 * @param string $string the HTML5 fragment as a string
126 * The html5 fragment as a string. 115 * @param array $options Configuration options when parsing the HTML
127 * @param array $options
128 * Configuration options when parsing the HTML
129 * 116 *
130 * @return \DOMDocumentFragment A DOM fragment. The DOM is part of libxml, which is included with 117 * @return \DOMDocumentFragment A DOM fragment. The DOM is part of libxml, which is included with
131 * almost all distributions of PHP. 118 * almost all distributions of PHP.
132 */ 119 */
133 public function loadHTMLFragment($string, array $options = array()) 120 public function loadHTMLFragment($string, array $options = array())
134 { 121 {
135 $input = new StringInputStream($string); 122 return $this->parseFragment($string, $options);
136 123 }
137 return $this->parseFragment($input, $options); 124
138 } 125 /**
139 126 * Return all errors encountered during the parsing phase.
140 /**
141 * Return all errors encountered during the parsing phase
142 * 127 *
143 * @return array 128 * @return array
144 */ 129 */
145 public function getErrors() 130 public function getErrors()
146 { 131 {
147 return $this->errors; 132 return $this->errors;
148 } 133 }
149 134
150 /** 135 /**
151 * Return true if some errors were encountered during the parsing phase 136 * Return true if some errors were encountered during the parsing phase.
152 * 137 *
153 * @return bool 138 * @return bool
154 */ 139 */
155 public function hasErrors() 140 public function hasErrors()
156 { 141 {
157 return count($this->errors) > 0; 142 return count($this->errors) > 0;
158 } 143 }
159 144
160 /** 145 /**
161 * Parse an input stream. 146 * Parse an input string.
147 *
148 * @param string $input
149 * @param array $options
150 *
151 * @return \DOMDocument
152 */
153 public function parse($input, array $options = array())
154 {
155 $this->errors = array();
156 $options = array_merge($this->defaultOptions, $options);
157 $events = new DOMTreeBuilder(false, $options);
158 $scanner = new Scanner($input);
159 $parser = new Tokenizer($scanner, $events, !empty($options['xmlNamespaces']) ? Tokenizer::CONFORMANT_XML : Tokenizer::CONFORMANT_HTML);
160
161 $parser->parse();
162 $this->errors = $events->getErrors();
163
164 return $events->document();
165 }
166
167 /**
168 * Parse an input stream where the stream is a fragment.
162 * 169 *
163 * Lower-level loading function. This requires an input stream instead 170 * Lower-level loading function. This requires an input stream instead
164 * of a string, file, or resource. 171 * of a string, file, or resource.
165 */ 172 *
166 public function parse(\Masterminds\HTML5\Parser\InputStream $input, array $options = array()) 173 * @param string $input The input data to parse in the form of a string.
167 { 174 * @param array $options An array of options.
168 $this->errors = array(); 175 *
169 $options = array_merge($this->getOptions(), $options); 176 * @return \DOMDocumentFragment
170 $events = new DOMTreeBuilder(false, $options); 177 */
178 public function parseFragment($input, array $options = array())
179 {
180 $options = array_merge($this->defaultOptions, $options);
181 $events = new DOMTreeBuilder(true, $options);
171 $scanner = new Scanner($input); 182 $scanner = new Scanner($input);
172 $parser = new Tokenizer($scanner, $events, !empty($options['xmlNamespaces']) ? Tokenizer::CONFORMANT_XML: Tokenizer::CONFORMANT_HTML); 183 $parser = new Tokenizer($scanner, $events, !empty($options['xmlNamespaces']) ? Tokenizer::CONFORMANT_XML : Tokenizer::CONFORMANT_HTML);
173 184
174 $parser->parse(); 185 $parser->parse();
175 $this->errors = $events->getErrors(); 186 $this->errors = $events->getErrors();
176 187
177 return $events->document();
178 }
179
180 /**
181 * Parse an input stream where the stream is a fragment.
182 *
183 * Lower-level loading function. This requires an input stream instead
184 * of a string, file, or resource.
185 */
186 public function parseFragment(\Masterminds\HTML5\Parser\InputStream $input, array $options = array())
187 {
188 $options = array_merge($this->getOptions(), $options);
189 $events = new DOMTreeBuilder(true, $options);
190 $scanner = new Scanner($input);
191 $parser = new Tokenizer($scanner, $events, !empty($options['xmlNamespaces']) ? Tokenizer::CONFORMANT_XML: Tokenizer::CONFORMANT_HTML);
192
193 $parser->parse();
194 $this->errors = $events->getErrors();
195
196 return $events->fragment(); 188 return $events->fragment();
197 } 189 }
198 190
199 /** 191 /**
200 * Save a DOM into a given file as HTML5. 192 * Save a DOM into a given file as HTML5.
201 * 193 *
202 * @param mixed $dom 194 * @param mixed $dom The DOM to be serialized.
203 * The DOM to be serialized. 195 * @param string|resource $file The filename to be written or resource to write to.
204 * @param string $file 196 * @param array $options Configuration options when serializing the DOM. These include:
205 * The filename to be written. 197 * - encode_entities: Text written to the output is escaped by default and not all
206 * @param array $options 198 * entities are encoded. If this is set to true all entities will be encoded.
207 * Configuration options when serializing the DOM. These include: 199 * Defaults to false.
208 * - encode_entities: Text written to the output is escaped by default and not all
209 * entities are encoded. If this is set to true all entities will be encoded.
210 * Defaults to false.
211 */ 200 */
212 public function save($dom, $file, $options = array()) 201 public function save($dom, $file, $options = array())
213 { 202 {
214 $close = true; 203 $close = true;
215 if (is_resource($file)) { 204 if (is_resource($file)) {
216 $stream = $file; 205 $stream = $file;
217 $close = false; 206 $close = false;
218 } else { 207 } else {
219 $stream = fopen($file, 'w'); 208 $stream = fopen($file, 'wb');
220 } 209 }
221 $options = array_merge($this->getOptions(), $options); 210 $options = array_merge($this->defaultOptions, $options);
222 $rules = new OutputRules($stream, $options); 211 $rules = new OutputRules($stream, $options);
223 $trav = new Traverser($dom, $stream, $rules, $options); 212 $trav = new Traverser($dom, $stream, $rules, $options);
224 213
225 $trav->walk(); 214 $trav->walk();
226 215
230 } 219 }
231 220
232 /** 221 /**
233 * Convert a DOM into an HTML5 string. 222 * Convert a DOM into an HTML5 string.
234 * 223 *
235 * @param mixed $dom 224 * @param mixed $dom The DOM to be serialized.
236 * The DOM to be serialized. 225 * @param array $options Configuration options when serializing the DOM. These include:
237 * @param array $options 226 * - encode_entities: Text written to the output is escaped by default and not all
238 * Configuration options when serializing the DOM. These include: 227 * entities are encoded. If this is set to true all entities will be encoded.
239 * - encode_entities: Text written to the output is escaped by default and not all 228 * Defaults to false.
240 * entities are encoded. If this is set to true all entities will be encoded.
241 * Defaults to false.
242 * 229 *
243 * @return string An HTML5 document generated from the DOM. 230 * @return string An HTML5 document generated from the DOM.
244 */ 231 */
245 public function saveHTML($dom, $options = array()) 232 public function saveHTML($dom, $options = array())
246 { 233 {
247 $stream = fopen('php://temp', 'w'); 234 $stream = fopen('php://temp', 'wb');
248 $this->save($dom, $stream, array_merge($this->getOptions(), $options)); 235 $this->save($dom, $stream, array_merge($this->defaultOptions, $options));
249 236
250 return stream_get_contents($stream, - 1, 0); 237 return stream_get_contents($stream, -1, 0);
251 } 238 }
252 } 239 }