Mercurial > hg > cmmr2012-drupal-site
comparison vendor/masterminds/html5/src/HTML5.php @ 4:a9cd425dd02b
Update, including to Drupal core 8.6.10
author | Chris Cannam |
---|---|
date | Thu, 28 Feb 2019 13:11:55 +0000 |
parents | c75dbcec494b |
children | 12f9dff5fda9 |
comparison
equal
deleted
inserted
replaced
3:307d7a7fd348 | 4:a9cd425dd02b |
---|---|
1 <?php | 1 <?php |
2 | |
2 namespace Masterminds; | 3 namespace Masterminds; |
3 | 4 |
4 use Masterminds\HTML5\Parser\FileInputStream; | |
5 use Masterminds\HTML5\Parser\StringInputStream; | |
6 use Masterminds\HTML5\Parser\DOMTreeBuilder; | 5 use Masterminds\HTML5\Parser\DOMTreeBuilder; |
7 use Masterminds\HTML5\Parser\Scanner; | 6 use Masterminds\HTML5\Parser\Scanner; |
8 use Masterminds\HTML5\Parser\Tokenizer; | 7 use Masterminds\HTML5\Parser\Tokenizer; |
9 use Masterminds\HTML5\Serializer\OutputRules; | 8 use Masterminds\HTML5\Serializer\OutputRules; |
10 use Masterminds\HTML5\Serializer\Traverser; | 9 use Masterminds\HTML5\Serializer\Traverser; |
11 | 10 |
12 /** | 11 /** |
13 * This class offers convenience methods for parsing and serializing HTML5. | 12 * This class offers convenience methods for parsing and serializing HTML5. |
14 * It is roughly designed to mirror the \DOMDocument class that is | 13 * It is roughly designed to mirror the \DOMDocument native class. |
15 * provided with most versions of PHP. | |
16 * | |
17 * EXPERIMENTAL. This may change or be completely replaced. | |
18 */ | 14 */ |
19 class HTML5 | 15 class HTML5 |
20 { | 16 { |
21 | |
22 /** | 17 /** |
23 * Global options for the parser and serializer. | 18 * Global options for the parser and serializer. |
24 * | 19 * |
25 * @var array | 20 * @var array |
26 */ | 21 */ |
27 protected $options = array( | 22 private $defaultOptions = array( |
28 // If the serializer should encode all entities. | 23 // Whether the serializer should aggressively encode all characters as entities. |
29 'encode_entities' => false | 24 'encode_entities' => false, |
25 | |
26 // Prevents the parser from automatically assigning the HTML5 namespace to the DOM document. | |
27 'disable_html_ns' => false, | |
30 ); | 28 ); |
31 | 29 |
32 protected $errors = array(); | 30 protected $errors = array(); |
33 | 31 |
34 public function __construct(array $options = array()) | 32 public function __construct(array $defaultOptions = array()) |
35 { | 33 { |
36 $this->options = array_merge($this->options, $options); | 34 $this->defaultOptions = array_merge($this->defaultOptions, $defaultOptions); |
37 } | 35 } |
38 | 36 |
39 /** | 37 /** |
40 * Get the default options. | 38 * Get the current default options. |
41 * | 39 * |
42 * @return array The default options. | 40 * @return array |
43 */ | 41 */ |
44 public function getOptions() | 42 public function getOptions() |
45 { | 43 { |
46 return $this->options; | 44 return $this->defaultOptions; |
47 } | 45 } |
48 | 46 |
49 /** | 47 /** |
50 * Load and parse an HTML file. | 48 * Load and parse an HTML file. |
51 * | 49 * |
54 * 3. Note that in these cases, not all of the old data will be | 52 * 3. Note that in these cases, not all of the old data will be |
55 * preserved. For example, XHTML's XML declaration will be removed. | 53 * preserved. For example, XHTML's XML declaration will be removed. |
56 * | 54 * |
57 * The rules governing parsing are set out in the HTML 5 spec. | 55 * The rules governing parsing are set out in the HTML 5 spec. |
58 * | 56 * |
59 * @param string $file | 57 * @param string|resource $file The path to the file to parse. If this is a resource, it is |
60 * The path to the file to parse. If this is a resource, it is | 58 * assumed to be an open stream whose pointer is set to the first |
61 * assumed to be an open stream whose pointer is set to the first | 59 * byte of input. |
62 * byte of input. | 60 * @param array $options Configuration options when parsing the HTML. |
63 * @param array $options | 61 * |
64 * Configuration options when parsing the HTML | |
65 * @return \DOMDocument A DOM document. This object type is defined by the libxml | 62 * @return \DOMDocument A DOM document. This object type is defined by the libxml |
66 * library, and should have been included with your version of PHP. | 63 * library, and should have been included with your version of PHP. |
67 */ | 64 */ |
68 public function load($file, array $options = array()) | 65 public function load($file, array $options = array()) |
69 { | 66 { |
70 // Handle the case where file is a resource. | 67 // Handle the case where file is a resource. |
71 if (is_resource($file)) { | 68 if (is_resource($file)) { |
72 // FIXME: We need a StreamInputStream class. | 69 return $this->parse(stream_get_contents($file), $options); |
73 return $this->loadHTML(stream_get_contents($file), $options); | |
74 } | 70 } |
75 | 71 |
76 $input = new FileInputStream($file); | 72 return $this->parse(file_get_contents($file), $options); |
77 | |
78 return $this->parse($input, $options); | |
79 } | 73 } |
80 | 74 |
81 /** | 75 /** |
82 * Parse a HTML Document from a string. | 76 * Parse a HTML Document from a string. |
83 * | 77 * |
84 * Take a string of HTML 5 (or earlier) and parse it into a | 78 * Take a string of HTML 5 (or earlier) and parse it into a |
85 * DOMDocument. | 79 * DOMDocument. |
86 * | 80 * |
87 * @param string $string | 81 * @param string $string A html5 document as a string. |
88 * A html5 document as a string. | 82 * @param array $options Configuration options when parsing the HTML. |
89 * @param array $options | 83 * |
90 * Configuration options when parsing the HTML | |
91 * @return \DOMDocument A DOM document. DOM is part of libxml, which is included with | 84 * @return \DOMDocument A DOM document. DOM is part of libxml, which is included with |
92 * almost all distributions of PHP. | 85 * almost all distributions of PHP. |
93 */ | 86 */ |
94 public function loadHTML($string, array $options = array()) | 87 public function loadHTML($string, array $options = array()) |
95 { | 88 { |
96 $input = new StringInputStream($string); | 89 return $this->parse($string, $options); |
97 | |
98 return $this->parse($input, $options); | |
99 } | 90 } |
100 | 91 |
101 /** | 92 /** |
102 * Convenience function to load an HTML file. | 93 * Convenience function to load an HTML file. |
103 * | 94 * |
104 * This is here to provide backwards compatibility with the | 95 * This is here to provide backwards compatibility with the |
105 * PHP DOM implementation. It simply calls load(). | 96 * PHP DOM implementation. It simply calls load(). |
106 * | 97 * |
107 * @param string $file | 98 * @param string $file The path to the file to parse. If this is a resource, it is |
108 * The path to the file to parse. If this is a resource, it is | 99 * assumed to be an open stream whose pointer is set to the first |
109 * assumed to be an open stream whose pointer is set to the first | 100 * byte of input. |
110 * byte of input. | 101 * @param array $options Configuration options when parsing the HTML. |
111 * @param array $options | |
112 * Configuration options when parsing the HTML | |
113 * | 102 * |
114 * @return \DOMDocument A DOM document. This object type is defined by the libxml | 103 * @return \DOMDocument A DOM document. This object type is defined by the libxml |
115 * library, and should have been included with your version of PHP. | 104 * library, and should have been included with your version of PHP. |
116 */ | 105 */ |
117 public function loadHTMLFile($file, array $options = array()) | 106 public function loadHTMLFile($file, array $options = array()) |
118 { | 107 { |
119 return $this->load($file, $options); | 108 return $this->load($file, $options); |
120 } | 109 } |
121 | 110 |
122 /** | 111 /** |
123 * Parse a HTML fragment from a string. | 112 * Parse a HTML fragment from a string. |
124 * | 113 * |
125 * @param string $string | 114 * @param string $string the HTML5 fragment as a string |
126 * The html5 fragment as a string. | 115 * @param array $options Configuration options when parsing the HTML |
127 * @param array $options | |
128 * Configuration options when parsing the HTML | |
129 * | 116 * |
130 * @return \DOMDocumentFragment A DOM fragment. The DOM is part of libxml, which is included with | 117 * @return \DOMDocumentFragment A DOM fragment. The DOM is part of libxml, which is included with |
131 * almost all distributions of PHP. | 118 * almost all distributions of PHP. |
132 */ | 119 */ |
133 public function loadHTMLFragment($string, array $options = array()) | 120 public function loadHTMLFragment($string, array $options = array()) |
134 { | 121 { |
135 $input = new StringInputStream($string); | 122 return $this->parseFragment($string, $options); |
136 | 123 } |
137 return $this->parseFragment($input, $options); | 124 |
138 } | 125 /** |
139 | 126 * Return all errors encountered during the parsing phase. |
140 /** | |
141 * Return all errors encountered during the parsing phase | |
142 * | 127 * |
143 * @return array | 128 * @return array |
144 */ | 129 */ |
145 public function getErrors() | 130 public function getErrors() |
146 { | 131 { |
147 return $this->errors; | 132 return $this->errors; |
148 } | 133 } |
149 | 134 |
150 /** | 135 /** |
151 * Return true if some errors were encountered during the parsing phase | 136 * Return true if some errors were encountered during the parsing phase. |
152 * | 137 * |
153 * @return bool | 138 * @return bool |
154 */ | 139 */ |
155 public function hasErrors() | 140 public function hasErrors() |
156 { | 141 { |
157 return count($this->errors) > 0; | 142 return count($this->errors) > 0; |
158 } | 143 } |
159 | 144 |
160 /** | 145 /** |
161 * Parse an input stream. | 146 * Parse an input string. |
147 * | |
148 * @param string $input | |
149 * @param array $options | |
150 * | |
151 * @return \DOMDocument | |
152 */ | |
153 public function parse($input, array $options = array()) | |
154 { | |
155 $this->errors = array(); | |
156 $options = array_merge($this->defaultOptions, $options); | |
157 $events = new DOMTreeBuilder(false, $options); | |
158 $scanner = new Scanner($input); | |
159 $parser = new Tokenizer($scanner, $events, !empty($options['xmlNamespaces']) ? Tokenizer::CONFORMANT_XML : Tokenizer::CONFORMANT_HTML); | |
160 | |
161 $parser->parse(); | |
162 $this->errors = $events->getErrors(); | |
163 | |
164 return $events->document(); | |
165 } | |
166 | |
167 /** | |
168 * Parse an input stream where the stream is a fragment. | |
162 * | 169 * |
163 * Lower-level loading function. This requires an input stream instead | 170 * Lower-level loading function. This requires an input stream instead |
164 * of a string, file, or resource. | 171 * of a string, file, or resource. |
165 */ | 172 * |
166 public function parse(\Masterminds\HTML5\Parser\InputStream $input, array $options = array()) | 173 * @param string $input The input data to parse in the form of a string. |
167 { | 174 * @param array $options An array of options. |
168 $this->errors = array(); | 175 * |
169 $options = array_merge($this->getOptions(), $options); | 176 * @return \DOMDocumentFragment |
170 $events = new DOMTreeBuilder(false, $options); | 177 */ |
178 public function parseFragment($input, array $options = array()) | |
179 { | |
180 $options = array_merge($this->defaultOptions, $options); | |
181 $events = new DOMTreeBuilder(true, $options); | |
171 $scanner = new Scanner($input); | 182 $scanner = new Scanner($input); |
172 $parser = new Tokenizer($scanner, $events, !empty($options['xmlNamespaces']) ? Tokenizer::CONFORMANT_XML: Tokenizer::CONFORMANT_HTML); | 183 $parser = new Tokenizer($scanner, $events, !empty($options['xmlNamespaces']) ? Tokenizer::CONFORMANT_XML : Tokenizer::CONFORMANT_HTML); |
173 | 184 |
174 $parser->parse(); | 185 $parser->parse(); |
175 $this->errors = $events->getErrors(); | 186 $this->errors = $events->getErrors(); |
176 | 187 |
177 return $events->document(); | |
178 } | |
179 | |
180 /** | |
181 * Parse an input stream where the stream is a fragment. | |
182 * | |
183 * Lower-level loading function. This requires an input stream instead | |
184 * of a string, file, or resource. | |
185 */ | |
186 public function parseFragment(\Masterminds\HTML5\Parser\InputStream $input, array $options = array()) | |
187 { | |
188 $options = array_merge($this->getOptions(), $options); | |
189 $events = new DOMTreeBuilder(true, $options); | |
190 $scanner = new Scanner($input); | |
191 $parser = new Tokenizer($scanner, $events, !empty($options['xmlNamespaces']) ? Tokenizer::CONFORMANT_XML: Tokenizer::CONFORMANT_HTML); | |
192 | |
193 $parser->parse(); | |
194 $this->errors = $events->getErrors(); | |
195 | |
196 return $events->fragment(); | 188 return $events->fragment(); |
197 } | 189 } |
198 | 190 |
199 /** | 191 /** |
200 * Save a DOM into a given file as HTML5. | 192 * Save a DOM into a given file as HTML5. |
201 * | 193 * |
202 * @param mixed $dom | 194 * @param mixed $dom The DOM to be serialized. |
203 * The DOM to be serialized. | 195 * @param string|resource $file The filename to be written or resource to write to. |
204 * @param string $file | 196 * @param array $options Configuration options when serializing the DOM. These include: |
205 * The filename to be written. | 197 * - encode_entities: Text written to the output is escaped by default and not all |
206 * @param array $options | 198 * entities are encoded. If this is set to true all entities will be encoded. |
207 * Configuration options when serializing the DOM. These include: | 199 * Defaults to false. |
208 * - encode_entities: Text written to the output is escaped by default and not all | |
209 * entities are encoded. If this is set to true all entities will be encoded. | |
210 * Defaults to false. | |
211 */ | 200 */ |
212 public function save($dom, $file, $options = array()) | 201 public function save($dom, $file, $options = array()) |
213 { | 202 { |
214 $close = true; | 203 $close = true; |
215 if (is_resource($file)) { | 204 if (is_resource($file)) { |
216 $stream = $file; | 205 $stream = $file; |
217 $close = false; | 206 $close = false; |
218 } else { | 207 } else { |
219 $stream = fopen($file, 'w'); | 208 $stream = fopen($file, 'wb'); |
220 } | 209 } |
221 $options = array_merge($this->getOptions(), $options); | 210 $options = array_merge($this->defaultOptions, $options); |
222 $rules = new OutputRules($stream, $options); | 211 $rules = new OutputRules($stream, $options); |
223 $trav = new Traverser($dom, $stream, $rules, $options); | 212 $trav = new Traverser($dom, $stream, $rules, $options); |
224 | 213 |
225 $trav->walk(); | 214 $trav->walk(); |
226 | 215 |
230 } | 219 } |
231 | 220 |
232 /** | 221 /** |
233 * Convert a DOM into an HTML5 string. | 222 * Convert a DOM into an HTML5 string. |
234 * | 223 * |
235 * @param mixed $dom | 224 * @param mixed $dom The DOM to be serialized. |
236 * The DOM to be serialized. | 225 * @param array $options Configuration options when serializing the DOM. These include: |
237 * @param array $options | 226 * - encode_entities: Text written to the output is escaped by default and not all |
238 * Configuration options when serializing the DOM. These include: | 227 * entities are encoded. If this is set to true all entities will be encoded. |
239 * - encode_entities: Text written to the output is escaped by default and not all | 228 * Defaults to false. |
240 * entities are encoded. If this is set to true all entities will be encoded. | |
241 * Defaults to false. | |
242 * | 229 * |
243 * @return string An HTML5 document generated from the DOM. | 230 * @return string An HTML5 document generated from the DOM. |
244 */ | 231 */ |
245 public function saveHTML($dom, $options = array()) | 232 public function saveHTML($dom, $options = array()) |
246 { | 233 { |
247 $stream = fopen('php://temp', 'w'); | 234 $stream = fopen('php://temp', 'wb'); |
248 $this->save($dom, $stream, array_merge($this->getOptions(), $options)); | 235 $this->save($dom, $stream, array_merge($this->defaultOptions, $options)); |
249 | 236 |
250 return stream_get_contents($stream, - 1, 0); | 237 return stream_get_contents($stream, -1, 0); |
251 } | 238 } |
252 } | 239 } |