Mercurial > hg > isophonics-drupal-site
comparison vendor/masterminds/html5/src/HTML5.php @ 0:4c8ae668cc8c
Initial import (non-working)
author | Chris Cannam |
---|---|
date | Wed, 29 Nov 2017 16:09:58 +0000 |
parents | |
children | 129ea1e6d783 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4c8ae668cc8c |
---|---|
1 <?php | |
2 namespace Masterminds; | |
3 | |
4 use Masterminds\HTML5\Parser\FileInputStream; | |
5 use Masterminds\HTML5\Parser\StringInputStream; | |
6 use Masterminds\HTML5\Parser\DOMTreeBuilder; | |
7 use Masterminds\HTML5\Parser\Scanner; | |
8 use Masterminds\HTML5\Parser\Tokenizer; | |
9 use Masterminds\HTML5\Serializer\OutputRules; | |
10 use Masterminds\HTML5\Serializer\Traverser; | |
11 | |
12 /** | |
13 * This class offers convenience methods for parsing and serializing HTML5. | |
14 * It is roughly designed to mirror the \DOMDocument class that is | |
15 * provided with most versions of PHP. | |
16 * | |
17 * EXPERIMENTAL. This may change or be completely replaced. | |
18 */ | |
class HTML5
{

    /**
     * Global options for the parser and serializer.
     *
     * These are the defaults; values passed to the constructor are merged
     * over them, and per-call options are merged over the result.
     *
     * @var array
     */
    protected $options = array(
        // If the serializer should encode all entities.
        'encode_entities' => false
    );

    /**
     * Errors reported by the tree builder during the most recent call to
     * parse() or parseFragment().
     *
     * @var array
     */
    protected $errors = array();

    /**
     * @param array $options
     *            Options that override the built-in defaults for every
     *            parse and serialize call made through this instance.
     */
    public function __construct(array $options = array())
    {
        $this->options = array_merge($this->options, $options);
    }

    /**
     * Get the options in effect for this instance.
     *
     * @return array The built-in defaults overridden by the constructor options.
     */
    public function getOptions()
    {
        return $this->options;
    }

    /**
     * Load and parse an HTML file.
     *
     * This will apply the HTML5 parser, which is tolerant of many
     * varieties of HTML, including XHTML 1, HTML 4, and well-formed HTML
     * 3. Note that in these cases, not all of the old data will be
     * preserved. For example, XHTML's XML declaration will be removed.
     *
     * The rules governing parsing are set out in the HTML 5 spec.
     *
     * @param string|resource $file
     *            The path to the file to parse. If this is a resource, it is
     *            assumed to be an open stream whose pointer is set to the first
     *            byte of input.
     * @param array $options
     *            Configuration options when parsing the HTML.
     * @return \DOMDocument A DOM document. This object type is defined by the libxml
     *         library, and should have been included with your version of PHP.
     */
    public function load($file, array $options = array())
    {
        // Handle the case where file is a resource: the remaining stream
        // content is read into memory and parsed as a string.
        if (is_resource($file)) {
            // FIXME: We need a StreamInputStream class.
            return $this->loadHTML(stream_get_contents($file), $options);
        }

        $input = new FileInputStream($file);

        return $this->parse($input, $options);
    }

    /**
     * Parse an HTML document from a string.
     *
     * Take a string of HTML 5 (or earlier) and parse it into a
     * DOMDocument.
     *
     * @param string $string
     *            An HTML5 document as a string.
     * @param array $options
     *            Configuration options when parsing the HTML.
     * @return \DOMDocument A DOM document. DOM is part of libxml, which is included with
     *         almost all distributions of PHP.
     */
    public function loadHTML($string, array $options = array())
    {
        $input = new StringInputStream($string);

        return $this->parse($input, $options);
    }

    /**
     * Convenience function to load an HTML file.
     *
     * This is here to provide backwards compatibility with the
     * PHP DOM implementation. It simply calls load().
     *
     * @param string|resource $file
     *            The path to the file to parse. If this is a resource, it is
     *            assumed to be an open stream whose pointer is set to the first
     *            byte of input.
     * @param array $options
     *            Configuration options when parsing the HTML.
     *
     * @return \DOMDocument A DOM document. This object type is defined by the libxml
     *         library, and should have been included with your version of PHP.
     */
    public function loadHTMLFile($file, array $options = array())
    {
        return $this->load($file, $options);
    }

    /**
     * Parse an HTML fragment from a string.
     *
     * @param string $string
     *            The HTML5 fragment as a string.
     * @param array $options
     *            Configuration options when parsing the HTML.
     *
     * @return \DOMDocumentFragment A DOM fragment. The DOM is part of libxml, which is included with
     *         almost all distributions of PHP.
     */
    public function loadHTMLFragment($string, array $options = array())
    {
        $input = new StringInputStream($string);

        return $this->parseFragment($input, $options);
    }

    /**
     * Return all errors encountered during the most recent parse.
     *
     * @return array
     */
    public function getErrors()
    {
        return $this->errors;
    }

    /**
     * Return true if any errors were encountered during the most recent parse.
     *
     * @return bool
     */
    public function hasErrors()
    {
        return count($this->errors) > 0;
    }

    /**
     * Parse an input stream.
     *
     * Lower-level loading function. This requires an input stream instead
     * of a string, file, or resource.
     *
     * @param \Masterminds\HTML5\Parser\InputStream $input
     *            The stream to read the document from.
     * @param array $options
     *            Configuration options when parsing the HTML; merged over
     *            the instance options.
     * @return mixed Whatever the tree builder's document() returns
     *         (documented by load() as a \DOMDocument).
     */
    public function parse(\Masterminds\HTML5\Parser\InputStream $input, array $options = array())
    {
        return $this->buildTree($input, false, $options)->document();
    }

    /**
     * Parse an input stream where the stream is a fragment.
     *
     * Lower-level loading function. This requires an input stream instead
     * of a string, file, or resource.
     *
     * @param \Masterminds\HTML5\Parser\InputStream $input
     *            The stream to read the fragment from.
     * @param array $options
     *            Configuration options when parsing the HTML; merged over
     *            the instance options.
     * @return mixed Whatever the tree builder's fragment() returns
     *         (documented by loadHTMLFragment() as a \DOMDocumentFragment).
     */
    public function parseFragment(\Masterminds\HTML5\Parser\InputStream $input, array $options = array())
    {
        return $this->buildTree($input, true, $options)->fragment();
    }

    /**
     * Save a DOM into a given file as HTML5.
     *
     * @param mixed $dom
     *            The DOM to be serialized.
     * @param string|resource $file
     *            The filename to be written, or an already-open writable
     *            stream. A stream passed in by the caller is left open.
     * @param array $options
     *            Configuration options when serializing the DOM. These include:
     *            - encode_entities: Text written to the output is escaped by default and not all
     *            entities are encoded. If this is set to true all entities will be encoded.
     *            Defaults to false.
     *
     * @throws \RuntimeException If $file is a path that cannot be opened for writing.
     */
    public function save($dom, $file, $options = array())
    {
        // Only close streams this method opened itself.
        $close = true;
        if (is_resource($file)) {
            $stream = $file;
            $close = false;
        } else {
            $stream = fopen($file, 'w');
            if ($stream === false) {
                // Fail here with a clear message instead of handing `false`
                // to the serializer and failing on the first write.
                throw new \RuntimeException(sprintf('Unable to open "%s" for writing.', $file));
            }
        }
        $options = array_merge($this->getOptions(), $options);
        $rules = new OutputRules($stream, $options);
        $trav = new Traverser($dom, $stream, $rules, $options);

        $trav->walk();

        if ($close) {
            fclose($stream);
        }
    }

    /**
     * Convert a DOM into an HTML5 string.
     *
     * @param mixed $dom
     *            The DOM to be serialized.
     * @param array $options
     *            Configuration options when serializing the DOM. These include:
     *            - encode_entities: Text written to the output is escaped by default and not all
     *            entities are encoded. If this is set to true all entities will be encoded.
     *            Defaults to false.
     *
     * @return string An HTML5 document generated from the DOM.
     */
    public function saveHTML($dom, $options = array())
    {
        // Opened read/write because the serialized output is read back below.
        $stream = fopen('php://temp', 'w+');
        $this->save($dom, $stream, array_merge($this->getOptions(), $options));

        // Offset 0 rewinds to the start of the buffer before reading.
        $html = stream_get_contents($stream, -1, 0);

        // save() leaves caller-supplied streams open, so release it here.
        fclose($stream);

        return $html;
    }

    /**
     * Run the tokenizer over an input stream and return the populated tree builder.
     *
     * Shared implementation behind parse() and parseFragment().
     *
     * @param \Masterminds\HTML5\Parser\InputStream $input
     *            The stream to tokenize.
     * @param bool $isFragment
     *            Passed to the tree builder: true to build a fragment, false for a document.
     * @param array $options
     *            Per-call options; merged over the instance options.
     * @return DOMTreeBuilder The tree builder after tokenizing has finished.
     */
    private function buildTree(\Masterminds\HTML5\Parser\InputStream $input, $isFragment, array $options)
    {
        // Reset first so that a parse which throws cannot leave the errors
        // of an earlier parse visible through getErrors()/hasErrors().
        $this->errors = array();

        $options = array_merge($this->getOptions(), $options);
        $events = new DOMTreeBuilder($isFragment, $options);
        $scanner = new Scanner($input);

        // The presence of the xmlNamespaces option switches the tokenizer to XML conformance.
        $mode = !empty($options['xmlNamespaces']) ? Tokenizer::CONFORMANT_XML : Tokenizer::CONFORMANT_HTML;
        $parser = new Tokenizer($scanner, $events, $mode);

        $parser->parse();
        $this->errors = $events->getErrors();

        return $events;
    }
}