comparison vendor/masterminds/html5/src/HTML5.php @ 0:4c8ae668cc8c

Initial import (non-working)
author Chris Cannam
date Wed, 29 Nov 2017 16:09:58 +0000
parents
children 129ea1e6d783
comparison
equal deleted inserted replaced
-1:000000000000 0:4c8ae668cc8c
1 <?php
2 namespace Masterminds;
3
4 use Masterminds\HTML5\Parser\FileInputStream;
5 use Masterminds\HTML5\Parser\StringInputStream;
6 use Masterminds\HTML5\Parser\DOMTreeBuilder;
7 use Masterminds\HTML5\Parser\Scanner;
8 use Masterminds\HTML5\Parser\Tokenizer;
9 use Masterminds\HTML5\Serializer\OutputRules;
10 use Masterminds\HTML5\Serializer\Traverser;
11
12 /**
13 * This class offers convenience methods for parsing and serializing HTML5.
14 * It is roughly designed to mirror the \DOMDocument class that is
15 * provided with most versions of PHP.
16 *
17 * EXPERIMENTAL. This may change or be completely replaced.
18 */
19 class HTML5
20 {
21
/**
 * Options shared by the parser and the serializer.
 *
 * Options passed to the constructor, and to individual load/save calls,
 * are merged over these values.
 *
 * @var array
 */
protected $options = array(
    // When true, the serializer encodes all entities.
    'encode_entities' => false,
);

/**
 * Errors collected from the tree builder by the most recent
 * parse() or parseFragment() call.
 *
 * @var array
 */
protected $errors = array();
33
/**
 * Create a new HTML5 helper.
 *
 * @param array $options
 *   Options laid over the built-in defaults; a key given here replaces
 *   the default of the same name.
 */
public function __construct(array $options = array())
{
    $merged = array_merge($this->options, $options);
    $this->options = $merged;
}
38
/**
 * Get the options in effect for this instance.
 *
 * These are the built-in defaults overlaid with whatever was passed to
 * the constructor.
 *
 * @return array The instance-wide options.
 */
public function getOptions()
{
    $current = $this->options;

    return $current;
}
48
/**
 * Load and parse an HTML file.
 *
 * This will apply the HTML5 parser, which is tolerant of many
 * varieties of HTML, including XHTML 1, HTML 4, and well-formed HTML
 * 3. Note that in these cases, not all of the old data will be
 * preserved. For example, XHTML's XML declaration will be removed.
 *
 * The rules governing parsing are set out in the HTML 5 spec.
 *
 * @param string|resource $file
 *   The path to the file to parse. If this is a resource, it is
 *   assumed to be an open stream whose pointer is set to the first
 *   byte of input.
 * @param array $options
 *   Configuration options when parsing the HTML.
 *
 * @return \DOMDocument A DOM document. This object type is defined by the libxml
 *   library, and should have been included with your version of PHP.
 */
public function load($file, array $options = array())
{
    if (!is_resource($file)) {
        // Anything that is not an open stream is handed to FileInputStream as-is.
        return $this->parse(new FileInputStream($file), $options);
    }

    // FIXME: We need a StreamInputStream class.
    // Until one exists, the remainder of the stream is read into a string
    // and parsed through loadHTML().
    $contents = stream_get_contents($file);

    return $this->loadHTML($contents, $options);
}
80
/**
 * Parse an HTML document from a string.
 *
 * Takes a string of HTML 5 (or earlier) and parses it into a
 * DOMDocument.
 *
 * @param string $string
 *   An HTML5 document as a string.
 * @param array $options
 *   Configuration options when parsing the HTML.
 *
 * @return \DOMDocument A DOM document. DOM is part of libxml, which is included with
 *   almost all distributions of PHP.
 */
public function loadHTML($string, array $options = array())
{
    return $this->parse(new StringInputStream($string), $options);
}
100
/**
 * Convenience function to load an HTML file.
 *
 * This is here to provide backwards compatibility with the
 * PHP DOM implementation. It simply calls load().
 *
 * @param string|resource $file
 *   The path to the file to parse. If this is a resource, it is
 *   assumed to be an open stream whose pointer is set to the first
 *   byte of input.
 * @param array $options
 *   Configuration options when parsing the HTML.
 *
 * @return \DOMDocument A DOM document. This object type is defined by the libxml
 *   library, and should have been included with your version of PHP.
 */
public function loadHTMLFile($file, array $options = array())
{
    $document = $this->load($file, $options);

    return $document;
}
121
/**
 * Parse an HTML fragment from a string.
 *
 * @param string $string
 *   The HTML5 fragment as a string.
 * @param array $options
 *   Configuration options when parsing the HTML.
 *
 * @return \DOMDocumentFragment A DOM fragment. The DOM is part of libxml, which is included with
 *   almost all distributions of PHP.
 */
public function loadHTMLFragment($string, array $options = array())
{
    return $this->parseFragment(new StringInputStream($string), $options);
}
139
/**
 * Return all errors encountered during the most recent parse.
 *
 * @return array The errors recorded by the last parse() or parseFragment() call.
 */
public function getErrors()
{
    $recorded = $this->errors;

    return $recorded;
}
149
/**
 * Return true if any errors were encountered during the most recent parse.
 *
 * @return bool True when at least one error is recorded, false otherwise.
 */
public function hasErrors()
{
    return 0 < count($this->errors);
}
159
/**
 * Parse an input stream.
 *
 * Lower-level loading function. This requires an input stream instead
 * of a string, file, or resource.
 *
 * @param \Masterminds\HTML5\Parser\InputStream $input
 *   The stream to read the document from.
 * @param array $options
 *   Per-call options, merged over the instance options. A non-empty
 *   'xmlNamespaces' entry switches the tokenizer to its XML-conformant mode.
 *
 * @return \DOMDocument The document produced by the tree builder.
 */
public function parse(\Masterminds\HTML5\Parser\InputStream $input, array $options = array())
{
    // Forget errors from any earlier parse before starting this one.
    $this->errors = array();

    $settings = array_merge($this->getOptions(), $options);
    $mode = empty($settings['xmlNamespaces']) ? Tokenizer::CONFORMANT_HTML : Tokenizer::CONFORMANT_XML;

    // false: build a full document rather than a fragment.
    $builder = new DOMTreeBuilder(false, $settings);
    $tokenizer = new Tokenizer(new Scanner($input), $builder, $mode);
    $tokenizer->parse();

    $this->errors = $builder->getErrors();

    return $builder->document();
}
179
/**
 * Parse an input stream where the stream is a fragment.
 *
 * Lower-level loading function. This requires an input stream instead
 * of a string, file, or resource.
 *
 * @param \Masterminds\HTML5\Parser\InputStream $input
 *   The stream to read the fragment from.
 * @param array $options
 *   Per-call options, merged over the instance options. A non-empty
 *   'xmlNamespaces' entry switches the tokenizer to its XML-conformant mode.
 *
 * @return \DOMDocumentFragment The fragment produced by the tree builder.
 */
public function parseFragment(\Masterminds\HTML5\Parser\InputStream $input, array $options = array())
{
    // Clear errors up front, as parse() does, so that a failure part-way
    // through does not leave the previous parse's errors visible.
    $this->errors = array();

    $options = array_merge($this->getOptions(), $options);
    // true: build a fragment rather than a full document.
    $events = new DOMTreeBuilder(true, $options);
    $scanner = new Scanner($input);
    $parser = new Tokenizer($scanner, $events, !empty($options['xmlNamespaces']) ? Tokenizer::CONFORMANT_XML : Tokenizer::CONFORMANT_HTML);

    $parser->parse();
    $this->errors = $events->getErrors();

    return $events->fragment();
}
198
/**
 * Save a DOM into a given file as HTML5.
 *
 * @param mixed $dom
 *   The DOM to be serialized.
 * @param string|resource $file
 *   The filename to be written, or an already-open stream to write to.
 *   A stream supplied by the caller is written to but not closed.
 * @param array $options
 *   Configuration options when serializing the DOM. These include:
 *   - encode_entities: Text written to the output is escaped by default and not all
 *     entities are encoded. If this is set to true all entities will be encoded.
 *     Defaults to false.
 *
 * @throws \RuntimeException If $file is a path that cannot be opened for writing.
 */
public function save($dom, $file, $options = array())
{
    // Only a stream opened here is ours to close.
    $close = !is_resource($file);
    if ($close) {
        $stream = fopen($file, 'w');
        if (false === $stream) {
            // Fail loudly rather than handing `false` to the serializer.
            throw new \RuntimeException(sprintf('Unable to open "%s" for writing.', $file));
        }
    } else {
        $stream = $file;
    }

    $options = array_merge($this->getOptions(), $options);

    // try/catch-and-rethrow rather than `finally`, in keeping with the
    // older-PHP syntax used throughout this file.
    try {
        $rules = new OutputRules($stream, $options);
        $trav = new Traverser($dom, $stream, $rules, $options);

        $trav->walk();
    } catch (\Exception $e) {
        // Do not leak the handle we opened when serialization fails.
        if ($close) {
            fclose($stream);
        }

        throw $e;
    }

    if ($close) {
        fclose($stream);
    }
}
231
/**
 * Convert a DOM into an HTML5 string.
 *
 * @param mixed $dom
 *   The DOM to be serialized.
 * @param array $options
 *   Configuration options when serializing the DOM. These include:
 *   - encode_entities: Text written to the output is escaped by default and not all
 *     entities are encoded. If this is set to true all entities will be encoded.
 *     Defaults to false.
 *
 * @return string An HTML5 document generated from the DOM.
 */
public function saveHTML($dom, $options = array())
{
    // Serialize into a temporary read-write stream, then read it back.
    $stream = fopen('php://temp', 'w');

    try {
        $this->save($dom, $stream, array_merge($this->getOptions(), $options));

        // Read from offset 0: after save() the position is at the end of the data.
        $html = stream_get_contents($stream, -1, 0);
    } catch (\Exception $e) {
        // save() leaves caller-supplied streams open, so release it here.
        fclose($stream);

        throw $e;
    }

    // Release the temporary stream explicitly instead of leaving it open.
    fclose($stream);

    return $html;
}
252 }