Mercurial > hg > isophonics-drupal-site
diff vendor/masterminds/html5/src/HTML5.php @ 0:4c8ae668cc8c
Initial import (non-working)
author | Chris Cannam |
---|---|
date | Wed, 29 Nov 2017 16:09:58 +0000 |
parents | |
children | 129ea1e6d783 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/vendor/masterminds/html5/src/HTML5.php Wed Nov 29 16:09:58 2017 +0000 @@ -0,0 +1,252 @@ +<?php +namespace Masterminds; + +use Masterminds\HTML5\Parser\FileInputStream; +use Masterminds\HTML5\Parser\StringInputStream; +use Masterminds\HTML5\Parser\DOMTreeBuilder; +use Masterminds\HTML5\Parser\Scanner; +use Masterminds\HTML5\Parser\Tokenizer; +use Masterminds\HTML5\Serializer\OutputRules; +use Masterminds\HTML5\Serializer\Traverser; + +/** + * This class offers convenience methods for parsing and serializing HTML5. + * It is roughly designed to mirror the \DOMDocument class that is + * provided with most versions of PHP. + * + * EXPERIMENTAL. This may change or be completely replaced. + */ +class HTML5 +{ + + /** + * Global options for the parser and serializer. + * + * @var array + */ + protected $options = array( + // If the serializer should encode all entities. + 'encode_entities' => false + ); + + protected $errors = array(); + + public function __construct(array $options = array()) + { + $this->options = array_merge($this->options, $options); + } + + /** + * Get the default options. + * + * @return array The default options. + */ + public function getOptions() + { + return $this->options; + } + + /** + * Load and parse an HTML file. + * + * This will apply the HTML5 parser, which is tolerant of many + * varieties of HTML, including XHTML 1, HTML 4, and well-formed HTML + * 3. Note that in these cases, not all of the old data will be + * preserved. For example, XHTML's XML declaration will be removed. + * + * The rules governing parsing are set out in the HTML 5 spec. + * + * @param string $file + * The path to the file to parse. If this is a resource, it is + * assumed to be an open stream whose pointer is set to the first + * byte of input. + * @param array $options + * Configuration options when parsing the HTML + * @return \DOMDocument A DOM document. These object type is defined by the libxml + * library, and should have been included with your version of PHP. + */ + public function load($file, array $options = array()) + { + // Handle the case where file is a resource. + if (is_resource($file)) { + // FIXME: We need a StreamInputStream class. + return $this->loadHTML(stream_get_contents($file), $options); + } + + $input = new FileInputStream($file); + + return $this->parse($input, $options); + } + + /** + * Parse a HTML Document from a string. + * + * Take a string of HTML 5 (or earlier) and parse it into a + * DOMDocument. + * + * @param string $string + * A html5 document as a string. + * @param array $options + * Configuration options when parsing the HTML + * @return \DOMDocument A DOM document. DOM is part of libxml, which is included with + * almost all distribtions of PHP. + */ + public function loadHTML($string, array $options = array()) + { + $input = new StringInputStream($string); + + return $this->parse($input, $options); + } + + /** + * Convenience function to load an HTML file. + * + * This is here to provide backwards compatibility with the + * PHP DOM implementation. It simply calls load(). + * + * @param string $file + * The path to the file to parse. If this is a resource, it is + * assumed to be an open stream whose pointer is set to the first + * byte of input. + * @param array $options + * Configuration options when parsing the HTML + * + * @return \DOMDocument A DOM document. These object type is defined by the libxml + * library, and should have been included with your version of PHP. + */ + public function loadHTMLFile($file, array $options = array()) + { + return $this->load($file, $options); + } + + /** + * Parse a HTML fragment from a string. + * + * @param string $string + * The html5 fragment as a string. + * @param array $options + * Configuration options when parsing the HTML + * + * @return \DOMDocumentFragment A DOM fragment. The DOM is part of libxml, which is included with + * almost all distributions of PHP. + */ + public function loadHTMLFragment($string, array $options = array()) + { + $input = new StringInputStream($string); + + return $this->parseFragment($input, $options); + } + + /** + * Return all errors encountered into parsing phase + * + * @return array + */ + public function getErrors() + { + return $this->errors; + } + + /** + * Return true it some errors were encountered into parsing phase + * + * @return bool + */ + public function hasErrors() + { + return count($this->errors) > 0; + } + + /** + * Parse an input stream. + * + * Lower-level loading function. This requires an input stream instead + * of a string, file, or resource. + */ + public function parse(\Masterminds\HTML5\Parser\InputStream $input, array $options = array()) + { + $this->errors = array(); + $options = array_merge($this->getOptions(), $options); + $events = new DOMTreeBuilder(false, $options); + $scanner = new Scanner($input); + $parser = new Tokenizer($scanner, $events, !empty($options['xmlNamespaces']) ? Tokenizer::CONFORMANT_XML: Tokenizer::CONFORMANT_HTML); + + $parser->parse(); + $this->errors = $events->getErrors(); + + return $events->document(); + } + + /** + * Parse an input stream where the stream is a fragment. + * + * Lower-level loading function. This requires an input stream instead + * of a string, file, or resource. + */ + public function parseFragment(\Masterminds\HTML5\Parser\InputStream $input, array $options = array()) + { + $options = array_merge($this->getOptions(), $options); + $events = new DOMTreeBuilder(true, $options); + $scanner = new Scanner($input); + $parser = new Tokenizer($scanner, $events, !empty($options['xmlNamespaces']) ? Tokenizer::CONFORMANT_XML: Tokenizer::CONFORMANT_HTML); + + $parser->parse(); + $this->errors = $events->getErrors(); + + return $events->fragment(); + } + + /** + * Save a DOM into a given file as HTML5. + * + * @param mixed $dom + * The DOM to be serialized. + * @param string $file + * The filename to be written. + * @param array $options + * Configuration options when serializing the DOM. These include: + * - encode_entities: Text written to the output is escaped by default and not all + * entities are encoded. If this is set to true all entities will be encoded. + * Defaults to false. + */ + public function save($dom, $file, $options = array()) + { + $close = true; + if (is_resource($file)) { + $stream = $file; + $close = false; + } else { + $stream = fopen($file, 'w'); + } + $options = array_merge($this->getOptions(), $options); + $rules = new OutputRules($stream, $options); + $trav = new Traverser($dom, $stream, $rules, $options); + + $trav->walk(); + + if ($close) { + fclose($stream); + } + } + + /** + * Convert a DOM into an HTML5 string. + * + * @param mixed $dom + * The DOM to be serialized. + * @param array $options + * Configuration options when serializing the DOM. These include: + * - encode_entities: Text written to the output is escaped by default and not all + * entities are encoded. If this is set to true all entities will be encoded. + * Defaults to false. + * + * @return string A HTML5 documented generated from the DOM. + */ + public function saveHTML($dom, $options = array()) + { + $stream = fopen('php://temp', 'w'); + $this->save($dom, $stream, array_merge($this->getOptions(), $options)); + + return stream_get_contents($stream, - 1, 0); + } +}