Mercurial > hg > isophonics-drupal-site
diff vendor/masterminds/html5/src/HTML5/Parser/UTF8Utils.php @ 17:129ea1e6d783
Update, including to Drupal core 8.6.10
author | Chris Cannam |
---|---|
date | Thu, 28 Feb 2019 13:21:36 +0000 |
parents | 4c8ae668cc8c |
children | af1871eacc83 |
line wrap: on
line diff
--- a/vendor/masterminds/html5/src/HTML5/Parser/UTF8Utils.php Tue Jul 10 15:07:59 2018 +0100 +++ b/vendor/masterminds/html5/src/HTML5/Parser/UTF8Utils.php Thu Feb 28 13:21:36 2019 +0000 @@ -1,5 +1,7 @@ <?php + namespace Masterminds\HTML5\Parser; + /* * * Portions based on code from html5lib files with the following copyright: @@ -26,12 +28,14 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +use Masterminds\HTML5\Exception; + /** - * UTF-8 Utilities + * UTF-8 Utilities. */ class UTF8Utils { - /** * The Unicode replacement character.. */ @@ -44,6 +48,10 @@ * MB, libxml, and finally a custom counter. * * @todo Move this to a general utility class. + * + * @param string $string + * + * @return int */ public static function countChars($string) { @@ -69,10 +77,10 @@ * This has not yet been tested with charactersets other than UTF-8. * It should work with ISO-8859-1/-13 and standard Latin Win charsets. * - * @param string $data - * The data to convert. - * @param string $encoding - * A valid encoding. Examples: http://www.php.net/manual/en/mbstring.supported-encodings.php + * @param string $data The data to convert + * @param string $encoding A valid encoding. Examples: http://www.php.net/manual/en/mbstring.supported-encodings.php + * + * @return string */ public static function convertToUTF8($data, $encoding = 'UTF-8') { @@ -99,7 +107,7 @@ $data = mb_convert_encoding($data, 'UTF-8', $encoding); mb_substitute_character($save); } // @todo Get iconv running in at least some environments if that is possible. - elseif (function_exists('iconv') && $encoding != 'auto') { + elseif (function_exists('iconv') && 'auto' !== $encoding) { // fprintf(STDOUT, "iconv found\n"); // iconv has the following behaviors: // - Overlong representations are ignored. @@ -107,14 +115,13 @@ // - Incomplete sequences generate a warning. $data = @iconv($encoding, 'UTF-8//IGNORE', $data); } else { - // we can make a conforming native implementation throw new Exception('Not implemented, please install mbstring or iconv'); } /* * One leading U+FEFF BYTE ORDER MARK character must be ignored if any are present. */ - if (substr($data, 0, 3) === "\xEF\xBB\xBF") { + if ("\xEF\xBB\xBF" === substr($data, 0, 3)) { $data = substr($data, 3); } @@ -124,23 +131,19 @@ /** * Checks for Unicode code points that are not valid in a document. * - * @param string $data - * A string to analyze. - * @return array An array of (string) error messages produced by the scanning. + * @param string $data A string to analyze + * + * @return array An array of (string) error messages produced by the scanning */ public static function checkForIllegalCodepoints($data) { - if (! function_exists('preg_match_all')) { - throw\Exception('The PCRE library is not loaded or is not available.'); - } - // Vestigal error handling. $errors = array(); /* * All U+0000 null characters in the input must be replaced by U+FFFD REPLACEMENT CHARACTERs. Any occurrences of such characters is a parse error. */ - for ($i = 0, $count = substr_count($data, "\0"); $i < $count; $i ++) { + for ($i = 0, $count = substr_count($data, "\0"); $i < $count; ++$i) { $errors[] = 'null-character'; } @@ -162,7 +165,7 @@ | [\xF0-\xF4][\x8F-\xBF]\xBF[\xBE\xBF] # U+nFFFE and U+nFFFF (1 <= n <= 10_{16}) )/x', $data, $matches); - for ($i = 0; $i < $count; $i ++) { + for ($i = 0; $i < $count; ++$i) { $errors[] = 'invalid-codepoint'; }