annotate core/includes/unicode.inc @ 19:fa3358dc1485 tip

Add ndrum files
author Chris Cannam
date Wed, 28 Aug 2019 13:14:47 +0100
parents 129ea1e6d783
children
rev   line source
Chris@0 1 <?php
Chris@0 2
Chris@0 3 /**
Chris@0 4 * @file
Chris@0 5 * Provides Unicode-related conversions and operations.
Chris@0 6 */
Chris@0 7
Chris@0 8 use Drupal\Component\Utility\Unicode;
Chris@0 9
Chris@0 10 /**
Chris@0 11 * Returns Unicode library status and errors.
Chris@0 12 */
Chris@17 13
/**
 * Builds the 'unicode' requirements entry from the Unicode library status.
 *
 * The logic of this function now lives in system_requirements().
 *
 * @return array
 *   An array with a single 'unicode' key holding the requirement's 'title',
 *   'value' and 'severity', plus a 'description' when Unicode::check()
 *   reports one of the known failed checks.
 *
 * @deprecated in Drupal 8.4.0 and will be removed before Drupal 9.0.0.
 *
 * @see https://www.drupal.org/node/2884698
 */
function unicode_requirements() {
  @trigger_error('unicode_requirements() is deprecated in Drupal 8.4.0 and will be removed before Drupal 9.0.0. There is no replacement; system_requirements() now includes the logic instead. See https://www.drupal.org/node/2884698', E_USER_DEPRECATED);

  // Human-readable label for each Unicode library status.
  $status_labels = [
    Unicode::STATUS_SINGLEBYTE => t('Standard PHP'),
    Unicode::STATUS_MULTIBYTE => t('PHP Mbstring Extension'),
    Unicode::STATUS_ERROR => t('Error'),
  ];
  // Requirement severity reported for each Unicode library status.
  $status_severities = [
    Unicode::STATUS_SINGLEBYTE => REQUIREMENT_WARNING,
    Unicode::STATUS_MULTIBYTE => NULL,
    Unicode::STATUS_ERROR => REQUIREMENT_ERROR,
  ];

  $problem = Unicode::check();
  $status = Unicode::getStatus();

  $unicode = [
    'title' => t('Unicode library'),
    'value' => $status_labels[$status],
    'severity' => $status_severities[$status],
  ];

  // Attach an explanation when a specific check failed. Loose comparison is
  // kept on purpose so the matching rules are the same as a switch statement.
  if ($problem == 'mb_strlen') {
    $unicode['description'] = t('Operations on Unicode strings are emulated on a best-effort basis. Install the <a href="http://php.net/mbstring">PHP mbstring extension</a> for improved Unicode support.');
  }
  elseif ($problem == 'mbstring.func_overload') {
    $unicode['description'] = t('Multibyte string function overloading in PHP is active and must be disabled. Check the php.ini <em>mbstring.func_overload</em> setting. Please refer to the <a href="http://php.net/mbstring">PHP mbstring documentation</a> for more information.');
  }
  elseif ($problem == 'mbstring.encoding_translation') {
    $unicode['description'] = t('Multibyte string input conversion in PHP is active and must be disabled. Check the php.ini <em>mbstring.encoding_translation</em> setting. Please refer to the <a href="http://php.net/mbstring">PHP mbstring documentation</a> for more information.');
  }
  elseif ($problem == 'mbstring.http_input') {
    $unicode['description'] = t('Multibyte string input conversion in PHP is active and must be disabled. Check the php.ini <em>mbstring.http_input</em> setting. Please refer to the <a href="http://php.net/mbstring">PHP mbstring documentation</a> for more information.');
  }
  elseif ($problem == 'mbstring.http_output') {
    $unicode['description'] = t('Multibyte string output conversion in PHP is active and must be disabled. Check the php.ini <em>mbstring.http_output</em> setting. Please refer to the <a href="http://php.net/mbstring">PHP mbstring documentation</a> for more information.');
  }

  return ['unicode' => $unicode];
}
Chris@0 66
/**
 * Prepares a new XML parser.
 *
 * This is a wrapper around xml_parser_create() which extracts the encoding
 * from the XML data first and sets the output encoding to UTF-8. This function
 * should be used instead of xml_parser_create(), because PHP 4's XML parser
 * doesn't check the input encoding itself. "Starting from PHP 5, the input
 * encoding is automatically detected, so that the encoding parameter specifies
 * only the output encoding."
 *
 * The encoding is taken from the encoding declaration in the XML prolog unless
 * the data starts with a UTF-8 byte order mark. The declared value may be
 * wrapped in single or double quotes, with optional whitespace around "=".
 *
 * This is also where unsupported encodings (anything other than UTF-8,
 * ISO-8859-1 and US-ASCII) will be converted to UTF-8. Callers should take
 * this into account: $data might have been changed after the call. A leading
 * UTF-8 byte order mark is stripped, and after a conversion the encoding
 * declaration is rewritten to "utf-8".
 *
 * @param string $data
 *   The XML data which will be parsed later. Passed by reference and possibly
 *   modified as described above.
 *
 * @return
 *   An XML parser object, or FALSE if the data could not be converted to
 *   UTF-8.
 *
 * @ingroup php_wrappers
 *
 * @deprecated in Drupal 8.3.0 and will be removed in Drupal 9.0.0. Use
 *   xml_parser_create() and
 *   xml_parser_set_option($xml_parser, XML_OPTION_TARGET_ENCODING, 'utf-8')
 *   instead.
 */
function drupal_xml_parser_create(&$data) {
  // Default XML encoding is UTF-8.
  $encoding = 'utf-8';
  $bom = FALSE;

  // Check for UTF-8 byte order mark (PHP5's XML parser doesn't handle it).
  if (!strncmp($data, "\xEF\xBB\xBF", 3)) {
    $bom = TRUE;
    $data = substr($data, 3);
  }

  // Check for an encoding declaration in the XML prolog if no BOM was found.
  // XML allows the value to be quoted with either ' or ", and allows optional
  // whitespace around the equals sign.
  if (!$bom && preg_match('/^<\?xml[^>]+encoding\s*=\s*(["\'])(.+?)\1/', $data, $match)) {
    $encoding = $match[2];
  }

  // Unsupported encodings are converted here into UTF-8.
  $php_supported = ['utf-8', 'iso-8859-1', 'us-ascii'];
  if (!in_array(strtolower($encoding), $php_supported, TRUE)) {
    $out = Unicode::convertToUtf8($data, $encoding);
    if ($out === FALSE) {
      \Drupal::logger('php')->warning('Could not convert XML encoding %s to UTF-8.', ['%s' => $encoding]);
      return FALSE;
    }
    $encoding = 'utf-8';
    // Rewrite the declaration so it matches the converted data, whichever
    // quote style the original declaration used.
    $data = preg_replace('/^(<\?xml[^>]+encoding)\s*=\s*(["\'])(.+?)\2/', '\\1="utf-8"', $out);
  }

  $xml_parser = xml_parser_create($encoding);
  xml_parser_set_option($xml_parser, XML_OPTION_TARGET_ENCODING, 'utf-8');
  return $xml_parser;
}