Mercurial > hg > isophonics-drupal-site
comparison core/lib/Drupal/Component/Utility/Unicode.php @ 17:129ea1e6d783
Update, including to Drupal core 8.6.10
author | Chris Cannam |
---|---|
date | Thu, 28 Feb 2019 13:21:36 +0000 |
parents | 1fec387a4317 |
children |
comparison
equal
deleted
inserted
replaced
16:c2387f117808 | 17:129ea1e6d783 |
---|---|
86 * Indicates an error during check for PHP unicode support. | 86 * Indicates an error during check for PHP unicode support. |
87 */ | 87 */ |
88 const STATUS_ERROR = -1; | 88 const STATUS_ERROR = -1; |
89 | 89 |
90 /** | 90 /** |
91 * Holds the multibyte capabilities of the current environment. | |
92 * | |
93 * @var int | |
94 */ | |
95 protected static $status = 0; | |
96 | |
97 /** | |
98 * Gets the current status of unicode/multibyte support on this environment. | 91 * Gets the current status of unicode/multibyte support on this environment. |
99 * | 92 * |
100 * @return int | 93 * @return int |
101 * The status of multibyte support. It can be one of: | 94 * The status of multibyte support. It can be one of: |
102 * - \Drupal\Component\Utility\Unicode::STATUS_MULTIBYTE | 95 * - \Drupal\Component\Utility\Unicode::STATUS_MULTIBYTE |
105 * Standard PHP (emulated) unicode support. | 98 * Standard PHP (emulated) unicode support. |
106 * - \Drupal\Component\Utility\Unicode::STATUS_ERROR | 99 * - \Drupal\Component\Utility\Unicode::STATUS_ERROR |
107 * An error occurred. No unicode support. | 100 * An error occurred. No unicode support. |
108 */ | 101 */ |
109 public static function getStatus() { | 102 public static function getStatus() { |
110 return static::$status; | 103 switch (static::check()) { |
104 case 'mb_strlen': | |
105 return Unicode::STATUS_SINGLEBYTE; | |
106 case '': | |
107 return Unicode::STATUS_MULTIBYTE; | |
108 } | |
109 return Unicode::STATUS_ERROR; | |
111 } | 110 } |
112 | 111 |
113 /** | 112 /** |
114 * Sets the value for multibyte support status for the current environment. | 113 * Sets the value for multibyte support status for the current environment. |
115 * | 114 * |
121 * - \Drupal\Component\Utility\Unicode::STATUS_ERROR | 120 * - \Drupal\Component\Utility\Unicode::STATUS_ERROR |
122 * An error occurred. No unicode support. | 121 * An error occurred. No unicode support. |
123 * | 122 * |
124 * @param int $status | 123 * @param int $status |
125 * The new status of multibyte support. | 124 * The new status of multibyte support. |
125 * | |
126 * @deprecated in Drupal 8.6.0 and will be removed before Drupal 9.0.0. In | |
127 * Drupal 9 there will be no way to set the status and in Drupal 8 this | |
128 * ability has been removed because mb_*() functions are supplied using | |
129 * Symfony's polyfill. | |
130 * | |
131 * @see https://www.drupal.org/node/2850048 | |
126 */ | 132 */ |
127 public static function setStatus($status) { | 133 public static function setStatus($status) { |
128 if (!in_array($status, [static::STATUS_SINGLEBYTE, static::STATUS_MULTIBYTE, static::STATUS_ERROR])) { | 134 @trigger_error('\Drupal\Component\Utility\Unicode::setStatus() is deprecated in Drupal 8.6.0 and will be removed before Drupal 9.0.0. In Drupal 9 there will be no way to set the status and in Drupal 8 this ability has been removed because mb_*() functions are supplied using Symfony\'s polyfill. See https://www.drupal.org/node/2850048.', E_USER_DEPRECATED); |
129 throw new \InvalidArgumentException('Invalid status value for unicode support.'); | |
130 } | |
131 static::$status = $status; | |
132 } | 135 } |
133 | 136 |
134 /** | 137 /** |
135 * Checks for Unicode support in PHP and sets the proper settings if possible. | 138 * Checks for Unicode support in PHP and sets the proper settings if possible. |
136 * | 139 * |
141 * @return string | 144 * @return string |
142 * A string identifier of a failed multibyte extension check, if any. | 145 * A string identifier of a failed multibyte extension check, if any. |
143 * Otherwise, an empty string. | 146 * Otherwise, an empty string. |
144 */ | 147 */ |
145 public static function check() { | 148 public static function check() { |
149 // Set appropriate configuration. | |
150 mb_internal_encoding('utf-8'); | |
151 mb_language('uni'); | |
152 | |
146 // Check for mbstring extension. | 153 // Check for mbstring extension. |
147 if (!function_exists('mb_strlen')) { | 154 if (!extension_loaded('mbstring')) { |
148 static::$status = static::STATUS_SINGLEBYTE; | |
149 return 'mb_strlen'; | 155 return 'mb_strlen'; |
150 } | 156 } |
151 | 157 |
152 // Check mbstring configuration. | 158 // Check mbstring configuration. |
153 if (ini_get('mbstring.func_overload') != 0) { | 159 if (ini_get('mbstring.func_overload') != 0) { |
154 static::$status = static::STATUS_ERROR; | |
155 return 'mbstring.func_overload'; | 160 return 'mbstring.func_overload'; |
156 } | 161 } |
157 if (ini_get('mbstring.encoding_translation') != 0) { | 162 if (ini_get('mbstring.encoding_translation') != 0) { |
158 static::$status = static::STATUS_ERROR; | |
159 return 'mbstring.encoding_translation'; | 163 return 'mbstring.encoding_translation'; |
160 } | 164 } |
161 // mbstring.http_input and mbstring.http_output are deprecated and empty by | 165 // mbstring.http_input and mbstring.http_output are deprecated and empty by |
162 // default in PHP 5.6. | 166 // default in PHP 5.6. |
163 if (version_compare(PHP_VERSION, '5.6.0') == -1) { | 167 if (version_compare(PHP_VERSION, '5.6.0') == -1) { |
164 if (ini_get('mbstring.http_input') != 'pass') { | 168 if (ini_get('mbstring.http_input') != 'pass') { |
165 static::$status = static::STATUS_ERROR; | |
166 return 'mbstring.http_input'; | 169 return 'mbstring.http_input'; |
167 } | 170 } |
168 if (ini_get('mbstring.http_output') != 'pass') { | 171 if (ini_get('mbstring.http_output') != 'pass') { |
169 static::$status = static::STATUS_ERROR; | |
170 return 'mbstring.http_output'; | 172 return 'mbstring.http_output'; |
171 } | 173 } |
172 } | 174 } |
173 | 175 |
174 // Set appropriate configuration. | |
175 mb_internal_encoding('utf-8'); | |
176 mb_language('uni'); | |
177 static::$status = static::STATUS_MULTIBYTE; | |
178 return ''; | 176 return ''; |
179 } | 177 } |
180 | 178 |
181 /** | 179 /** |
182 * Decodes UTF byte-order mark (BOM) into the encoding's name. | 180 * Decodes UTF byte-order mark (BOM) into the encoding's name. |
222 * | 220 * |
223 * @return string|bool | 221 * @return string|bool |
224 * Converted data or FALSE. | 222 * Converted data or FALSE. |
225 */ | 223 */ |
226 public static function convertToUtf8($data, $encoding) { | 224 public static function convertToUtf8($data, $encoding) { |
227 if (function_exists('iconv')) { | 225 return @iconv($encoding, 'utf-8', $data); |
228 return @iconv($encoding, 'utf-8', $data); | |
229 } | |
230 elseif (function_exists('mb_convert_encoding')) { | |
231 return @mb_convert_encoding($data, 'utf-8', $encoding); | |
232 } | |
233 elseif (function_exists('recode_string')) { | |
234 return @recode_string($encoding . '..utf-8', $data); | |
235 } | |
236 // Cannot convert. | |
237 return FALSE; | |
238 } | 226 } |
239 | 227 |
240 /** | 228 /** |
241 * Truncates a UTF-8-encoded string safely to a number of bytes. | 229 * Truncates a UTF-8-encoded string safely to a number of bytes. |
242 * | 230 * |
279 * @param string $text | 267 * @param string $text |
280 * The string to run the operation on. | 268 * The string to run the operation on. |
281 * | 269 * |
282 * @return int | 270 * @return int |
283 * The length of the string. | 271 * The length of the string. |
272 * | |
273 * @deprecated in Drupal 8.6.0, will be removed before Drupal 9.0.0. Use | |
274 * mb_strlen() instead. | |
275 * | |
276 * @see https://www.drupal.org/node/2850048 | |
284 */ | 277 */ |
285 public static function strlen($text) { | 278 public static function strlen($text) { |
286 if (static::getStatus() == static::STATUS_MULTIBYTE) { | 279 @trigger_error('\Drupal\Component\Utility\Unicode::strlen() is deprecated in Drupal 8.6.0 and will be removed before Drupal 9.0.0. Use mb_strlen() instead. See https://www.drupal.org/node/2850048.', E_USER_DEPRECATED); |
287 return mb_strlen($text); | 280 return mb_strlen($text); |
288 } | |
289 else { | |
290 // Do not count UTF-8 continuation bytes. | |
291 return strlen(preg_replace("/[\x80-\xBF]/", '', $text)); | |
292 } | |
293 } | 281 } |
294 | 282 |
295 /** | 283 /** |
296 * Converts a UTF-8 string to uppercase. | 284 * Converts a UTF-8 string to uppercase. |
297 * | 285 * |
298 * @param string $text | 286 * @param string $text |
299 * The string to run the operation on. | 287 * The string to run the operation on. |
300 * | 288 * |
301 * @return string | 289 * @return string |
302 * The string in uppercase. | 290 * The string in uppercase. |
291 * | |
292 * @deprecated in Drupal 8.6.0, will be removed before Drupal 9.0.0. Use | |
293 * mb_strtoupper() instead. | |
294 * | |
295 * @see https://www.drupal.org/node/2850048 | |
303 */ | 296 */ |
304 public static function strtoupper($text) { | 297 public static function strtoupper($text) { |
305 if (static::getStatus() == static::STATUS_MULTIBYTE) { | 298 @trigger_error('\Drupal\Component\Utility\Unicode::strtoupper() is deprecated in Drupal 8.6.0 and will be removed before Drupal 9.0.0. Use mb_strtoupper() instead. See https://www.drupal.org/node/2850048.', E_USER_DEPRECATED); |
306 return mb_strtoupper($text); | 299 return mb_strtoupper($text); |
307 } | |
308 else { | |
309 // Use C-locale for ASCII-only uppercase. | |
310 $text = strtoupper($text); | |
311 // Case flip Latin-1 accented letters. | |
312 $text = preg_replace_callback('/\xC3[\xA0-\xB6\xB8-\xBE]/', '\Drupal\Component\Utility\Unicode::caseFlip', $text); | |
313 return $text; | |
314 } | |
315 } | 300 } |
316 | 301 |
317 /** | 302 /** |
318 * Converts a UTF-8 string to lowercase. | 303 * Converts a UTF-8 string to lowercase. |
319 * | 304 * |
320 * @param string $text | 305 * @param string $text |
321 * The string to run the operation on. | 306 * The string to run the operation on. |
322 * | 307 * |
323 * @return string | 308 * @return string |
324 * The string in lowercase. | 309 * The string in lowercase. |
310 * | |
311 * @deprecated in Drupal 8.6.0, will be removed before Drupal 9.0.0. Use | |
312 * mb_strtolower() instead. | |
313 * | |
314 * @see https://www.drupal.org/node/2850048 | |
325 */ | 315 */ |
326 public static function strtolower($text) { | 316 public static function strtolower($text) { |
327 if (static::getStatus() == static::STATUS_MULTIBYTE) { | 317 @trigger_error('\Drupal\Component\Utility\Unicode::strtolower() is deprecated in Drupal 8.6.0 and will be removed before Drupal 9.0.0. Use mb_strtolower() instead. See https://www.drupal.org/node/2850048.', E_USER_DEPRECATED); |
328 return mb_strtolower($text); | 318 return mb_strtolower($text); |
329 } | |
330 else { | |
331 // Use C-locale for ASCII-only lowercase. | |
332 $text = strtolower($text); | |
333 // Case flip Latin-1 accented letters. | |
334 $text = preg_replace_callback('/\xC3[\x80-\x96\x98-\x9E]/', '\Drupal\Component\Utility\Unicode::caseFlip', $text); | |
335 return $text; | |
336 } | |
337 } | 319 } |
338 | 320 |
339 /** | 321 /** |
340 * Capitalizes the first character of a UTF-8 string. | 322 * Capitalizes the first character of a UTF-8 string. |
341 * | 323 * |
344 * | 326 * |
345 * @return string | 327 * @return string |
346 * The string with the first character as uppercase. | 328 * The string with the first character as uppercase. |
347 */ | 329 */ |
348 public static function ucfirst($text) { | 330 public static function ucfirst($text) { |
349 return static::strtoupper(static::substr($text, 0, 1)) . static::substr($text, 1); | 331 return mb_strtoupper(mb_substr($text, 0, 1)) . mb_substr($text, 1); |
350 } | 332 } |
351 | 333 |
352 /** | 334 /** |
353 * Converts the first character of a UTF-8 string to lowercase. | 335 * Converts the first character of a UTF-8 string to lowercase. |
354 * | 336 * |
360 * | 342 * |
361 * @ingroup php_wrappers | 343 * @ingroup php_wrappers |
362 */ | 344 */ |
363 public static function lcfirst($text) { | 345 public static function lcfirst($text) { |
364 // Note: no mbstring equivalent! | 346 // Note: no mbstring equivalent! |
365 return static::strtolower(static::substr($text, 0, 1)) . static::substr($text, 1); | 347 return mb_strtolower(mb_substr($text, 0, 1)) . mb_substr($text, 1); |
366 } | 348 } |
367 | 349 |
368 /** | 350 /** |
369 * Capitalizes the first character of each word in a UTF-8 string. | 351 * Capitalizes the first character of each word in a UTF-8 string. |
370 * | 352 * |
377 * @ingroup php_wrappers | 359 * @ingroup php_wrappers |
378 */ | 360 */ |
379 public static function ucwords($text) { | 361 public static function ucwords($text) { |
380 $regex = '/(^|[' . static::PREG_CLASS_WORD_BOUNDARY . '])([^' . static::PREG_CLASS_WORD_BOUNDARY . '])/u'; | 362 $regex = '/(^|[' . static::PREG_CLASS_WORD_BOUNDARY . '])([^' . static::PREG_CLASS_WORD_BOUNDARY . '])/u'; |
381 return preg_replace_callback($regex, function (array $matches) { | 363 return preg_replace_callback($regex, function (array $matches) { |
382 return $matches[1] . Unicode::strtoupper($matches[2]); | 364 return $matches[1] . mb_strtoupper($matches[2]); |
383 }, $text); | 365 }, $text); |
384 } | 366 } |
385 | 367 |
386 /** | 368 /** |
387 * Cuts off a piece of a string based on character indices and counts. | 369 * Cuts off a piece of a string based on character indices and counts. |
397 * @param int $length | 379 * @param int $length |
398 * The number of characters to read. | 380 * The number of characters to read. |
399 * | 381 * |
400 * @return string | 382 * @return string |
401 * The shortened string. | 383 * The shortened string. |
384 * | |
385 * @deprecated in Drupal 8.6.0, will be removed before Drupal 9.0.0. Use | |
386 * mb_substr() instead. | |
387 * | |
388 * @see https://www.drupal.org/node/2850048 | |
402 */ | 389 */ |
403 public static function substr($text, $start, $length = NULL) { | 390 public static function substr($text, $start, $length = NULL) { |
404 if (static::getStatus() == static::STATUS_MULTIBYTE) { | 391 @trigger_error('\Drupal\Component\Utility\Unicode::substr() is deprecated in Drupal 8.6.0 and will be removed before Drupal 9.0.0. Use mb_substr() instead. See https://www.drupal.org/node/2850048.', E_USER_DEPRECATED); |
405 return $length === NULL ? mb_substr($text, $start) : mb_substr($text, $start, $length); | 392 return mb_substr($text, $start, $length); |
406 } | |
407 else { | |
408 $strlen = strlen($text); | |
409 // Find the starting byte offset. | |
410 $bytes = 0; | |
411 if ($start > 0) { | |
412 // Count all the characters except continuation bytes from the start | |
413 // until we have found $start characters or the end of the string. | |
414 $bytes = -1; $chars = -1; | |
415 while ($bytes < $strlen - 1 && $chars < $start) { | |
416 $bytes++; | |
417 $c = ord($text[$bytes]); | |
418 if ($c < 0x80 || $c >= 0xC0) { | |
419 $chars++; | |
420 } | |
421 } | |
422 } | |
423 elseif ($start < 0) { | |
424 // Count all the characters except continuation bytes from the end | |
425 // until we have found abs($start) characters. | |
426 $start = abs($start); | |
427 $bytes = $strlen; $chars = 0; | |
428 while ($bytes > 0 && $chars < $start) { | |
429 $bytes--; | |
430 $c = ord($text[$bytes]); | |
431 if ($c < 0x80 || $c >= 0xC0) { | |
432 $chars++; | |
433 } | |
434 } | |
435 } | |
436 $istart = $bytes; | |
437 | |
438 // Find the ending byte offset. | |
439 if ($length === NULL) { | |
440 $iend = $strlen; | |
441 } | |
442 elseif ($length > 0) { | |
443 // Count all the characters except continuation bytes from the starting | |
444 // index until we have found $length characters or reached the end of | |
445 // the string, then backtrace one byte. | |
446 $iend = $istart - 1; | |
447 $chars = -1; | |
448 $last_real = FALSE; | |
449 while ($iend < $strlen - 1 && $chars < $length) { | |
450 $iend++; | |
451 $c = ord($text[$iend]); | |
452 $last_real = FALSE; | |
453 if ($c < 0x80 || $c >= 0xC0) { | |
454 $chars++; | |
455 $last_real = TRUE; | |
456 } | |
457 } | |
458 // Backtrace one byte if the last character we found was a real | |
459 // character and we don't need it. | |
460 if ($last_real && $chars >= $length) { | |
461 $iend--; | |
462 } | |
463 } | |
464 elseif ($length < 0) { | |
465 // Count all the characters except continuation bytes from the end | |
466 // until we have found abs($start) characters, then backtrace one byte. | |
467 $length = abs($length); | |
468 $iend = $strlen; $chars = 0; | |
469 while ($iend > 0 && $chars < $length) { | |
470 $iend--; | |
471 $c = ord($text[$iend]); | |
472 if ($c < 0x80 || $c >= 0xC0) { | |
473 $chars++; | |
474 } | |
475 } | |
476 // Backtrace one byte if we are not at the beginning of the string. | |
477 if ($iend > 0) { | |
478 $iend--; | |
479 } | |
480 } | |
481 else { | |
482 // $length == 0, return an empty string. | |
483 return ''; | |
484 } | |
485 | |
486 return substr($text, $istart, max(0, $iend - $istart + 1)); | |
487 } | |
488 } | 393 } |
489 | 394 |
490 /** | 395 /** |
491 * Truncates a UTF-8-encoded string safely to a number of characters. | 396 * Truncates a UTF-8-encoded string safely to a number of characters. |
492 * | 397 * |
524 public static function truncate($string, $max_length, $wordsafe = FALSE, $add_ellipsis = FALSE, $min_wordsafe_length = 1) { | 429 public static function truncate($string, $max_length, $wordsafe = FALSE, $add_ellipsis = FALSE, $min_wordsafe_length = 1) { |
525 $ellipsis = ''; | 430 $ellipsis = ''; |
526 $max_length = max($max_length, 0); | 431 $max_length = max($max_length, 0); |
527 $min_wordsafe_length = max($min_wordsafe_length, 0); | 432 $min_wordsafe_length = max($min_wordsafe_length, 0); |
528 | 433 |
529 if (static::strlen($string) <= $max_length) { | 434 if (mb_strlen($string) <= $max_length) { |
530 // No truncation needed, so don't add ellipsis, just return. | 435 // No truncation needed, so don't add ellipsis, just return. |
531 return $string; | 436 return $string; |
532 } | 437 } |
533 | 438 |
534 if ($add_ellipsis) { | 439 if ($add_ellipsis) { |
535 // Truncate ellipsis in case $max_length is small. | 440 // Truncate ellipsis in case $max_length is small. |
536 $ellipsis = static::substr('…', 0, $max_length); | 441 $ellipsis = mb_substr('…', 0, $max_length); |
537 $max_length -= static::strlen($ellipsis); | 442 $max_length -= mb_strlen($ellipsis); |
538 $max_length = max($max_length, 0); | 443 $max_length = max($max_length, 0); |
539 } | 444 } |
540 | 445 |
541 if ($max_length <= $min_wordsafe_length) { | 446 if ($max_length <= $min_wordsafe_length) { |
542 // Do not attempt word-safe if lengths are bad. | 447 // Do not attempt word-safe if lengths are bad. |
551 $found = preg_match('/^(.{' . $min_wordsafe_length . ',' . $max_length . '})[' . Unicode::PREG_CLASS_WORD_BOUNDARY . ']/us', $string, $matches); | 456 $found = preg_match('/^(.{' . $min_wordsafe_length . ',' . $max_length . '})[' . Unicode::PREG_CLASS_WORD_BOUNDARY . ']/us', $string, $matches); |
552 if ($found) { | 457 if ($found) { |
553 $string = $matches[1]; | 458 $string = $matches[1]; |
554 } | 459 } |
555 else { | 460 else { |
556 $string = static::substr($string, 0, $max_length); | 461 $string = mb_substr($string, 0, $max_length); |
557 } | 462 } |
558 } | 463 } |
559 else { | 464 else { |
560 $string = static::substr($string, 0, $max_length); | 465 $string = mb_substr($string, 0, $max_length); |
561 } | 466 } |
562 | 467 |
563 if ($add_ellipsis) { | 468 if ($add_ellipsis) { |
564 // If we're adding an ellipsis, remove any trailing periods. | 469 // If we're adding an ellipsis, remove any trailing periods. |
565 $string = rtrim($string, '.'); | 470 $string = rtrim($string, '.'); |
581 * @return int | 486 * @return int |
582 * Returns < 0 if $str1 is less than $str2; > 0 if $str1 is greater than | 487 * Returns < 0 if $str1 is less than $str2; > 0 if $str1 is greater than |
583 * $str2, and 0 if they are equal. | 488 * $str2, and 0 if they are equal. |
584 */ | 489 */ |
585 public static function strcasecmp($str1, $str2) { | 490 public static function strcasecmp($str1, $str2) { |
586 return strcmp(static::strtoupper($str1), static::strtoupper($str2)); | 491 return strcmp(mb_strtoupper($str1), mb_strtoupper($str2)); |
587 } | 492 } |
588 | 493 |
589 /** | 494 /** |
590 * Encodes MIME/HTTP headers that contain incorrectly encoded characters. | 495 * Encodes MIME/HTTP headers that contain incorrectly encoded characters. |
591 * | 496 * |
713 * | 618 * |
714 * @return int|false | 619 * @return int|false |
715 * The position where $needle occurs in $haystack, always relative to the | 620 * The position where $needle occurs in $haystack, always relative to the |
716 * beginning (independent of $offset), or FALSE if not found. Note that | 621 * beginning (independent of $offset), or FALSE if not found. Note that |
717 * a return value of 0 is not the same as FALSE. | 622 * a return value of 0 is not the same as FALSE. |
623 * | |
624 * @deprecated in Drupal 8.6.0, will be removed before Drupal 9.0.0. Use | |
625 * mb_strpos() instead. | |
626 * | |
627 * @see https://www.drupal.org/node/2850048 | |
718 */ | 628 */ |
719 public static function strpos($haystack, $needle, $offset = 0) { | 629 public static function strpos($haystack, $needle, $offset = 0) { |
720 if (static::getStatus() == static::STATUS_MULTIBYTE) { | 630 @trigger_error('\Drupal\Component\Utility\Unicode::strpos() is deprecated in Drupal 8.6.0 and will be removed before Drupal 9.0.0. Use mb_strpos() instead. See https://www.drupal.org/node/2850048.', E_USER_DEPRECATED); |
721 return mb_strpos($haystack, $needle, $offset); | 631 return mb_strpos($haystack, $needle, $offset); |
722 } | |
723 else { | |
724 // Remove Unicode continuation characters, to be compatible with | |
725 // Unicode::strlen() and Unicode::substr(). | |
726 $haystack = preg_replace("/[\x80-\xBF]/", '', $haystack); | |
727 $needle = preg_replace("/[\x80-\xBF]/", '', $needle); | |
728 return strpos($haystack, $needle, $offset); | |
729 } | |
730 } | 632 } |
731 | 633 |
732 } | 634 } |