Mercurial > hg > isophonics-drupal-site
diff core/lib/Drupal/Component/Transliteration/PhpTransliteration.php @ 17:129ea1e6d783
Update, including to Drupal core 8.6.10
author | Chris Cannam |
---|---|
date | Thu, 28 Feb 2019 13:21:36 +0000 |
parents | 4c8ae668cc8c |
children |
line wrap: on
line diff
--- a/core/lib/Drupal/Component/Transliteration/PhpTransliteration.php Tue Jul 10 15:07:59 2018 +0100
+++ b/core/lib/Drupal/Component/Transliteration/PhpTransliteration.php Thu Feb 28 13:21:36 2019 +0000
@@ -107,6 +107,29 @@
   public function transliterate($string, $langcode = 'en', $unknown_character = '?', $max_length = NULL) {
     $result = '';
     $length = 0;
+    $hash = FALSE;
+
+    // Replace question marks with a unique hash if necessary. This because
+    // mb_convert_encoding() replaces all invalid characters with a question
+    // mark.
+    if ($unknown_character != '?' && strpos($string, '?') !== FALSE) {
+      $hash = hash('sha256', $string);
+      $string = str_replace('?', $hash, $string);
+    }
+
+    // Ensure the string is valid UTF8 for preg_split(). Unknown characters will
+    // be replaced by a question mark.
+    $string = mb_convert_encoding($string, 'UTF-8', 'UTF-8');
+
+    // Use the provided unknown character instead of a question mark.
+    if ($unknown_character != '?') {
+      $string = str_replace('?', $unknown_character, $string);
+      // Restore original question marks if necessary.
+      if ($hash !== FALSE) {
+        $string = str_replace($hash, '?', $string);
+      }
+    }
+
     // Split into Unicode characters and transliterate each one.
     foreach (preg_split('//u', $string, 0, PREG_SPLIT_NO_EMPTY) as $character) {
       $code = self::ordUTF8($character);