diff core/lib/Drupal/Component/Transliteration/PhpTransliteration.php @ 17:129ea1e6d783

Update, including to Drupal core 8.6.10
author Chris Cannam
date Thu, 28 Feb 2019 13:21:36 +0000
parents 4c8ae668cc8c
children
line wrap: on
line diff
--- a/core/lib/Drupal/Component/Transliteration/PhpTransliteration.php	Tue Jul 10 15:07:59 2018 +0100
+++ b/core/lib/Drupal/Component/Transliteration/PhpTransliteration.php	Thu Feb 28 13:21:36 2019 +0000
@@ -107,6 +107,29 @@
   public function transliterate($string, $langcode = 'en', $unknown_character = '?', $max_length = NULL) {
     $result = '';
     $length = 0;
+    $hash = FALSE;
+
+    // Replace question marks with a unique hash if necessary. This is because
+    // mb_convert_encoding() replaces all invalid characters with a question
+    // mark.
+    if ($unknown_character != '?' && strpos($string, '?') !== FALSE) {
+      $hash = hash('sha256', $string);
+      $string = str_replace('?', $hash, $string);
+    }
+
+    // Ensure the string is valid UTF-8 for preg_split(). Unknown characters
+    // will be replaced by a question mark.
+    $string = mb_convert_encoding($string, 'UTF-8', 'UTF-8');
+
+    // Use the provided unknown character instead of a question mark.
+    if ($unknown_character != '?') {
+      $string = str_replace('?', $unknown_character, $string);
+      // Restore original question marks if necessary.
+      if ($hash !== FALSE) {
+        $string = str_replace($hash, '?', $string);
+      }
+    }
+
     // Split into Unicode characters and transliterate each one.
     foreach (preg_split('//u', $string, 0, PREG_SPLIT_NO_EMPTY) as $character) {
       $code = self::ordUTF8($character);