diff core/tests/Drupal/Tests/Component/Transliteration/PhpTransliterationTest.php @ 17:129ea1e6d783

Update, including to Drupal core 8.6.10
author Chris Cannam
date Thu, 28 Feb 2019 13:21:36 +0000
parents 1fec387a4317
children af1871eacc83
line wrap: on
line diff
--- a/core/tests/Drupal/Tests/Component/Transliteration/PhpTransliterationTest.php	Tue Jul 10 15:07:59 2018 +0100
+++ b/core/tests/Drupal/Tests/Component/Transliteration/PhpTransliterationTest.php	Thu Feb 28 13:21:36 2019 +0000
@@ -106,7 +106,7 @@
     // Make some strings with two, three, and four-byte characters for testing.
     // Note that the 3-byte character is overridden by the 'kg' language.
     $two_byte = 'Ä Ö Ü Å Ø äöüåøhello';
-    // This is a Cyrrillic character that looks something like a u. See
+    // This is a Cyrillic character that looks something like a "u". See
     // http://www.unicode.org/charts/PDF/U0400.pdf
     $three_byte = html_entity_decode('ц', ENT_NOQUOTES, 'UTF-8');
     // This is a Canadian Aboriginal character like a triangle. See
@@ -118,7 +118,8 @@
     $five_byte = html_entity_decode('𐌰𐌸', ENT_NOQUOTES, 'UTF-8');
 
     return [
-      // Each test case is (language code, input, output).
+      // Each test case is language code, input, output, unknown character, max
+      // length.
       // Test ASCII in English.
       ['en', $random, $random],
       // Test ASCII in some other language with no overrides.
@@ -142,29 +143,37 @@
       // Test strings in some other languages.
       // Turkish, provided by drupal.org user Kartagis.
       ['tr', 'Abayı serdiler bize. Söyleyeceğim yüzlerine. Sanırım hepimiz aynı şeyi düşünüyoruz.', 'Abayi serdiler bize. Soyleyecegim yuzlerine. Sanirim hepimiz ayni seyi dusunuyoruz.'],
+      // Max length.
+      ['de', $two_byte, 'Ae Oe Ue A O aeoe', '?', 17],
+      // Do not split up the transliteration of a single character.
+      ['de', $two_byte, 'Ae Oe Ue A O aeoe', '?', 18],
       // Illegal/unknown unicode.
-      ['en', chr(0xF8) . chr(0x80) . chr(0x80) . chr(0x80) . chr(0x80), '?'],
-      // Max length.
-      ['de', $two_byte, 'Ae Oe', '?', 5],
+      ['en', chr(0xF8) . chr(0x80) . chr(0x80) . chr(0x80) . chr(0x80), '?????'],
+      ['en', chr(0xF8) . chr(0x80) . chr(0x80) . chr(0x80) . chr(0x80), '-----', '-'],
+      ['en', 'Hel' . chr(0x80) . 'o World', 'Hel?o World'],
+      ['en', 'Hell' . chr(0x80) . ' World', 'Hell? World'],
+      // Non-default replacement.
+      ['en', chr(0x80) . 'ello World', '_ello World', '_'],
+      // Keep the original question marks.
+      ['en', chr(0xF8) . '?' . chr(0x80), '???'],
+      ['en', chr(0x80) . 'ello ? World?', '_ello ? World?', '_'],
+      ['pl', 'aąeę' . chr(0x80) . 'oółżźz ?', 'aaee?oolzzz ?'],
+      // Non-US-ASCII replacement.
+      ['en', chr(0x80) . 'ello World?', 'Oello World?', 'Ö'],
+      ['pl', chr(0x80) . 'óóść', 'ooosc', 'ó'],
+      // Ensure original question marks are kept when max length is used.
+      ['en', chr(0x80) . 'ello ? World?', '_ello ?', '_', 7],
+      // Empty replacement.
+      ['en', chr(0x80) . 'ello World' . chr(0xF8), 'ello World', ''],
+      // Preserve spacing at the beginning and end of a string.
+      ['en', ' Hello Abventor! ', ' Hello Abventor! '],
+      ['pl', ' Drupal Kraków Community', ' Drupal Krakow ', '?', 15],
+      // Keep many spaces between words.
+      ['en', 'Too    many    spaces between words !', 'Too    many    spaces between words !'],
     ];
   }
 
   /**
-   * Tests the transliteration with max length.
-   */
-  public function testTransliterationWithMaxLength() {
-    $transliteration = new PhpTransliteration();
-
-    // Test with max length, using German. It should never split up the
-    // transliteration of a single character.
-    $input = 'Ä Ö Ü Å Ø äöüåøhello';
-    $trunc_output = 'Ae Oe Ue A O aeoe';
-
-    $this->assertSame($trunc_output, $transliteration->transliterate($input, 'de', '?', 17), 'Truncating to 17 characters works');
-    $this->assertSame($trunc_output, $transliteration->transliterate($input, 'de', '?', 18), 'Truncating to 18 characters works');
-  }
-
-  /**
    * Tests inclusion is safe.
    *
    * @covers ::readLanguageOverrides