Mercurial > hg > isophonics-drupal-site
comparison core/tests/Drupal/Tests/Component/Transliteration/PhpTransliterationTest.php @ 0:4c8ae668cc8c
Initial import (non-working)
author | Chris Cannam |
---|---|
date | Wed, 29 Nov 2017 16:09:58 +0000 |
parents | |
children | 1fec387a4317 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4c8ae668cc8c |
---|---|
1 <?php | |
2 | |
3 namespace Drupal\Tests\Component\Transliteration; | |
4 | |
5 use Drupal\Component\Transliteration\PhpTransliteration; | |
6 use Drupal\Component\Utility\Random; | |
7 use org\bovigo\vfs\vfsStream; | |
8 use PHPUnit\Framework\TestCase; | |
9 | |
/**
 * Tests Transliteration component functionality.
 *
 * @group Transliteration
 *
 * @coversDefaultClass \Drupal\Component\Transliteration\PhpTransliteration
 */
class PhpTransliterationTest extends TestCase {

  /**
   * Tests the PhpTransliteration::removeDiacritics() function.
   *
   * @param string $original
   *   The original string, containing characters with diacritics.
   * @param string $expected
   *   The expected return from PhpTransliteration::removeDiacritics().
   *
   * @dataProvider providerTestPhpTransliterationRemoveDiacritics
   */
  public function testRemoveDiacritics($original, $expected) {
    $transliterator_class = new PhpTransliteration();
    $result = $transliterator_class->removeDiacritics($original);
    // Strict comparison, consistent with the other tests in this class.
    $this->assertSame($expected, $result);
  }

  /**
   * Provides data for self::testRemoveDiacritics().
   *
   * Each row holds an input string and the string expected back. Characters
   * that are expected to pass through untouched (for example 'Æ' or '×')
   * appear unchanged in the expected value.
   *
   * @return array
   *   An array of arrays, each containing the parameters for
   *   self::testRemoveDiacritics().
   */
  public function providerTestPhpTransliterationRemoveDiacritics() {
    return [
      // Test all characters in the Unicode range 0x00bf to 0x017f.
      ['ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ', 'AAAAAAÆCEEEEIIII'],
      ['ÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß', 'ÐNOOOOO×OUUUUYÞß'],
      ['àáâãäåæçèéêëìíîï', 'aaaaaaæceeeeiiii'],
      ['ðñòóôõö÷øùúûüýþÿ', 'ðnooooo÷ouuuuyþy'],
      ['ĀāĂăĄąĆćĈĉĊċČčĎď', 'AaAaAaCcCcCcCcDd'],
      ['ĐđĒēĔĕĖėĘęĚěĜĝĞğ', 'DdEeEeEeEeEeGgGg'],
      ['ĠġĢģĤĥĦħĨĩĪīĬĭĮį', 'GgGgHhHhIiIiIiIi'],
      ['İıĲĳĴĵĶķĸĹĺĻļĽľĿ', 'IiĲĳJjKkĸLlLlLlL'],
      ['ŀŁłŃńŅņŇňŉŊŋŌōŎŏ', 'lLlNnNnNnŉŊŋOoOo'],
      ['ŐőŒœŔŕŖŗŘřŚśŜŝŞş', 'OoŒœRrRrRrSsSsSs'],
      ['ŠšŢţŤťŦŧŨũŪūŬŭŮů', 'SsTtTtTtUuUuUuUu'],
      ['ŰűŲųŴŵŶŷŸŹźŻżŽž', 'UuUuWwYyYZzZzZz'],

      // Test all characters in the Unicode range 0x01CD to 0x024F.
      ['ǍǎǏ', 'AaI'],
      ['ǐǑǒǓǔǕǖǗǘǙǚǛǜǝǞǟ', 'iOoUuUuUuUuUuǝAa'],
      ['ǠǡǢǣǤǥǦǧǨǩǪǫǬǭǮǯ', 'AaǢǣGgGgKkOoOoǮǯ'],
      ['ǰǱǲǳǴǵǶǷǸǹǺǻǼǽǾǿ', 'jǱǲǳGgǶǷNnAaǼǽOo'],
      ['ȀȁȂȃȄȅȆȇȈȉȊȋȌȍȎȏ', 'AaAaEeEeIiIiOoOo'],
      ['ȐȑȒȓȔȕȖȗȘșȚțȜȝȞȟ', 'RrRrUuUuSsTtȜȝHh'],
      ['ȠȡȢȣȤȥȦȧȨȩȪȫȬȭȮȯ', 'ȠȡȢȣZzAaEeOoOoOo'],
      ['ȰȱȲȳȴȵȶȷȸȹȺȻȼȽȾȿ', 'OoYylntjȸȹACcLTs'],
      ['ɀɁɂɃɄɅɆɇɈɉɊɋɌɍɎɏ', 'zɁɂBUɅEeJjQqRrYy'],
    ];
  }

  /**
   * Tests the PhpTransliteration class.
   *
   * @param string $langcode
   *   The language code to test.
   * @param string $original
   *   The original string.
   * @param string $expected
   *   The expected return from PhpTransliteration::transliterate().
   * @param string $unknown_character
   *   (optional) The character to substitute for characters in $original
   *   without transliterated equivalents. Defaults to '?'.
   * @param int $max_length
   *   (optional) If provided, return at most this many characters, ensuring
   *   that the transliteration does not split in the middle of an input
   *   character's transliteration.
   *
   * @dataProvider providerTestPhpTransliteration
   */
  public function testPhpTransliteration($langcode, $original, $expected, $unknown_character = '?', $max_length = NULL) {
    $transliterator_class = new PhpTransliteration();
    $actual = $transliterator_class->transliterate($original, $langcode, $unknown_character, $max_length);
    $this->assertSame($expected, $actual);
  }

  /**
   * Provides data for self::testPhpTransliteration().
   *
   * @return array
   *   An array of arrays, each containing the parameters for
   *   self::testPhpTransliteration().
   */
  public function providerTestPhpTransliteration() {
    $random_generator = new Random();
    $random = $random_generator->string(10);
    // Make some strings with two, three, and four-byte characters for testing.
    // Note that the 3-byte character is overridden by the 'kg' language.
    $two_byte = 'Ä Ö Ü Å Ø äöüåøhello';
    // This is a Cyrillic character that looks something like a u. See
    // http://www.unicode.org/charts/PDF/U0400.pdf
    // Numeric entities keep the literals ASCII-only; html_entity_decode()
    // turns them into the actual UTF-8 characters.
    $three_byte = html_entity_decode('&#x446;', ENT_NOQUOTES, 'UTF-8');
    // This is a Canadian Aboriginal character like a triangle. See
    // http://www.unicode.org/charts/PDF/U1400.pdf
    $four_byte = html_entity_decode('&#x1411;', ENT_NOQUOTES, 'UTF-8');
    // These are two Gothic alphabet letters. See
    // http://wikipedia.org/wiki/Gothic_alphabet
    // They are not in our tables, but should at least give us '?' (unknown).
    $five_byte = html_entity_decode('&#x10330;&#x10338;', ENT_NOQUOTES, 'UTF-8');

    return [
      // Each test case is (language code, input, output), optionally followed
      // by (unknown character, max length).
      // Test ASCII in English.
      ['en', $random, $random],
      // Test ASCII in some other language with no overrides.
      ['fr', $random, $random],
      // Test 3 and 4-byte characters in a language without overrides.
      // Note: if the data tables change, these will need to change too! They
      // are set up to test that data table loading works, so values come
      // directly from the data files.
      ['fr', $three_byte, 'c'],
      ['fr', $four_byte, 'wii'],
      // Test 5-byte characters.
      ['en', $five_byte, '??'],
      // Test a language with no overrides.
      ['en', $two_byte, 'A O U A O aouaohello'],
      // Test language overrides provided by core.
      ['de', $two_byte, 'Ae Oe Ue A O aeoeueaohello'],
      ['de', $random, $random],
      ['dk', $two_byte, 'A O U Aa Oe aouaaoehello'],
      ['dk', $random, $random],
      ['kg', $three_byte, 'ts'],
      // Test strings in some other languages.
      // Turkish, provided by drupal.org user Kartagis.
      ['tr', 'Abayı serdiler bize. Söyleyeceğim yüzlerine. Sanırım hepimiz aynı şeyi düşünüyoruz.', 'Abayi serdiler bize. Soyleyecegim yuzlerine. Sanirim hepimiz ayni seyi dusunuyoruz.'],
      // Illegal/unknown unicode.
      ['en', chr(0xF8) . chr(0x80) . chr(0x80) . chr(0x80) . chr(0x80), '?'],
      // Max length.
      ['de', $two_byte, 'Ae Oe', '?', 5],
    ];
  }

  /**
   * Tests the transliteration with max length.
   */
  public function testTransliterationWithMaxLength() {
    $transliteration = new PhpTransliteration();

    // Test with max length, using German. It should never split up the
    // transliteration of a single character, so limits of 17 and 18 are both
    // expected to produce the same 17-character output.
    $input = 'Ä Ö Ü Å Ø äöüåøhello';
    $trunc_output = 'Ae Oe Ue A O aeoe';

    $this->assertSame($trunc_output, $transliteration->transliterate($input, 'de', '?', 17), 'Truncating to 17 characters works');
    $this->assertSame($trunc_output, $transliteration->transliterate($input, 'de', '?', 18), 'Truncating to 18 characters works');
  }

  /**
   * Tests inclusion is safe.
   *
   * @covers ::readLanguageOverrides
   */
  public function testSafeInclude() {
    // The overrides in the transliteration data directory transliterate 0x82
    // into "safe" but the overrides one directory higher transliterate the
    // same character into "security hole". So by using "../index" as the
    // language code we can test the ../ is stripped from the langcode.
    vfsStream::setup('transliteration', NULL, [
      'index.php' => '<?php $overrides = ["../index" => [0x82 => "security hole"]];',
      'dir' => [
        'index.php' => '<?php $overrides = ["../index" => [0x82 => "safe"]];',
      ],
    ]);
    $transliteration = new PhpTransliteration(vfsStream::url('transliteration/dir'));
    // chr(0xC2) . chr(0x82) is the UTF-8 encoding of code point 0x82.
    $transliterated = $transliteration->transliterate(chr(0xC2) . chr(0x82), '../index');
    // Expected value goes first so a failure is reported the right way round.
    $this->assertSame('safe', $transliterated);
  }

}