comparison core/tests/Drupal/Tests/Component/Utility/UnicodeTest.php @ 0:4c8ae668cc8c

Initial import (non-working)
author Chris Cannam
date Wed, 29 Nov 2017 16:09:58 +0000
parents
children 1fec387a4317
comparison
equal deleted inserted replaced
-1:000000000000 0:4c8ae668cc8c
1 <?php
2
3 namespace Drupal\Tests\Component\Utility;
4
5 use Drupal\Component\Utility\Unicode;
6 use PHPUnit\Framework\TestCase;
7
8 /**
9 * Test unicode handling features implemented in Unicode component.
10 *
11 * @group Utility
12 *
13 * @coversDefaultClass \Drupal\Component\Utility\Unicode
14 */
15 class UnicodeTest extends TestCase {
16
/**
 * {@inheritdoc}
 *
 * @covers ::check
 */
protected function setUp() {
  // Run the Unicode component's initialization ahead of every test method;
  // several tests in this class override the status via setStatus().
  Unicode::check();
}
26
/**
 * Tests getting and setting the multibyte environment status.
 *
 * @dataProvider providerTestStatus
 * @covers ::getStatus
 * @covers ::setStatus
 *
 * @param int $value
 *   The status value passed to Unicode::setStatus().
 * @param int $expected
 *   The status Unicode::getStatus() should report afterwards. When the
 *   expected exception is thrown, the assertion on this value is not reached.
 * @param bool $invalid
 *   (optional) TRUE if setting $value is expected to throw an
 *   \InvalidArgumentException. Defaults to FALSE.
 */
public function testStatus($value, $expected, $invalid = FALSE) {
  if ($invalid) {
    // setExpectedException() is deprecated as of PHPUnit 5.2 and removed in
    // PHPUnit 6, so prefer expectException() whenever the runner offers it and
    // only fall back to the legacy method on older PHPUnit versions.
    if (method_exists($this, 'expectException')) {
      $this->expectException('InvalidArgumentException');
    }
    else {
      $this->setExpectedException('InvalidArgumentException');
    }
  }
  Unicode::setStatus($value);
  $this->assertEquals($expected, Unicode::getStatus());
}
41
/**
 * Data provider for testStatus().
 *
 * @see testStatus()
 *
 * @return array
 *   A list of data sets, each containing:
 *   - The status value to set.
 *   - The status value to expect after setting the new value.
 *   - (optional) Boolean indicating invalid status. Defaults to FALSE.
 */
public function providerTestStatus() {
  $data = [];
  $data[] = [Unicode::STATUS_SINGLEBYTE, Unicode::STATUS_SINGLEBYTE];
  // Rows flagged as invalid pass an arbitrary integer between 10 and 100.
  // NOTE(review): rand() makes these data sets differ between runs, and this
  // presumes no valid status constant lies in that range — confirm.
  $data[] = [rand(10, 100), Unicode::STATUS_SINGLEBYTE, TRUE];
  $data[] = [rand(10, 100), Unicode::STATUS_SINGLEBYTE, TRUE];
  $data[] = [Unicode::STATUS_MULTIBYTE, Unicode::STATUS_MULTIBYTE];
  $data[] = [rand(10, 100), Unicode::STATUS_MULTIBYTE, TRUE];
  $data[] = [Unicode::STATUS_ERROR, Unicode::STATUS_ERROR];
  $data[] = [Unicode::STATUS_MULTIBYTE, Unicode::STATUS_MULTIBYTE];
  return $data;
}
64
/**
 * Tests MIME header encoding and decoding in both directions.
 *
 * @dataProvider providerTestMimeHeader
 * @covers ::mimeHeaderEncode
 * @covers ::mimeHeaderDecode
 *
 * @param string $value
 *   The plain header value.
 * @param string $encoded
 *   The encoded form expected for $value.
 */
public function testMimeHeader($value, $encoded) {
  // Plain value to encoded form.
  $actual_encoded = Unicode::mimeHeaderEncode($value);
  $this->assertEquals($encoded, $actual_encoded);

  // Encoded form back to the plain value.
  $actual_decoded = Unicode::mimeHeaderDecode($encoded);
  $this->assertEquals($value, $actual_decoded);
}
76
/**
 * Data provider for testMimeHeader().
 *
 * @see testMimeHeader()
 *
 * @return array
 *   A list of data sets, each containing a string and its encoded value.
 */
public function providerTestMimeHeader() {
  $data = [];
  // A value with a non-ASCII character, paired with its encoded-word form.
  $data[] = ['tést.txt', '=?UTF-8?B?dMOpc3QudHh0?='];
  // Simple ASCII characters are expected to stay as they are.
  $data[] = ['ASCII', 'ASCII'];
  return $data;
}
92
/**
 * Tests Unicode::strtolower() under the requested status.
 *
 * @dataProvider providerStrtolower
 * @covers ::strtolower
 * @covers ::caseFlip
 *
 * @param string $text
 *   The string to convert.
 * @param string $expected
 *   The expected lowercase string.
 * @param bool $multibyte
 *   (optional) TRUE to set the multibyte status before converting, FALSE to
 *   set the singlebyte status. Defaults to FALSE.
 */
public function testStrtolower($text, $expected, $multibyte = FALSE) {
  if ($multibyte) {
    Unicode::setStatus(Unicode::STATUS_MULTIBYTE);
  }
  else {
    Unicode::setStatus(Unicode::STATUS_SINGLEBYTE);
  }
  $actual = Unicode::strtolower($text);
  $this->assertEquals($expected, $actual);
}
105
/**
 * Data provider for testStrtolower().
 *
 * @see testStrtolower()
 *
 * @return array
 *   A list of data sets, each containing a string, its lowercase version and
 *   (optionally) whether it should be processed as multibyte.
 */
public function providerStrtolower() {
  $singlebyte = [
    ['tHe QUIcK bRoWn', 'the quick brown'],
    ['FrançAIS is ÜBER-åwesome', 'français is über-åwesome'],
  ];
  // Repeat each pair with the multibyte flag appended, so the same strings
  // are tested in both singlebyte and multibyte conditions.
  $multibyte = array_map(function (array $case) {
    $case[] = TRUE;
    return $case;
  }, $singlebyte);
  $cases = array_merge($singlebyte, $multibyte);
  // A string that is only supplied with the multibyte flag.
  $cases[] = ['ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ', 'αβγδεζηθικλμνξοσὠ', TRUE];
  return $cases;
}
129
/**
 * Tests Unicode::strtoupper() under the requested status.
 *
 * @dataProvider providerStrtoupper
 * @covers ::strtoupper
 * @covers ::caseFlip
 *
 * @param string $text
 *   The string to convert.
 * @param string $expected
 *   The expected uppercase string.
 * @param bool $multibyte
 *   (optional) TRUE to set the multibyte status before converting, FALSE to
 *   set the singlebyte status. Defaults to FALSE.
 */
public function testStrtoupper($text, $expected, $multibyte = FALSE) {
  $status = Unicode::STATUS_SINGLEBYTE;
  if ($multibyte) {
    $status = Unicode::STATUS_MULTIBYTE;
  }
  Unicode::setStatus($status);
  $actual = Unicode::strtoupper($text);
  $this->assertEquals($expected, $actual);
}
142
/**
 * Data provider for testStrtoupper().
 *
 * @see testStrtoupper()
 *
 * @return array
 *   A list of data sets, each containing a string, its uppercase version and
 *   (optionally) whether it should be processed as multibyte.
 */
public function providerStrtoupper() {
  $pairs = [
    ['tHe QUIcK bRoWn', 'THE QUICK BROWN'],
    ['FrançAIS is ÜBER-åwesome', 'FRANÇAIS IS ÜBER-ÅWESOME'],
  ];
  // Start with the pairs as they are (no flag), then add each pair again with
  // the multibyte flag so both conditions see the same strings.
  $cases = $pairs;
  foreach ($pairs as $pair) {
    $cases[] = [$pair[0], $pair[1], TRUE];
  }
  // A string that is only supplied with the multibyte flag.
  $cases[] = ['αβγδεζηθικλμνξοσὠ', 'ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ', TRUE];
  return $cases;
}
166
/**
 * Tests Unicode::ucfirst().
 *
 * @dataProvider providerUcfirst
 * @covers ::ucfirst
 *
 * @param string $text
 *   The string to convert.
 * @param string $expected
 *   The expected string with its first character in uppercase.
 */
public function testUcfirst($text, $expected) {
  $actual = Unicode::ucfirst($text);
  $this->assertEquals($expected, $actual);
}
176
/**
 * Data provider for testUcfirst().
 *
 * @see testUcfirst()
 *
 * @return array
 *   A list of data sets, each containing a string and the same string with
 *   its first character in uppercase.
 */
public function providerUcfirst() {
  $data = [];
  $data[] = ['tHe QUIcK bRoWn', 'THe QUIcK bRoWn'];
  $data[] = ['françAIS', 'FrançAIS'];
  $data[] = ['über', 'Über'];
  $data[] = ['åwesome', 'Åwesome'];
  // A string starting with a Greek letter.
  $data[] = ['σion', 'Σion'];
  return $data;
}
195
/**
 * Tests Unicode::lcfirst() under the requested status.
 *
 * @dataProvider providerLcfirst
 * @covers ::lcfirst
 *
 * @param string $text
 *   The string to convert.
 * @param string $expected
 *   The expected string with its first character in lowercase.
 * @param bool $multibyte
 *   (optional) TRUE to set the multibyte status before converting, FALSE to
 *   set the singlebyte status. Defaults to FALSE.
 */
public function testLcfirst($text, $expected, $multibyte = FALSE) {
  if ($multibyte) {
    $status = Unicode::STATUS_MULTIBYTE;
  }
  else {
    $status = Unicode::STATUS_SINGLEBYTE;
  }
  Unicode::setStatus($status);
  $actual = Unicode::lcfirst($text);
  $this->assertEquals($expected, $actual);
}
207
/**
 * Data provider for testLcfirst().
 *
 * @see testLcfirst()
 *
 * @return array
 *   A list of data sets, each containing a string, the same string with its
 *   first character in lowercase and (optionally) whether it should be
 *   processed as multibyte.
 */
public function providerLcfirst() {
  $data = [];
  $data[] = ['tHe QUIcK bRoWn', 'tHe QUIcK bRoWn'];
  $data[] = ['FrançAIS is ÜBER-åwesome', 'françAIS is ÜBER-åwesome'];
  $data[] = ['Über', 'über'];
  $data[] = ['Åwesome', 'åwesome'];
  // A Greek string, supplied with the multibyte flag.
  $data[] = ['ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ', 'αΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ', TRUE];
  return $data;
}
227
/**
 * Tests Unicode::ucwords() under the requested status.
 *
 * @dataProvider providerUcwords
 * @covers ::ucwords
 *
 * @param string $text
 *   The string to convert.
 * @param string $expected
 *   The expected capitalized string.
 * @param bool $multibyte
 *   (optional) TRUE to set the multibyte status before converting, FALSE to
 *   set the singlebyte status. Defaults to FALSE.
 */
public function testUcwords($text, $expected, $multibyte = FALSE) {
  $status = Unicode::STATUS_SINGLEBYTE;
  if ($multibyte) {
    $status = Unicode::STATUS_MULTIBYTE;
  }
  Unicode::setStatus($status);
  $actual = Unicode::ucwords($text);
  $this->assertEquals($expected, $actual);
}
239
/**
 * Data provider for testUcwords().
 *
 * @see testUcwords()
 *
 * @return array
 *   A list of data sets, each containing a string, its capitalized version
 *   and (optionally) whether it should be processed as multibyte.
 */
public function providerUcwords() {
  return [
    ['tHe QUIcK bRoWn', 'THe QUIcK BRoWn'],
    ['françAIS', 'FrançAIS'],
    ['über', 'Über'],
    ['åwesome', 'Åwesome'],
    // A phrase of several words, including a hyphenated one.
    ['frànçAIS is über-åwesome', 'FrànçAIS Is Über-Åwesome'],
    // Make sure we don't mangle extra spaces: the doubled space between "is"
    // and "über" has to come back untouched.
    ['frànçAIS is  über-åwesome', 'FrànçAIS Is  Über-Åwesome'],
    // Add a multibyte string.
    ['σion', 'Σion', TRUE],
  ];
}
261
/**
 * Tests Unicode::strlen() under the multibyte and singlebyte statuses.
 *
 * @dataProvider providerStrlen
 * @covers ::strlen
 *
 * @param string $text
 *   The string to measure.
 * @param int $expected
 *   The expected length, which must be the same under both statuses.
 */
public function testStrlen($text, $expected) {
  // Multibyte code path first, then the singlebyte one.
  $statuses = [Unicode::STATUS_MULTIBYTE, Unicode::STATUS_SINGLEBYTE];
  foreach ($statuses as $status) {
    Unicode::setStatus($status);
    $this->assertEquals($expected, Unicode::strlen($text));
  }
}
276
/**
 * Data provider for testStrlen().
 *
 * @see testStrlen()
 *
 * @return array
 *   A list of data sets, each containing a string and its length.
 */
public function providerStrlen() {
  $data = [];
  $data[] = ['tHe QUIcK bRoWn', 15];
  $data[] = ['ÜBER-åwesome', 12];
  $data[] = ['以呂波耳・ほへとち。リヌルヲ。', 15];
  return $data;
}
292
/**
 * Tests Unicode::substr() under the multibyte and singlebyte statuses.
 *
 * @dataProvider providerSubstr
 * @covers ::substr
 *
 * @param string $text
 *   The string to cut.
 * @param int $start
 *   The start position passed to Unicode::substr().
 * @param int|null $length
 *   The length passed to Unicode::substr().
 * @param string $expected
 *   The expected substring, which must be the same under both statuses.
 */
public function testSubstr($text, $start, $length, $expected) {
  // Multibyte code path first, then the singlebyte one.
  $statuses = [Unicode::STATUS_MULTIBYTE, Unicode::STATUS_SINGLEBYTE];
  foreach ($statuses as $status) {
    Unicode::setStatus($status);
    $this->assertEquals($expected, Unicode::substr($text, $start, $length));
  }
}
307
/**
 * Data provider for testSubstr().
 *
 * @see testSubstr()
 *
 * @return array
 *   A list of data sets, each containing:
 *   - The string to test.
 *   - The start number to be processed by substr.
 *   - The length number to be processed by substr.
 *   - The expected string result.
 */
public function providerSubstr() {
  // The 24-character phrase most data sets cut pieces out of.
  $phrase = 'frànçAIS is über-åwesome';
  $japanese = '以呂波耳・ほへとち。リヌルヲ。';

  $data = [];
  // Starting at the beginning with growing lengths, including lengths at and
  // beyond the end of the string.
  $data[] = [$phrase, 0, NULL, $phrase];
  $data[] = [$phrase, 0, 0, ''];
  $data[] = [$phrase, 0, 1, 'f'];
  $data[] = [$phrase, 0, 8, 'frànçAIS'];
  $data[] = [$phrase, 0, 23, 'frànçAIS is über-åwesom'];
  $data[] = [$phrase, 0, 24, $phrase];
  $data[] = [$phrase, 0, 25, $phrase];
  $data[] = [$phrase, 0, 100, $phrase];
  // Positive start positions.
  $data[] = [$phrase, 4, 4, 'çAIS'];
  $data[] = [$phrase, 1, 0, ''];
  $data[] = [$phrase, 100, 0, ''];
  // Negative start positions with positive lengths.
  $data[] = [$phrase, -4, 2, 'so'];
  $data[] = [$phrase, -4, 3, 'som'];
  $data[] = [$phrase, -4, 4, 'some'];
  $data[] = [$phrase, -4, 5, 'some'];
  $data[] = [$phrase, -7, 10, 'åwesome'];
  // Negative lengths.
  $data[] = [$phrase, 5, -10, 'AIS is üb'];
  $data[] = [$phrase, 0, -10, 'frànçAIS is üb'];
  $data[] = [$phrase, 0, -1, 'frànçAIS is über-åwesom'];
  $data[] = [$phrase, -7, -2, 'åweso'];
  $data[] = [$phrase, -7, -6, 'å'];
  $data[] = [$phrase, -7, -7, ''];
  $data[] = [$phrase, -7, -8, ''];
  // Other strings.
  $data[] = ['...', 0, 2, '..'];
  $data[] = [$japanese, 1, 3, '呂波耳'];
  return $data;
}
349
/**
 * Tests Unicode::truncate().
 *
 * @dataProvider providerTruncate
 * @covers ::truncate
 *
 * @param string $text
 *   The string to truncate.
 * @param int $max_length
 *   The maximum length passed to Unicode::truncate().
 * @param string $expected
 *   The expected truncated string.
 * @param bool $wordsafe
 *   (optional) The $wordsafe flag passed to Unicode::truncate(). Defaults to
 *   FALSE.
 * @param bool $add_ellipsis
 *   (optional) The $add_ellipsis flag passed to Unicode::truncate(). Defaults
 *   to FALSE.
 */
public function testTruncate($text, $max_length, $expected, $wordsafe = FALSE, $add_ellipsis = FALSE) {
  $actual = Unicode::truncate($text, $max_length, $wordsafe, $add_ellipsis);
  $this->assertEquals($expected, $actual);
}
359
/**
 * Data provider for testTruncate().
 *
 * @see testTruncate()
 *
 * @return array
 *   A list of data sets, each containing:
 *   - The string to test.
 *   - The max length to truncate this string to.
 *   - The expected string result.
 *   - (optional) Boolean for the $wordsafe flag. Defaults to FALSE.
 *   - (optional) Boolean for the $add_ellipsis flag. Defaults to FALSE.
 */
public function providerTruncate() {
  // Strings that are reused by several data sets.
  $phrase = 'frànçAIS is über-åwesome';
  $question = '¿Dónde está el niño?';
  $help = 'Help! Help! Help!';
  $stop = 'Stop. Hammertime.';

  $data = [];

  // Both optional flags left at their defaults.
  $data[] = [$phrase, 24, $phrase];
  $data[] = [$phrase, 23, 'frànçAIS is über-åwesom'];
  $data[] = [$phrase, 17, 'frànçAIS is über-'];
  $data[] = ['以呂波耳・ほへとち。リヌルヲ。', 6, '以呂波耳・ほ'];

  // $add_ellipsis set, $wordsafe not set.
  $data[] = [$phrase, 24, $phrase, FALSE, TRUE];
  $data[] = [$phrase, 23, 'frànçAIS is über-åweso…', FALSE, TRUE];
  $data[] = [$phrase, 17, 'frànçAIS is über…', FALSE, TRUE];

  // Both $wordsafe and $add_ellipsis set: strings of digits.
  $data[] = ['123', 1, '…', TRUE, TRUE];
  $data[] = ['123', 2, '1…', TRUE, TRUE];
  $data[] = ['123', 3, '123', TRUE, TRUE];
  $data[] = ['1234', 3, '12…', TRUE, TRUE];
  $data[] = ['1234567890', 10, '1234567890', TRUE, TRUE];
  $data[] = ['12345678901', 10, '123456789…', TRUE, TRUE];
  $data[] = ['12345678901', 11, '12345678901', TRUE, TRUE];
  $data[] = ['123456789012', 11, '1234567890…', TRUE, TRUE];
  $data[] = ['12345 7890', 10, '12345 7890', TRUE, TRUE];
  $data[] = ['12345 7890', 9, '12345…', TRUE, TRUE];
  $data[] = ['123 567 90', 10, '123 567 90', TRUE, TRUE];
  $data[] = ['123 567 901', 10, '123 567…', TRUE, TRUE];

  // Both flags set: phrases with punctuation and non-ASCII characters.
  $data[] = [$stop, 17, $stop, TRUE, TRUE];
  $data[] = [$stop, 16, 'Stop…', TRUE, TRUE];
  $data[] = [$phrase, 24, $phrase, TRUE, TRUE];
  $data[] = [$phrase, 23, 'frànçAIS is über…', TRUE, TRUE];
  $data[] = [$phrase, 17, 'frànçAIS is über…', TRUE, TRUE];
  $data[] = [$question, 20, $question, TRUE, TRUE];
  $data[] = [$question, 19, '¿Dónde está el…', TRUE, TRUE];
  $data[] = [$question, 13, '¿Dónde está…', TRUE, TRUE];
  $data[] = [$question, 10, '¿Dónde…', TRUE, TRUE];

  // Both flags set: the same phrase at every max length from 17 down to 2.
  $data[] = [$help, 17, $help, TRUE, TRUE];
  $data[] = [$help, 16, 'Help! Help!…', TRUE, TRUE];
  $data[] = [$help, 15, 'Help! Help!…', TRUE, TRUE];
  $data[] = [$help, 14, 'Help! Help!…', TRUE, TRUE];
  $data[] = [$help, 13, 'Help! Help!…', TRUE, TRUE];
  $data[] = [$help, 12, 'Help! Help!…', TRUE, TRUE];
  $data[] = [$help, 11, 'Help! Help…', TRUE, TRUE];
  $data[] = [$help, 10, 'Help!…', TRUE, TRUE];
  $data[] = [$help, 9, 'Help!…', TRUE, TRUE];
  $data[] = [$help, 8, 'Help!…', TRUE, TRUE];
  $data[] = [$help, 7, 'Help!…', TRUE, TRUE];
  $data[] = [$help, 6, 'Help!…', TRUE, TRUE];
  $data[] = [$help, 5, 'Help…', TRUE, TRUE];
  $data[] = [$help, 4, 'Hel…', TRUE, TRUE];
  $data[] = [$help, 3, 'He…', TRUE, TRUE];
  $data[] = [$help, 2, 'H…', TRUE, TRUE];

  return $data;
}
421
/**
 * Tests Unicode::truncateBytes().
 *
 * @dataProvider providerTestTruncateBytes
 * @covers ::truncateBytes
 *
 * @param string $text
 *   The string to truncate.
 * @param int $max_length
 *   The upper limit on the returned string length.
 * @param string $expected
 *   The expected return from Unicode::truncateBytes().
 */
public function testTruncateBytes($text, $max_length, $expected) {
  $actual = Unicode::truncateBytes($text, $max_length);
  $this->assertEquals($expected, $actual, 'The string was not correctly truncated.');
}
438
/**
 * Provides data for self::testTruncateBytes().
 *
 * @return array
 *   An array of arrays, each containing the parameters to
 *   self::testTruncateBytes(): the text, the maximum length and the expected
 *   result.
 */
public function providerTestTruncateBytes() {
  $data = [];
  // String shorter than max length.
  $data[] = ['Short string', 42, 'Short string'];
  // Simple string longer than max length.
  $data[] = ['Longer string than previous.', 10, 'Longer str'];
  // Unicode: the expected result holds three characters for a limit of 10.
  $data[] = ['以呂波耳・ほへとち。リヌルヲ。', 10, '以呂波'];
  return $data;
}
456
/**
 * Tests Unicode::validateUtf8().
 *
 * @dataProvider providerTestValidateUtf8
 * @covers ::validateUtf8
 *
 * @param string $text
 *   The text to validate.
 * @param bool $expected
 *   The expected return value from Unicode::validateUtf8().
 * @param string $message
 *   The message to display on failure.
 */
public function testValidateUtf8($text, $expected, $message) {
  $is_valid = Unicode::validateUtf8($text);
  $this->assertEquals($expected, $is_valid, $message);
}
473
/**
 * Provides data for self::testValidateUtf8().
 *
 * Invalid UTF-8 examples sourced from http://stackoverflow.com/a/11709412/109119.
 *
 * @return array
 *   An array of arrays, each containing the parameters for
 *   self::testValidateUtf8(): the text, the expected result and the failure
 *   message.
 */
public function providerTestValidateUtf8() {
  $data = [];
  // Empty string.
  $data[] = ['', TRUE, 'An empty string did not validate.'];
  // Simple text string.
  $data[] = ['Simple text.', TRUE, 'A simple ASCII text string did not validate.'];
  // Invalid UTF-8, overlong 5 byte encoding. The bytes are written as hex
  // escapes in a double-quoted string.
  $data[] = ["\xF8\x80\x80\x80\x80", FALSE, 'Invalid UTF-8 was validated.'];
  // High code-point without trailing characters.
  $data[] = ["\xD0\x01", FALSE, 'Invalid UTF-8 was validated.'];
  return $data;
}
495
/**
 * Tests Unicode::convertToUtf8().
 *
 * @dataProvider providerTestConvertToUtf8
 * @covers ::convertToUtf8
 *
 * @param string $data
 *   The data to be converted.
 * @param string $encoding
 *   The encoding the data is in.
 * @param string|bool $expected
 *   The expected result.
 */
public function testConvertToUtf8($data, $encoding, $expected) {
  $converted = Unicode::convertToUtf8($data, $encoding);
  $this->assertEquals($expected, $converted);
}
512
/**
 * Provides data to self::testConvertToUtf8().
 *
 * @return array
 *   An array of arrays, each containing the parameters to
 *   self::testConvertToUtf8(): the raw data, the encoding it is in and the
 *   expected UTF-8 string.
 */
public function providerTestConvertToUtf8() {
  $encoding = 'Windows-1252';
  $data = [];
  // Single bytes, written as hex escapes, each paired with the character
  // expected after conversion.
  $data[] = ["\x97", $encoding, '—'];
  $data[] = ["\x99", $encoding, '™'];
  $data[] = ["\x80", $encoding, '€'];
  return $data;
}
527
/**
 * Tests Unicode::strpos() under the multibyte and singlebyte statuses.
 *
 * @dataProvider providerStrpos
 * @covers ::strpos
 *
 * @param string $haystack
 *   The string to search in.
 * @param string $needle
 *   The string to search for.
 * @param int $offset
 *   The position to start searching at.
 * @param int|false $expected
 *   The expected position, or FALSE if the needle is not expected to be
 *   found.
 */
public function testStrpos($haystack, $needle, $offset, $expected) {
  // A strict comparison is required here: with assertEquals() a position of
  // 0 and a "not found" result of FALSE compare as equal, so the two outcomes
  // could be mixed up without the test noticing.
  // Run through multibyte code path.
  Unicode::setStatus(Unicode::STATUS_MULTIBYTE);
  $this->assertSame($expected, Unicode::strpos($haystack, $needle, $offset));
  // Run through singlebyte code path.
  Unicode::setStatus(Unicode::STATUS_SINGLEBYTE);
  $this->assertSame($expected, Unicode::strpos($haystack, $needle, $offset));
}
542
/**
 * Data provider for testStrpos().
 *
 * @see testStrpos()
 *
 * @return array
 *   A list of data sets, each containing:
 *   - The haystack string to be searched in.
 *   - The needle string to search for.
 *   - The offset integer to start at.
 *   - The expected integer/FALSE result.
 */
public function providerStrpos() {
  // The two haystacks shared by all data sets.
  $phrase = 'frànçAIS is über-åwesome';
  $japanese = '以呂波耳・ほへとち。リヌルヲ。';

  $data = [];
  // The haystack itself, and the haystack without its first character.
  $data[] = [$phrase, $phrase, 0, 0];
  $data[] = [$phrase, 'rànçAIS is über-åwesome', 0, 1];
  // A needle that does not occur.
  $data[] = [$phrase, 'not in string', 0, FALSE];
  $data[] = [$phrase, 'r', 0, 1];
  // The same needle with offsets up to the position of the match.
  $data[] = [$phrase, 'nçAIS', 0, 3];
  $data[] = [$phrase, 'nçAIS', 2, 3];
  $data[] = [$phrase, 'nçAIS', 3, 3];
  $data[] = [$japanese, '波耳', 0, 2];
  $data[] = [$japanese, '波耳', 1, 2];
  $data[] = [$japanese, '波耳', 2, 2];
  return $data;
}
569
570 }