diff core/tests/Drupal/Tests/Component/Utility/UnicodeTest.php @ 0:4c8ae668cc8c

Initial import (non-working)
author Chris Cannam
date Wed, 29 Nov 2017 16:09:58 +0000
parents
children 1fec387a4317
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/core/tests/Drupal/Tests/Component/Utility/UnicodeTest.php	Wed Nov 29 16:09:58 2017 +0000
@@ -0,0 +1,570 @@
+<?php
+
+namespace Drupal\Tests\Component\Utility;
+
+use Drupal\Component\Utility\Unicode;
+use PHPUnit\Framework\TestCase;
+
+/**
+ * Test unicode handling features implemented in Unicode component.
+ *
+ * @group Utility
+ *
+ * @coversDefaultClass \Drupal\Component\Utility\Unicode
+ */
+class UnicodeTest extends TestCase {
+
+  /**
+   * {@inheritdoc}
+   *
+   * @covers ::check
+   */
+  protected function setUp() {
+    // Initialize unicode component.
+    Unicode::check();
+  }
+
+  /**
+   * Getting and settings the multibyte environment status.
+   *
+   * @dataProvider providerTestStatus
+   * @covers ::getStatus
+   * @covers ::setStatus
+   */
+  public function testStatus($value, $expected, $invalid = FALSE) {
+    if ($invalid) {
+      $this->setExpectedException('InvalidArgumentException');
+    }
+    Unicode::setStatus($value);
+    $this->assertEquals($expected, Unicode::getStatus());
+  }
+
+  /**
+   * Data provider for testStatus().
+   *
+   * @see testStatus()
+   *
+   * @return array
+   *   An array containing:
+   *     - The status value to set.
+   *     - The status value to expect after setting the new value.
+   *     - (optional) Boolean indicating invalid status. Defaults to FALSE.
+   */
+  public function providerTestStatus() {
+    return [
+      [Unicode::STATUS_SINGLEBYTE, Unicode::STATUS_SINGLEBYTE],
+      [rand(10, 100), Unicode::STATUS_SINGLEBYTE, TRUE],
+      [rand(10, 100), Unicode::STATUS_SINGLEBYTE, TRUE],
+      [Unicode::STATUS_MULTIBYTE, Unicode::STATUS_MULTIBYTE],
+      [rand(10, 100), Unicode::STATUS_MULTIBYTE, TRUE],
+      [Unicode::STATUS_ERROR, Unicode::STATUS_ERROR],
+      [Unicode::STATUS_MULTIBYTE, Unicode::STATUS_MULTIBYTE],
+    ];
+  }
+
+  /**
+   * Tests multibyte encoding and decoding.
+   *
+   * @dataProvider providerTestMimeHeader
+   * @covers ::mimeHeaderEncode
+   * @covers ::mimeHeaderDecode
+   */
+  public function testMimeHeader($value, $encoded) {
+    $this->assertEquals($encoded, Unicode::mimeHeaderEncode($value));
+    $this->assertEquals($value, Unicode::mimeHeaderDecode($encoded));
+  }
+
+  /**
+   * Data provider for testMimeHeader().
+   *
+   * @see testMimeHeader()
+   *
+   * @return array
+   *   An array containing a string and its encoded value.
+   */
+  public function providerTestMimeHeader() {
+    return [
+      ['tést.txt', '=?UTF-8?B?dMOpc3QudHh0?='],
+      // Simple ASCII characters.
+      ['ASCII', 'ASCII'],
+    ];
+  }
+
+  /**
+   * Tests multibyte strtolower.
+   *
+   * @dataProvider providerStrtolower
+   * @covers ::strtolower
+   * @covers ::caseFlip
+   */
+  public function testStrtolower($text, $expected, $multibyte = FALSE) {
+    $status = $multibyte ? Unicode::STATUS_MULTIBYTE : Unicode::STATUS_SINGLEBYTE;
+    Unicode::setStatus($status);
+    $this->assertEquals($expected, Unicode::strtolower($text));
+  }
+
+  /**
+   * Data provider for testStrtolower().
+   *
+   * @see testStrtolower()
+   *
+   * @return array
+   *   An array containing a string, its lowercase version and whether it should
+   *   be processed as multibyte.
+   */
+  public function providerStrtolower() {
+    $cases = [
+      ['tHe QUIcK bRoWn', 'the quick brown'],
+      ['FrançAIS is ÜBER-åwesome', 'français is über-åwesome'],
+    ];
+    foreach ($cases as $case) {
+      // Test the same string both in multibyte and singlebyte conditions.
+      array_push($case, TRUE);
+      $cases[] = $case;
+    }
+    // Add a multibyte string.
+    $cases[] = ['ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ', 'αβγδεζηθικλμνξοσὠ', TRUE];
+    return $cases;
+  }
+
+  /**
+   * Tests multibyte strtoupper.
+   *
+   * @dataProvider providerStrtoupper
+   * @covers ::strtoupper
+   * @covers ::caseFlip
+   */
+  public function testStrtoupper($text, $expected, $multibyte = FALSE) {
+    $status = $multibyte ? Unicode::STATUS_MULTIBYTE : Unicode::STATUS_SINGLEBYTE;
+    Unicode::setStatus($status);
+    $this->assertEquals($expected, Unicode::strtoupper($text));
+  }
+
+  /**
+   * Data provider for testStrtoupper().
+   *
+   * @see testStrtoupper()
+   *
+   * @return array
+   *   An array containing a string, its uppercase version and whether it should
+   *   be processed as multibyte.
+   */
+  public function providerStrtoupper() {
+    $cases = [
+      ['tHe QUIcK bRoWn', 'THE QUICK BROWN'],
+      ['FrançAIS is ÜBER-åwesome', 'FRANÇAIS IS ÜBER-ÅWESOME'],
+    ];
+    foreach ($cases as $case) {
+      // Test the same string both in multibyte and singlebyte conditions.
+      array_push($case, TRUE);
+      $cases[] = $case;
+    }
+    // Add a multibyte string.
+    $cases[] = ['αβγδεζηθικλμνξοσὠ', 'ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ', TRUE];
+    return $cases;
+  }
+
+  /**
+   * Tests multibyte ucfirst.
+   *
+   * @dataProvider providerUcfirst
+   * @covers ::ucfirst
+   */
+  public function testUcfirst($text, $expected) {
+    $this->assertEquals($expected, Unicode::ucfirst($text));
+  }
+
+  /**
+   * Data provider for testUcfirst().
+   *
+   * @see testUcfirst()
+   *
+   * @return array
+   *   An array containing a string and its uppercase first version.
+   */
+  public function providerUcfirst() {
+    return [
+      ['tHe QUIcK bRoWn', 'THe QUIcK bRoWn'],
+      ['françAIS', 'FrançAIS'],
+      ['über', 'Über'],
+      ['åwesome', 'Åwesome'],
+      // A multibyte string.
+      ['σion', 'Σion'],
+    ];
+  }
+
+  /**
+   * Tests multibyte lcfirst.
+   *
+   * @dataProvider providerLcfirst
+   * @covers ::lcfirst
+   */
+  public function testLcfirst($text, $expected, $multibyte = FALSE) {
+    $status = $multibyte ? Unicode::STATUS_MULTIBYTE : Unicode::STATUS_SINGLEBYTE;
+    Unicode::setStatus($status);
+    $this->assertEquals($expected, Unicode::lcfirst($text));
+  }
+
+  /**
+   * Data provider for testLcfirst().
+   *
+   * @see testLcfirst()
+   *
+   * @return array
+   *   An array containing a string, its lowercase version and whether it should
+   *   be processed as multibyte.
+   */
+  public function providerLcfirst() {
+    return [
+      ['tHe QUIcK bRoWn', 'tHe QUIcK bRoWn'],
+      ['FrançAIS is ÜBER-åwesome', 'françAIS is ÜBER-åwesome'],
+      ['Über', 'über'],
+      ['Åwesome', 'åwesome'],
+      // Add a multibyte string.
+      ['ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ', 'αΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ', TRUE],
+    ];
+  }
+
+  /**
+   * Tests multibyte ucwords.
+   *
+   * @dataProvider providerUcwords
+   * @covers ::ucwords
+   */
+  public function testUcwords($text, $expected, $multibyte = FALSE) {
+    $status = $multibyte ? Unicode::STATUS_MULTIBYTE : Unicode::STATUS_SINGLEBYTE;
+    Unicode::setStatus($status);
+    $this->assertEquals($expected, Unicode::ucwords($text));
+  }
+
+  /**
+   * Data provider for testUcwords().
+   *
+   * @see testUcwords()
+   *
+   * @return array
+   *   An array containing a string, its capitalized version and whether it should
+   *   be processed as multibyte.
+   */
+  public function providerUcwords() {
+    return [
+      ['tHe QUIcK bRoWn', 'THe QUIcK BRoWn'],
+      ['françAIS', 'FrançAIS'],
+      ['über', 'Über'],
+      ['åwesome', 'Åwesome'],
+      // Make sure we don't mangle extra spaces.
+      ['frànçAIS is  über-åwesome', 'FrànçAIS Is  Über-Åwesome'],
+      // Add a multibyte string.
+      ['σion', 'Σion', TRUE],
+    ];
+  }
+
+  /**
+   * Tests multibyte strlen.
+   *
+   * @dataProvider providerStrlen
+   * @covers ::strlen
+   */
+  public function testStrlen($text, $expected) {
+    // Run through multibyte code path.
+    Unicode::setStatus(Unicode::STATUS_MULTIBYTE);
+    $this->assertEquals($expected, Unicode::strlen($text));
+    // Run through singlebyte code path.
+    Unicode::setStatus(Unicode::STATUS_SINGLEBYTE);
+    $this->assertEquals($expected, Unicode::strlen($text));
+  }
+
+  /**
+   * Data provider for testStrlen().
+   *
+   * @see testStrlen()
+   *
+   * @return array
+   *   An array containing a string and its length.
+   */
+  public function providerStrlen() {
+    return [
+      ['tHe QUIcK bRoWn', 15],
+      ['ÜBER-åwesome', 12],
+      ['以呂波耳・ほへとち。リヌルヲ。', 15],
+    ];
+  }
+
+  /**
+   * Tests multibyte substr.
+   *
+   * @dataProvider providerSubstr
+   * @covers ::substr
+   */
+  public function testSubstr($text, $start, $length, $expected) {
+    // Run through multibyte code path.
+    Unicode::setStatus(Unicode::STATUS_MULTIBYTE);
+    $this->assertEquals($expected, Unicode::substr($text, $start, $length));
+    // Run through singlebyte code path.
+    Unicode::setStatus(Unicode::STATUS_SINGLEBYTE);
+    $this->assertEquals($expected, Unicode::substr($text, $start, $length));
+  }
+
+  /**
+   * Data provider for testSubstr().
+   *
+   * @see testSubstr()
+   *
+   * @return array
+   *   An array containing:
+   *     - The string to test.
+   *     - The start number to be processed by substr.
+   *     - The length number to be processed by substr.
+   *     - The expected string result.
+   */
+  public function providerSubstr() {
+    return [
+      ['frànçAIS is über-åwesome', 0, NULL, 'frànçAIS is über-åwesome'],
+      ['frànçAIS is über-åwesome', 0, 0, ''],
+      ['frànçAIS is über-åwesome', 0, 1, 'f'],
+      ['frànçAIS is über-åwesome', 0, 8, 'frànçAIS'],
+      ['frànçAIS is über-åwesome', 0, 23, 'frànçAIS is über-åwesom'],
+      ['frànçAIS is über-åwesome', 0, 24, 'frànçAIS is über-åwesome'],
+      ['frànçAIS is über-åwesome', 0, 25, 'frànçAIS is über-åwesome'],
+      ['frànçAIS is über-åwesome', 0, 100, 'frànçAIS is über-åwesome'],
+      ['frànçAIS is über-åwesome', 4, 4, 'çAIS'],
+      ['frànçAIS is über-åwesome', 1, 0, ''],
+      ['frànçAIS is über-åwesome', 100, 0, ''],
+      ['frànçAIS is über-åwesome', -4, 2, 'so'],
+      ['frànçAIS is über-åwesome', -4, 3, 'som'],
+      ['frànçAIS is über-åwesome', -4, 4, 'some'],
+      ['frànçAIS is über-åwesome', -4, 5, 'some'],
+      ['frànçAIS is über-åwesome', -7, 10, 'åwesome'],
+      ['frànçAIS is über-åwesome', 5, -10, 'AIS is üb'],
+      ['frànçAIS is über-åwesome', 0, -10, 'frànçAIS is üb'],
+      ['frànçAIS is über-åwesome', 0, -1, 'frànçAIS is über-åwesom'],
+      ['frànçAIS is über-åwesome', -7, -2, 'åweso'],
+      ['frànçAIS is über-åwesome', -7, -6, 'å'],
+      ['frànçAIS is über-åwesome', -7, -7, ''],
+      ['frànçAIS is über-åwesome', -7, -8, ''],
+      ['...', 0, 2, '..'],
+      ['以呂波耳・ほへとち。リヌルヲ。', 1, 3, '呂波耳'],
+    ];
+  }
+
+  /**
+   * Tests multibyte truncate.
+   *
+   * @dataProvider providerTruncate
+   * @covers ::truncate
+   */
+  public function testTruncate($text, $max_length, $expected, $wordsafe = FALSE, $add_ellipsis = FALSE) {
+    $this->assertEquals($expected, Unicode::truncate($text, $max_length, $wordsafe, $add_ellipsis));
+  }
+
+  /**
+   * Data provider for testTruncate().
+   *
+   * @see testTruncate()
+   *
+   * @return array
+   *   An array containing:
+   *     - The string to test.
+   *     - The max length to truncate this string to.
+   *     - The expected string result.
+   *     - (optional) Boolean for the $wordsafe flag. Defaults to FALSE.
+   *     - (optional) Boolean for the $add_ellipsis flag. Defaults to FALSE.
+   */
+  public function providerTruncate() {
+    return [
+      ['frànçAIS is über-åwesome', 24, 'frànçAIS is über-åwesome'],
+      ['frànçAIS is über-åwesome', 23, 'frànçAIS is über-åwesom'],
+      ['frànçAIS is über-åwesome', 17, 'frànçAIS is über-'],
+      ['以呂波耳・ほへとち。リヌルヲ。', 6, '以呂波耳・ほ'],
+      ['frànçAIS is über-åwesome', 24, 'frànçAIS is über-åwesome', FALSE, TRUE],
+      ['frànçAIS is über-åwesome', 23, 'frànçAIS is über-åweso…', FALSE, TRUE],
+      ['frànçAIS is über-åwesome', 17, 'frànçAIS is über…', FALSE, TRUE],
+      ['123', 1, '…', TRUE, TRUE],
+      ['123', 2, '1…', TRUE, TRUE],
+      ['123', 3, '123', TRUE, TRUE],
+      ['1234', 3, '12…', TRUE, TRUE],
+      ['1234567890', 10, '1234567890', TRUE, TRUE],
+      ['12345678901', 10, '123456789…', TRUE, TRUE],
+      ['12345678901', 11, '12345678901', TRUE, TRUE],
+      ['123456789012', 11, '1234567890…', TRUE, TRUE],
+      ['12345 7890', 10, '12345 7890', TRUE, TRUE],
+      ['12345 7890', 9, '12345…', TRUE, TRUE],
+      ['123 567 90', 10, '123 567 90', TRUE, TRUE],
+      ['123 567 901', 10, '123 567…', TRUE, TRUE],
+      ['Stop. Hammertime.', 17, 'Stop. Hammertime.', TRUE, TRUE],
+      ['Stop. Hammertime.', 16, 'Stop…', TRUE, TRUE],
+      ['frànçAIS is über-åwesome', 24, 'frànçAIS is über-åwesome', TRUE, TRUE],
+      ['frànçAIS is über-åwesome', 23, 'frànçAIS is über…', TRUE, TRUE],
+      ['frànçAIS is über-åwesome', 17, 'frànçAIS is über…', TRUE, TRUE],
+      ['¿Dónde está el niño?', 20, '¿Dónde está el niño?', TRUE, TRUE],
+      ['¿Dónde está el niño?', 19, '¿Dónde está el…', TRUE, TRUE],
+      ['¿Dónde está el niño?', 13, '¿Dónde está…', TRUE, TRUE],
+      ['¿Dónde está el niño?', 10, '¿Dónde…', TRUE, TRUE],
+      ['Help! Help! Help!', 17, 'Help! Help! Help!', TRUE, TRUE],
+      ['Help! Help! Help!', 16, 'Help! Help!…', TRUE, TRUE],
+      ['Help! Help! Help!', 15, 'Help! Help!…', TRUE, TRUE],
+      ['Help! Help! Help!', 14, 'Help! Help!…', TRUE, TRUE],
+      ['Help! Help! Help!', 13, 'Help! Help!…', TRUE, TRUE],
+      ['Help! Help! Help!', 12, 'Help! Help!…', TRUE, TRUE],
+      ['Help! Help! Help!', 11, 'Help! Help…', TRUE, TRUE],
+      ['Help! Help! Help!', 10, 'Help!…', TRUE, TRUE],
+      ['Help! Help! Help!', 9, 'Help!…', TRUE, TRUE],
+      ['Help! Help! Help!', 8, 'Help!…', TRUE, TRUE],
+      ['Help! Help! Help!', 7, 'Help!…', TRUE, TRUE],
+      ['Help! Help! Help!', 6, 'Help!…', TRUE, TRUE],
+      ['Help! Help! Help!', 5, 'Help…', TRUE, TRUE],
+      ['Help! Help! Help!', 4, 'Hel…', TRUE, TRUE],
+      ['Help! Help! Help!', 3, 'He…', TRUE, TRUE],
+      ['Help! Help! Help!', 2, 'H…', TRUE, TRUE],
+    ];
+  }
+
+  /**
+   * Tests multibyte truncate bytes.
+   *
+   * @dataProvider providerTestTruncateBytes
+   * @covers ::truncateBytes
+   *
+   * @param string $text
+   *   The string to truncate.
+   * @param int $max_length
+   *   The upper limit on the returned string length.
+   * @param string $expected
+   *   The expected return from Unicode::truncateBytes().
+   */
+  public function testTruncateBytes($text, $max_length, $expected) {
+    $this->assertEquals($expected, Unicode::truncateBytes($text, $max_length), 'The string was not correctly truncated.');
+  }
+
+  /**
+   * Provides data for self::testTruncateBytes().
+   *
+   * @return array
+   *   An array of arrays, each containing the parameters to
+   *   self::testTruncateBytes().
+   */
+  public function providerTestTruncateBytes() {
+    return [
+      // String shorter than max length.
+      ['Short string', 42, 'Short string'],
+      // Simple string longer than max length.
+      ['Longer string than previous.', 10, 'Longer str'],
+      // Unicode.
+      ['以呂波耳・ほへとち。リヌルヲ。', 10, '以呂波'],
+    ];
+  }
+
+  /**
+   * Tests UTF-8 validation.
+   *
+   * @dataProvider providerTestValidateUtf8
+   * @covers ::validateUtf8
+   *
+   * @param string $text
+   *   The text to validate.
+   * @param bool $expected
+   *   The expected return value from Unicode::validateUtf8().
+   * @param string $message
+   *   The message to display on failure.
+   */
+  public function testValidateUtf8($text, $expected, $message) {
+    $this->assertEquals($expected, Unicode::validateUtf8($text), $message);
+  }
+
+  /**
+   * Provides data for self::testValidateUtf8().
+   *
+   * Invalid UTF-8 examples sourced from http://stackoverflow.com/a/11709412/109119.
+   *
+   * @return array
+   *   An array of arrays, each containing the parameters for
+   *   self::testValidateUtf8().
+   */
+  public function providerTestValidateUtf8() {
+    return [
+      // Empty string.
+      ['', TRUE, 'An empty string did not validate.'],
+      // Simple text string.
+      ['Simple text.', TRUE, 'A simple ASCII text string did not validate.'],
+      // Invalid UTF-8, overlong 5 byte encoding.
+      [chr(0xF8) . chr(0x80) . chr(0x80) . chr(0x80) . chr(0x80), FALSE, 'Invalid UTF-8 was validated.'],
+      // High code-point without trailing characters.
+      [chr(0xD0) . chr(0x01), FALSE, 'Invalid UTF-8 was validated.'],
+    ];
+  }
+
+  /**
+   * Tests UTF-8 conversion.
+   *
+   * @dataProvider providerTestConvertToUtf8
+   * @covers ::convertToUtf8
+   *
+   * @param string $data
+   *   The data to be converted.
+   * @param string $encoding
+   *   The encoding the data is in.
+   * @param string|bool $expected
+   *   The expected result.
+   */
+  public function testConvertToUtf8($data, $encoding, $expected) {
+    $this->assertEquals($expected, Unicode::convertToUtf8($data, $encoding));
+  }
+
+  /**
+   * Provides data to self::testConvertToUtf8().
+   *
+   * @return array
+   *   An array of arrays, each containing the parameters to
+   *   self::testConvertUtf8().  }
+   */
+  public function providerTestConvertToUtf8() {
+    return [
+      [chr(0x97), 'Windows-1252', '—'],
+      [chr(0x99), 'Windows-1252', '™'],
+      [chr(0x80), 'Windows-1252', '€'],
+    ];
+  }
+
+  /**
+   * Tests multibyte strpos.
+   *
+   * @dataProvider providerStrpos
+   * @covers ::strpos
+   */
+  public function testStrpos($haystack, $needle, $offset, $expected) {
+    // Run through multibyte code path.
+    Unicode::setStatus(Unicode::STATUS_MULTIBYTE);
+    $this->assertEquals($expected, Unicode::strpos($haystack, $needle, $offset));
+    // Run through singlebyte code path.
+    Unicode::setStatus(Unicode::STATUS_SINGLEBYTE);
+    $this->assertEquals($expected, Unicode::strpos($haystack, $needle, $offset));
+  }
+
+  /**
+   * Data provider for testStrpos().
+   *
+   * @see testStrpos()
+   *
+   * @return array
+   *   An array containing:
+   *     - The haystack string to be searched in.
+   *     - The needle string to search for.
+   *     - The offset integer to start at.
+   *     - The expected integer/FALSE result.
+   */
+  public function providerStrpos() {
+    return [
+      ['frànçAIS is über-åwesome', 'frànçAIS is über-åwesome', 0, 0],
+      ['frànçAIS is über-åwesome', 'rànçAIS is über-åwesome', 0, 1],
+      ['frànçAIS is über-åwesome', 'not in string', 0, FALSE],
+      ['frànçAIS is über-åwesome', 'r', 0, 1],
+      ['frànçAIS is über-åwesome', 'nçAIS', 0, 3],
+      ['frànçAIS is über-åwesome', 'nçAIS', 2, 3],
+      ['frànçAIS is über-åwesome', 'nçAIS', 3, 3],
+      ['以呂波耳・ほへとち。リヌルヲ。', '波耳', 0, 2],
+      ['以呂波耳・ほへとち。リヌルヲ。', '波耳', 1, 2],
+      ['以呂波耳・ほへとち。リヌルヲ。', '波耳', 2, 2],
+    ];
+  }
+
+}