Mercurial > hg > cmmr2012-drupal-site
diff core/modules/search/tests/src/Functional/SearchSimplifyTest.php @ 0:c75dbcec494b
Initial commit from drush-created site
author | Chris Cannam |
---|---|
date | Thu, 05 Jul 2018 14:24:15 +0000 |
parents | |
children | a9cd425dd02b |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/core/modules/search/tests/src/Functional/SearchSimplifyTest.php Thu Jul 05 14:24:15 2018 +0000 @@ -0,0 +1,82 @@ +<?php + +namespace Drupal\Tests\search\Functional; + +use Drupal\Component\Utility\Unicode; + +/** + * Tests that the search_simply() function works as intended. + * + * @group search + */ +class SearchSimplifyTest extends SearchTestBase { + /** + * Tests that all Unicode characters simplify correctly. + */ + public function testSearchSimplifyUnicode() { + // This test uses a file that was constructed so that the even lines are + // boundary characters, and the odd lines are valid word characters. (It + // was generated as a sequence of all the Unicode characters, and then the + // boundary characters (punctuation, spaces, etc.) were split off into + // their own lines). So the even-numbered lines should simplify to nothing, + // and the odd-numbered lines we need to split into shorter chunks and + // verify that simplification doesn't lose any characters. + $input = file_get_contents(\Drupal::root() . '/core/modules/search/tests/UnicodeTest.txt'); + $basestrings = explode(chr(10), $input); + $strings = []; + foreach ($basestrings as $key => $string) { + if ($key % 2) { + // Even line - should simplify down to a space. + $simplified = search_simplify($string); + $this->assertIdentical($simplified, ' ', "Line $key is excluded from the index"); + } + else { + // Odd line, should be word characters. + // Split this into 30-character chunks, so we don't run into limits + // of truncation in search_simplify(). + $start = 0; + while ($start < Unicode::strlen($string)) { + $newstr = Unicode::substr($string, $start, 30); + // Special case: leading zeros are removed from numeric strings, + // and there's one string in this file that is numbers starting with + // zero, so prepend a 1 on that string. + if (preg_match('/^[0-9]+$/', $newstr)) { + $newstr = '1' . $newstr; + } + $strings[] = $newstr; + $start += 30; + } + } + } + foreach ($strings as $key => $string) { + $simplified = search_simplify($string); + $this->assertTrue(Unicode::strlen($simplified) >= Unicode::strlen($string), "Nothing is removed from string $key."); + } + + // Test the low-numbered ASCII control characters separately. They are not + // in the text file because they are problematic for diff, especially \0. + $string = ''; + for ($i = 0; $i < 32; $i++) { + $string .= chr($i); + } + $this->assertIdentical(' ', search_simplify($string), 'Search simplify works for ASCII control characters.'); + } + + /** + * Tests that search_simplify() does the right thing with punctuation. + */ + public function testSearchSimplifyPunctuation() { + $cases = [ + ['20.03/94-28,876', '20039428876', 'Punctuation removed from numbers'], + ['great...drupal--module', 'great drupal module', 'Multiple dot and dashes are word boundaries'], + ['very_great-drupal.module', 'verygreatdrupalmodule', 'Single dot, dash, underscore are removed'], + ['regular,punctuation;word', 'regular punctuation word', 'Punctuation is a word boundary'], + ]; + + foreach ($cases as $case) { + $out = trim(search_simplify($case[0])); + $this->assertEqual($out, $case[1], $case[2]); + } + } + +}