annotate core/modules/search/tests/src/Functional/SearchSimplifyTest.php @ 4:a9cd425dd02b

Update, including to Drupal core 8.6.10
author Chris Cannam
date Thu, 28 Feb 2019 13:11:55 +0000
parents c75dbcec494b
children
rev   line source
Chris@0 1 <?php
Chris@0 2
Chris@0 3 namespace Drupal\Tests\search\Functional;
Chris@0 4
Chris@4 5 use Drupal\Tests\BrowserTestBase;
Chris@0 6
Chris@0 7 /**
Chris@0 8 * Tests that the search_simplify() function works as intended.
Chris@0 9 *
Chris@0 10 * @group search
Chris@0 11 */
class SearchSimplifyTest extends BrowserTestBase {

  /**
   * {@inheritdoc}
   */
  protected static $modules = ['search'];

  /**
   * Tests that all Unicode characters simplify correctly.
   */
  public function testSearchSimplifyUnicode() {
    // This test uses a file that was constructed so that the even lines are
    // boundary characters, and the odd lines are valid word characters. (It
    // was generated as a sequence of all the Unicode characters, and then the
    // boundary characters (punctuation, spaces, etc.) were split off into
    // their own lines). So the even-numbered lines should simplify to nothing,
    // and the odd-numbered lines we need to split into shorter chunks and
    // verify that simplification doesn't lose any characters.
    $input = file_get_contents($this->root . '/core/modules/search/tests/UnicodeTest.txt');
    // Without this check an unreadable fixture would be exploded into a single
    // empty line and every Unicode assertion below would be silently skipped.
    $this->assertNotFalse($input, 'The Unicode test fixture could be read.');

    $basestrings = explode(chr(10), $input);
    $strings = [];
    // Keys produced by explode() are zero-based, so an odd key corresponds to
    // an even-numbered line of the file and vice versa.
    foreach ($basestrings as $key => $string) {
      if ($key % 2) {
        // Even line - should simplify down to a space.
        $simplified = search_simplify($string);
        $this->assertIdentical($simplified, ' ', "Line $key is excluded from the index");
      }
      else {
        // Odd line, should be word characters.
        // Split this into 30-character chunks, so we don't run into limits
        // of truncation in search_simplify().
        $length = mb_strlen($string);
        for ($start = 0; $start < $length; $start += 30) {
          $newstr = mb_substr($string, $start, 30);
          // Special case: leading zeros are removed from numeric strings,
          // and there's one string in this file that is numbers starting with
          // zero, so prepend a 1 on that string.
          if (preg_match('/^[0-9]+$/', $newstr)) {
            $newstr = '1' . $newstr;
          }
          $strings[] = $newstr;
        }
      }
    }
    foreach ($strings as $key => $string) {
      $simplified = search_simplify($string);
      $this->assertTrue(mb_strlen($simplified) >= mb_strlen($string), "Nothing is removed from string $key.");
    }

    // Test the low-numbered ASCII control characters separately. They are not
    // in the text file because they are problematic for diff, especially \0.
    $string = '';
    for ($i = 0; $i < 32; $i++) {
      $string .= chr($i);
    }
    // Actual value first, expected value second, matching the other
    // assertIdentical() call in this method.
    $this->assertIdentical(search_simplify($string), ' ', 'Search simplify works for ASCII control characters.');
  }

  /**
   * Tests that search_simplify() does the right thing with punctuation.
   */
  public function testSearchSimplifyPunctuation() {
    // Each case is: input string, expected simplified output, assert message.
    $cases = [
      ['20.03/94-28,876', '20039428876', 'Punctuation removed from numbers'],
      ['great...drupal--module', 'great drupal module', 'Multiple dot and dashes are word boundaries'],
      ['very_great-drupal.module', 'verygreatdrupalmodule', 'Single dot, dash, underscore are removed'],
      ['regular,punctuation;word', 'regular punctuation word', 'Punctuation is a word boundary'],
    ];

    foreach ($cases as $case) {
      // The result is trimmed before comparison, so any leading or trailing
      // whitespace in the simplified string is ignored.
      $out = trim(search_simplify($case[0]));
      $this->assertEqual($out, $case[1], $case[2]);
    }
  }

}