Mercurial > hg > cmmr2012-drupal-site
comparison core/modules/search/tests/src/Functional/SearchSimplifyTest.php @ 0:c75dbcec494b
Initial commit from drush-created site
author | Chris Cannam |
---|---|
date | Thu, 05 Jul 2018 14:24:15 +0000 |
parents | |
children | a9cd425dd02b |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c75dbcec494b |
---|---|
1 <?php | |
2 | |
3 namespace Drupal\Tests\search\Functional; | |
4 | |
5 use Drupal\Component\Utility\Unicode; | |
6 | |
7 /** | |
8 * Tests that the search_simplify() function works as intended. | |
9 * | |
10 * @group search | |
11 */ | |
class SearchSimplifyTest extends SearchTestBase {

  /**
   * Tests that all Unicode characters simplify correctly.
   *
   * Reads the UnicodeTest.txt fixture, checks that every boundary-character
   * line collapses to a single space, and checks that no chunk of a
   * word-character line gets shorter when simplified. The low ASCII control
   * characters are checked separately because they are not in the fixture.
   */
  public function testSearchSimplifyUnicode() {
    // This test uses a file that was constructed so that the even lines are
    // boundary characters, and the odd lines are valid word characters. (It
    // was generated as a sequence of all the Unicode characters, and then the
    // boundary characters (punctuation, spaces, etc.) were split off into
    // their own lines). So the even-numbered lines should simplify to nothing,
    // and the odd-numbered lines we need to split into shorter chunks and
    // verify that simplification doesn't lose any characters.
    $path = \Drupal::root() . '/core/modules/search/tests/UnicodeTest.txt';
    $input = file_get_contents($path);
    // Without this guard a failed read would be exploded into a single empty
    // line, and the fixture-driven assertions below would silently be skipped.
    $this->assertTrue($input !== FALSE, "The Unicode fixture file $path could be read.");

    // Word-character lines are cut into chunks of this many characters so we
    // don't run into limits of truncation in search_simplify().
    $chunk_length = 30;

    $basestrings = explode(chr(10), $input);
    $strings = [];
    foreach ($basestrings as $key => $string) {
      // Array keys are zero-based, so an odd key is an even-numbered line.
      if ($key % 2) {
        // Even line - should simplify down to a space.
        $line_number = $key + 1;
        $simplified = search_simplify($string);
        $this->assertIdentical($simplified, ' ', "Line $line_number is excluded from the index");
      }
      else {
        // Odd line, should be word characters. The line length does not
        // change while chunking, so measure it once rather than on every
        // iteration.
        $length = Unicode::strlen($string);
        for ($start = 0; $start < $length; $start += $chunk_length) {
          $newstr = Unicode::substr($string, $start, $chunk_length);
          // Special case: leading zeros are removed from numeric strings,
          // and there's one string in this file that is numbers starting with
          // zero, so prepend a 1 on that string.
          if (preg_match('/^[0-9]+$/', $newstr)) {
            $newstr = '1' . $newstr;
          }
          $strings[] = $newstr;
        }
      }
    }

    // Simplification may add characters, but must never drop any.
    foreach ($strings as $key => $string) {
      $simplified = search_simplify($string);
      $this->assertTrue(Unicode::strlen($simplified) >= Unicode::strlen($string), "Nothing is removed from string $key.");
    }

    // Test the low-numbered ASCII control characters separately. They are not
    // in the text file because they are problematic for diff, especially \0.
    $string = '';
    for ($i = 0; $i < 32; $i++) {
      $string .= chr($i);
    }
    $this->assertIdentical(' ', search_simplify($string), 'Search simplify works for ASCII control characters.');
  }

  /**
   * Tests that search_simplify() does the right thing with punctuation.
   */
  public function testSearchSimplifyPunctuation() {
    // Each case is: input text, expected simplified text (after trimming),
    // and the assertion message.
    $cases = [
      ['20.03/94-28,876', '20039428876', 'Punctuation removed from numbers'],
      ['great...drupal--module', 'great drupal module', 'Multiple dot and dashes are word boundaries'],
      ['very_great-drupal.module', 'verygreatdrupalmodule', 'Single dot, dash, underscore are removed'],
      ['regular,punctuation;word', 'regular punctuation word', 'Punctuation is a word boundary'],
    ];

    foreach ($cases as list($text, $expected, $message)) {
      // Surrounding whitespace is trimmed before comparing.
      $out = trim(search_simplify($text));
      $this->assertEqual($out, $expected, $message);
    }
  }

}