comparison core/modules/search/tests/src/Functional/SearchSimplifyTest.php @ 0:c75dbcec494b

Initial commit from drush-created site
author Chris Cannam
date Thu, 05 Jul 2018 14:24:15 +0000
parents
children a9cd425dd02b
comparison
equal deleted inserted replaced
-1:000000000000 0:c75dbcec494b
1 <?php
2
3 namespace Drupal\Tests\search\Functional;
4
5 use Drupal\Component\Utility\Unicode;
6
7 /**
8 * Tests that the search_simplify() function works as intended.
9 *
10 * @group search
11 */
class SearchSimplifyTest extends SearchTestBase {

  /**
   * Tests that all Unicode characters simplify correctly.
   *
   * Reads the UnicodeTest.txt fixture, checks that every boundary-character
   * line collapses to a single space, and checks that no characters are lost
   * from the word-character lines. ASCII control characters are covered
   * separately at the end because they are not part of the fixture.
   */
  public function testSearchSimplifyUnicode() {
    // This test uses a file that was constructed so that the even lines are
    // boundary characters, and the odd lines are valid word characters. (It
    // was generated as a sequence of all the Unicode characters, and then the
    // boundary characters (punctuation, spaces, etc.) were split off into
    // their own lines). So the even-numbered lines should simplify to nothing,
    // and the odd-numbered lines we need to split into shorter chunks and
    // verify that simplification doesn't lose any characters.
    $input = file_get_contents(\Drupal::root() . '/core/modules/search/tests/UnicodeTest.txt');

    // An unreadable or empty fixture would turn both loops below into no-ops
    // and let the test pass without checking anything, so fail explicitly.
    $this->assertTrue(is_string($input) && $input !== '', 'The Unicode test fixture could be read.');

    $basestrings = explode(chr(10), $input);
    $strings = [];
    foreach ($basestrings as $key => $string) {
      // Keys are zero-based, so an odd key is an even-numbered line.
      if ($key % 2) {
        // Even line - should simplify down to a space.
        $simplified = search_simplify($string);
        $this->assertIdentical($simplified, ' ', "Line $key is excluded from the index");
      }
      else {
        // Odd line, should be word characters.
        // Split this into 30-character chunks, so we don't run into limits
        // of truncation in search_simplify().
        // The length is computed once: it does not change while chunking.
        $length = Unicode::strlen($string);
        for ($start = 0; $start < $length; $start += 30) {
          $newstr = Unicode::substr($string, $start, 30);
          // Special case: leading zeros are removed from numeric strings,
          // and there's one string in this file that is numbers starting with
          // zero, so prepend a 1 on that string.
          if (preg_match('/^[0-9]+$/', $newstr)) {
            $newstr = '1' . $newstr;
          }
          $strings[] = $newstr;
        }
      }
    }

    // Simplification may add characters, but must never drop any of them.
    foreach ($strings as $key => $string) {
      $simplified = search_simplify($string);
      $this->assertTrue(Unicode::strlen($simplified) >= Unicode::strlen($string), "Nothing is removed from string $key.");
    }

    // Test the low-numbered ASCII control characters separately. They are not
    // in the text file because they are problematic for diff, especially \0.
    $string = '';
    for ($i = 0; $i < 32; $i++) {
      $string .= chr($i);
    }
    // Same argument order as the assertIdentical() call above.
    $this->assertIdentical(search_simplify($string), ' ', 'Search simplify works for ASCII control characters.');
  }

  /**
   * Tests that search_simplify() does the right thing with punctuation.
   */
  public function testSearchSimplifyPunctuation() {
    // Each case is: input text, expected simplified text, assertion message.
    $cases = [
      ['20.03/94-28,876', '20039428876', 'Punctuation removed from numbers'],
      ['great...drupal--module', 'great drupal module', 'Multiple dot and dashes are word boundaries'],
      ['very_great-drupal.module', 'verygreatdrupalmodule', 'Single dot, dash, underscore are removed'],
      ['regular,punctuation;word', 'regular punctuation word', 'Punctuation is a word boundary'],
    ];

    foreach ($cases as list($text, $expected, $message)) {
      // The result is trimmed before comparison, so only the inner text of
      // the simplified string is checked.
      $out = trim(search_simplify($text));
      $this->assertEqual($out, $expected, $message);
    }
  }

}