comparison core/modules/search/tests/src/Kernel/SearchMatchTest.php @ 0:4c8ae668cc8c

Initial import (non-working)
author Chris Cannam
date Wed, 29 Nov 2017 16:09:58 +0000
parents
children 129ea1e6d783
comparison
equal deleted inserted replaced
-1:000000000000 0:4c8ae668cc8c
1 <?php
2
3 namespace Drupal\Tests\search\Kernel;
4
5 use Drupal\Core\Language\LanguageInterface;
6 use Drupal\KernelTests\KernelTestBase;
7
/**
 * Indexes content and queries it.
 *
 * Three search types are indexed with small, known texts. A fixed list of
 * search expressions is then run against each type to verify which items
 * match and that the returned scores are ordered and within range.
 *
 * @group search
 */
class SearchMatchTest extends KernelTestBase {

  // The search index can contain different types of content. Typically the type
  // is 'node'. Here we test with _test_, _test2_ and _test3_ as the type.
  const SEARCH_TYPE = '_test_';
  const SEARCH_TYPE_2 = '_test2_';
  const SEARCH_TYPE_JPN = '_test3_';

  /**
   * Modules to enable.
   *
   * @var array
   */
  public static $modules = ['search'];

  /**
   * {@inheritdoc}
   */
  protected function setUp() {
    parent::setUp();
    $this->installSchema('search', ['search_index', 'search_dataset', 'search_total']);
    $this->installConfig(['search']);
  }

  /**
   * Test search indexing.
   */
  public function testMatching() {
    $this->_setup();
    $this->_testQueries();
  }

  /**
   * Set up a small index of items to test against.
   *
   * Items 1-7 are indexed under SEARCH_TYPE, items 8-12 under SEARCH_TYPE_2
   * and items 13-15 under SEARCH_TYPE_JPN.
   */
  public function _setup() {
    // The expected results in _testQueries() rely on this minimum word size:
    // two-letter words such as "am", "ut" and "xx" count as short words.
    $this->config('search.settings')->set('index.minimum_word_size', 3)->save();

    for ($i = 1; $i <= 7; ++$i) {
      search_index(static::SEARCH_TYPE, $i, LanguageInterface::LANGCODE_NOT_SPECIFIED, $this->getText($i));
    }
    for ($i = 1; $i <= 5; ++$i) {
      // Offset the IDs by 7 so they continue after the first type's items.
      search_index(static::SEARCH_TYPE_2, $i + 7, LanguageInterface::LANGCODE_NOT_SPECIFIED, $this->getText2($i));
    }
    // No getText builder function for Japanese text; just a simple array.
    $japanese = [
      13 => '以呂波耳・ほへとち。リヌルヲ。',
      14 => 'ドルーパルが大好きよ!',
      15 => 'コーヒーとケーキ',
    ];
    foreach ($japanese as $i => $jpn) {
      search_index(static::SEARCH_TYPE_JPN, $i, LanguageInterface::LANGCODE_NOT_SPECIFIED, $jpn);
    }
    // Called once, after every item has been indexed.
    search_update_totals();
  }

  /**
   * _test_: Helper method for generating snippets of content.
   *
   * Item $n is a window of $n consecutive words starting at the $n-th word of
   * the source sentence.
   *
   * Generated items to test against:
   *   1  ipsum
   *   2  dolore sit
   *   3  sit am ut
   *   4  am ut enim am
   *   5  ut enim am minim veniam
   *   6  enim am minim veniam es cillum
   *   7  am minim veniam es cillum dolore eu
   *
   * @param int $n
   *   The 1-based item number.
   *
   * @return string
   *   The words of the item, joined by single spaces. Case and punctuation of
   *   the source sentence are kept as-is.
   */
  public function getText($n) {
    $words = explode(' ', "Ipsum dolore sit am. Ut enim am minim veniam. Es cillum dolore eu.");
    return implode(' ', array_slice($words, $n - 1, $n));
  }

  /**
   * _test2_: Helper method for generating snippets of content.
   *
   * Item $n is a window of $n consecutive words starting at the $n-th word of
   * the source sentence; the window is cut short at the end of the sentence.
   *
   * Generated items to test against:
   *   8  dear
   *   9  king philip
   *   10 philip came over
   *   11 came over from germany
   *   12 over from germany swimming
   *
   * @param int $n
   *   The 1-based snippet number (1 produces item 8, 5 produces item 12).
   *
   * @return string
   *   The words of the snippet, joined by single spaces. Case and punctuation
   *   of the source sentence are kept as-is.
   */
  public function getText2($n) {
    $words = explode(' ', "Dear King Philip came over from Germany swimming.");
    return implode(' ', array_slice($words, $n - 1, $n));
  }

  /**
   * Run predefined queries looking for indexed terms.
   */
  public function _testQueries() {
    // Note: OR queries that include short words in OR groups are only accepted
    // if the ORed terms are ANDed with at least one long word in the rest of
    // the query. Examples:
    //   enim dolore OR ut = enim (dolore OR ut) = (enim dolore) OR (enim ut)
    // is good, and
    //   dolore OR ut = (dolore) OR (ut)
    // is bad. This is a design limitation to avoid full table scans.
    $queries = [
      // Simple AND queries.
      'ipsum' => [1],
      'enim' => [4, 5, 6],
      'xxxxx' => [],
      'enim minim' => [5, 6],
      'enim xxxxx' => [],
      'dolore eu' => [7],
      'dolore xx' => [],
      'ut minim' => [5],
      'xx minim' => [],
      'enim veniam am minim ut' => [5],
      // Simple OR and AND/OR queries.
      'dolore OR ipsum' => [1, 2, 7],
      'dolore OR xxxxx' => [2, 7],
      'dolore OR ipsum OR enim' => [1, 2, 4, 5, 6, 7],
      'ipsum OR dolore sit OR cillum' => [2, 7],
      'minim dolore OR ipsum' => [7],
      'dolore OR ipsum veniam' => [7],
      'minim dolore OR ipsum OR enim' => [5, 6, 7],
      'dolore xx OR yy' => [],
      'xxxxx dolore OR ipsum' => [],
      // Sequence of OR queries.
      'minim' => [5, 6, 7],
      'minim OR xxxx' => [5, 6, 7],
      'minim OR xxxx OR minim' => [5, 6, 7],
      'minim OR xxxx minim' => [5, 6, 7],
      'minim OR xxxx minim OR yyyy' => [5, 6, 7],
      'minim OR xxxx minim OR cillum' => [6, 7, 5],
      'minim OR xxxx minim OR xxxx' => [5, 6, 7],
      // Negative queries.
      'dolore -sit' => [7],
      'dolore -eu' => [2],
      'dolore -xxxxx' => [2, 7],
      'dolore -xx' => [2, 7],
      // Phrase queries.
      '"dolore sit"' => [2],
      '"sit dolore"' => [],
      '"am minim veniam es"' => [6, 7],
      '"minim am veniam es"' => [],
      // Mixed queries.
      '"am minim veniam es" OR dolore' => [2, 6, 7],
      '"minim am veniam es" OR "dolore sit"' => [2],
      '"minim am veniam es" OR "sit dolore"' => [],
      '"am minim veniam es" -eu' => [6],
      '"am minim veniam" -"cillum dolore"' => [5, 6],
      '"am minim veniam" -"dolore cillum"' => [5, 6, 7],
      'xxxxx "minim am veniam es" OR dolore' => [],
      'xx "minim am veniam es" OR dolore' => [],
    ];
    $this->assertQueries($queries, static::SEARCH_TYPE);

    // These queries are run against the second index type, SEARCH_TYPE_2.
    $queries = [
      // Simple AND queries.
      'ipsum' => [],
      'enim' => [],
      'enim minim' => [],
      'dear' => [8],
      'germany' => [11, 12],
    ];
    $this->assertQueries($queries, static::SEARCH_TYPE_2);

    // These queries are run against the third index type, SEARCH_TYPE_JPN.
    $queries = [
      // Simple AND queries.
      '呂波耳' => [13],
      '以呂波耳' => [13],
      'ほへと ヌルヲ' => [13],
      'とちリ' => [],
      'ドルーパル' => [14],
      'パルが大' => [14],
      'コーヒー' => [15],
      'ヒーキ' => [],
    ];
    $this->assertQueries($queries, static::SEARCH_TYPE_JPN);
  }

  /**
   * Runs search expressions against one search type and checks the results.
   *
   * Each expression is checked for both the matched item IDs and the scores.
   *
   * @param array $queries
   *   Lists of expected item IDs, keyed by search expression.
   * @param string $type
   *   The search type the expression is restricted to.
   */
  protected function assertQueries(array $queries, $type) {
    foreach ($queries as $query => $results) {
      $result = \Drupal::database()->select('search_index', 'i')
        ->extend('Drupal\search\SearchQuery')
        ->searchExpression($query, $type)
        ->execute();

      // A falsy return from execute() is treated as "no rows".
      $set = $result ? $result->fetchAll() : [];
      $this->_testQueryMatching($query, $set, $results);
      $this->_testQueryScores($query, $set, $results);
    }
  }

  /**
   * Test the matching abilities of the engine.
   *
   * Verify if a query produces the correct results.
   *
   * @param string $query
   *   The search expression; only used in the assertion message.
   * @param array $set
   *   The fetched result rows; each row must expose a 'sid' property.
   * @param array $results
   *   The expected item IDs, in any order.
   */
  public function _testQueryMatching($query, $set, $results) {
    // Get result IDs.
    $found = [];
    foreach ($set as $item) {
      $found[] = $item->sid;
    }

    // Compare $results and $found, ignoring order.
    sort($found);
    sort($results);
    $this->assertEquals($results, $found, "Query matching '$query'");
  }

  /**
   * Test the scoring abilities of the engine.
   *
   * Verify if a query produces normalized, monotonically ordered scores.
   *
   * @param string $query
   *   The search expression; only used in the assertion messages.
   * @param array $set
   *   The fetched result rows; each row must expose a 'calculated_score'
   *   property.
   * @param array $results
   *   The expected item IDs. Not used by this check; kept so the signature
   *   parallels _testQueryMatching().
   */
  public function _testQueryScores($query, $set, $results) {
    // Get result scores.
    $scores = [];
    foreach ($set as $item) {
      $scores[] = $item->calculated_score;
    }

    // Check order: rows must already be sorted by descending score.
    $sorted = $scores;
    sort($sorted);
    $this->assertEquals(array_reverse($sorted), $scores, "Query order '$query'");

    // Check range: every score must be above 0 and at most 1, with a small
    // tolerance on the upper bound. An empty result set passes trivially.
    $this->assertTrue(!count($scores) || (min($scores) > 0.0 && max($scores) <= 1.0001), "Query scoring '$query'");
  }

}
255 }