annotate core/modules/search/tests/src/Kernel/SearchMatchTest.php @ 9:1fc0ff908d1f

Add another data file
author Chris Cannam
date Mon, 05 Feb 2018 12:34:32 +0000
parents 4c8ae668cc8c
children 129ea1e6d783
rev   line source
Chris@0 1 <?php
Chris@0 2
Chris@0 3 namespace Drupal\Tests\search\Kernel;
Chris@0 4
Chris@0 5 use Drupal\Core\Language\LanguageInterface;
Chris@0 6 use Drupal\KernelTests\KernelTestBase;
Chris@0 7
/**
 * Indexes content and queries it.
 *
 * Three small sets of items are written to the search index under three
 * different item types, and a table of search expressions is then run against
 * each type to verify both which items match and how they are scored.
 *
 * @group search
 */
class SearchMatchTest extends KernelTestBase {

  // The search index can contain different types of content. Typically the type
  // is 'node'. Here we test with _test_, _test2_ and _test3_ as the type.
  const SEARCH_TYPE = '_test_';
  const SEARCH_TYPE_2 = '_test2_';
  const SEARCH_TYPE_JPN = '_test3_';

  /**
   * Modules to enable.
   *
   * @var array
   */
  public static $modules = ['search'];

  /**
   * {@inheritdoc}
   */
  protected function setUp() {
    parent::setUp();
    $this->installSchema('search', ['search_index', 'search_dataset', 'search_total']);
    $this->installConfig(['search']);
  }

  /**
   * Test search indexing.
   */
  public function testMatching() {
    $this->_setup();
    $this->_testQueries();
  }

  /**
   * Set up a small index of items to test against.
   *
   * Items 1-7 are indexed as SEARCH_TYPE, items 8-12 as SEARCH_TYPE_2 and
   * items 13-15 as SEARCH_TYPE_JPN.
   */
  public function _setup() {
    $this->config('search.settings')->set('index.minimum_word_size', 3)->save();

    for ($i = 1; $i <= 7; ++$i) {
      search_index(static::SEARCH_TYPE, $i, LanguageInterface::LANGCODE_NOT_SPECIFIED, $this->getText($i));
    }
    // The second set continues the ID sequence after the first one (8-12).
    for ($i = 1; $i <= 5; ++$i) {
      search_index(static::SEARCH_TYPE_2, $i + 7, LanguageInterface::LANGCODE_NOT_SPECIFIED, $this->getText2($i));
    }
    // No getText builder function for Japanese text; just a simple array.
    $japanese = [
      13 => '以呂波耳・ほへとち。リヌルヲ。',
      14 => 'ドルーパルが大好きよ!',
      15 => 'コーヒーとケーキ',
    ];
    foreach ($japanese as $i => $jpn) {
      search_index(static::SEARCH_TYPE_JPN, $i, LanguageInterface::LANGCODE_NOT_SPECIFIED, $jpn);
    }
    search_update_totals();
  }

  /**
   * _test_: Helper method for generating snippets of content.
   *
   * Returns $n consecutive words of a fixed sentence, starting at word $n.
   *
   * Generated items to test against:
   *   1  ipsum
   *   2  dolore sit
   *   3  sit am ut
   *   4  am ut enim am
   *   5  ut enim am minim veniam
   *   6  enim am minim veniam es cillum
   *   7  am minim veniam es cillum dolore eu
   *
   * @param int $n
   *   The one-based start position, which is also the number of words.
   *
   * @return string
   *   The selected words joined by single spaces, with their original case
   *   and punctuation.
   */
  public function getText($n) {
    $words = explode(' ', "Ipsum dolore sit am. Ut enim am minim veniam. Es cillum dolore eu.");
    return implode(' ', array_slice($words, $n - 1, $n));
  }

  /**
   * _test2_: Helper method for generating snippets of content.
   *
   * Returns $n consecutive words of a fixed sentence, starting at word $n.
   * The IDs listed below are the ones _setup() assigns ($n + 7).
   *
   * Generated items to test against:
   *   8  dear
   *   9  king philip
   *   10 philip came over
   *   11 came over from germany
   *   12 over from germany swimming
   *
   * @param int $n
   *   The one-based start position, which is also the number of words.
   *
   * @return string
   *   The selected words joined by single spaces, with their original case
   *   and punctuation.
   */
  public function getText2($n) {
    $words = explode(' ', "Dear King Philip came over from Germany swimming.");
    return implode(' ', array_slice($words, $n - 1, $n));
  }

  /**
   * Run predefined queries looking for indexed terms.
   */
  public function _testQueries() {
    // Note: OR queries that include short words in OR groups are only accepted
    // if the ORed terms are ANDed with at least one long word in the rest of
    // the query. Examples:
    //   enim dolore OR ut = enim (dolore OR ut) = (enim dolore) OR (enim ut)
    // is good, and
    //   dolore OR ut = (dolore) OR (ut)
    // is bad. This is a design limitation to avoid full table scans.
    $queries = [
      // Simple AND queries.
      'ipsum' => [1],
      'enim' => [4, 5, 6],
      'xxxxx' => [],
      'enim minim' => [5, 6],
      'enim xxxxx' => [],
      'dolore eu' => [7],
      'dolore xx' => [],
      'ut minim' => [5],
      'xx minim' => [],
      'enim veniam am minim ut' => [5],
      // Simple OR and AND/OR queries.
      'dolore OR ipsum' => [1, 2, 7],
      'dolore OR xxxxx' => [2, 7],
      'dolore OR ipsum OR enim' => [1, 2, 4, 5, 6, 7],
      'ipsum OR dolore sit OR cillum' => [2, 7],
      'minim dolore OR ipsum' => [7],
      'dolore OR ipsum veniam' => [7],
      'minim dolore OR ipsum OR enim' => [5, 6, 7],
      'dolore xx OR yy' => [],
      'xxxxx dolore OR ipsum' => [],
      // Sequence of OR queries.
      'minim' => [5, 6, 7],
      'minim OR xxxx' => [5, 6, 7],
      'minim OR xxxx OR minim' => [5, 6, 7],
      'minim OR xxxx minim' => [5, 6, 7],
      'minim OR xxxx minim OR yyyy' => [5, 6, 7],
      'minim OR xxxx minim OR cillum' => [6, 7, 5],
      'minim OR xxxx minim OR xxxx' => [5, 6, 7],
      // Negative queries.
      'dolore -sit' => [7],
      'dolore -eu' => [2],
      'dolore -xxxxx' => [2, 7],
      'dolore -xx' => [2, 7],
      // Phrase queries.
      '"dolore sit"' => [2],
      '"sit dolore"' => [],
      '"am minim veniam es"' => [6, 7],
      '"minim am veniam es"' => [],
      // Mixed queries.
      '"am minim veniam es" OR dolore' => [2, 6, 7],
      '"minim am veniam es" OR "dolore sit"' => [2],
      '"minim am veniam es" OR "sit dolore"' => [],
      '"am minim veniam es" -eu' => [6],
      '"am minim veniam" -"cillum dolore"' => [5, 6],
      '"am minim veniam" -"dolore cillum"' => [5, 6, 7],
      'xxxxx "minim am veniam es" OR dolore' => [],
      'xx "minim am veniam es" OR dolore' => [],
    ];
    $this->assertQueries($queries, static::SEARCH_TYPE);

    // These queries are run against the second index type, SEARCH_TYPE_2.
    $queries = [
      // Simple AND queries.
      'ipsum' => [],
      'enim' => [],
      'enim minim' => [],
      'dear' => [8],
      'germany' => [11, 12],
    ];
    $this->assertQueries($queries, static::SEARCH_TYPE_2);

    // These queries are run against the third index type, SEARCH_TYPE_JPN.
    $queries = [
      // Simple AND queries.
      '呂波耳' => [13],
      '以呂波耳' => [13],
      'ほへと ヌルヲ' => [13],
      'とちリ' => [],
      'ドルーパル' => [14],
      'パルが大' => [14],
      'コーヒー' => [15],
      'ヒーキ' => [],
    ];
    $this->assertQueries($queries, static::SEARCH_TYPE_JPN);
  }

  /**
   * Runs a table of search expressions against one index type.
   *
   * Each expression is executed through the SearchQuery extender and the
   * returned rows are checked for both matching and scoring.
   *
   * @param array $queries
   *   Expected item IDs keyed by search expression.
   * @param string $type
   *   The search index item type to restrict the query to.
   */
  protected function assertQueries(array $queries, $type) {
    $database = $this->container->get('database');
    foreach ($queries as $query => $results) {
      $result = $database->select('search_index', 'i')
        ->extend('Drupal\search\SearchQuery')
        ->searchExpression($query, $type)
        ->execute();

      // A falsy return from execute() is treated as "no rows".
      $set = $result ? $result->fetchAll() : [];
      $this->_testQueryMatching($query, $set, $results);
      $this->_testQueryScores($query, $set, $results);
    }
  }

  /**
   * Test the matching abilities of the engine.
   *
   * Verify if a query produces the correct results.
   *
   * @param string $query
   *   The search expression, used only in the assertion message.
   * @param array $set
   *   The result rows; each row's "sid" property is read.
   * @param array $results
   *   The expected item IDs, in any order.
   */
  public function _testQueryMatching($query, $set, $results) {
    // Get result IDs.
    $found = [];
    foreach ($set as $item) {
      $found[] = $item->sid;
    }

    // Compare $results and $found irrespective of order.
    sort($found);
    sort($results);
    $this->assertEquals($results, $found, "Query matching '$query'");
  }

  /**
   * Test the scoring abilities of the engine.
   *
   * Verify if a query produces normalized scores that never increase from one
   * result row to the next.
   *
   * @param string $query
   *   The search expression, used only in the assertion messages.
   * @param array $set
   *   The result rows; each row's "calculated_score" property is read.
   * @param array $results
   *   The expected item IDs. Unused here; kept so the signature parallels
   *   _testQueryMatching().
   */
  public function _testQueryScores($query, $set, $results) {
    // Get result scores.
    $scores = [];
    foreach ($set as $item) {
      $scores[] = $item->calculated_score;
    }

    // Check order: the rows must already be in descending score order.
    $sorted = $scores;
    sort($sorted);
    $this->assertEquals(array_reverse($sorted), $scores, "Query order '$query'");

    // Check range: an empty set passes; otherwise every score must lie in
    // (0, 1.0001]. NOTE(review): the upper bound is presumably 1 plus a
    // rounding tolerance - confirm.
    $in_range = !count($scores) || (min($scores) > 0.0 && max($scores) <= 1.0001);
    $this->assertTrue($in_range, "Query scoring '$query'");
  }

}