annotate core/modules/search/tests/src/Kernel/SearchMatchTest.php @ 19:fa3358dc1485 tip

Add ndrum files
author Chris Cannam
date Wed, 28 Aug 2019 13:14:47 +0100
parents af1871eacc83
children
rev   line source
Chris@0 1 <?php
Chris@0 2
Chris@0 3 namespace Drupal\Tests\search\Kernel;
Chris@0 4
Chris@18 5 use Drupal\Core\Database\Database;
Chris@0 6 use Drupal\Core\Language\LanguageInterface;
Chris@0 7 use Drupal\KernelTests\KernelTestBase;
Chris@0 8
/**
 * Indexes content and queries it.
 *
 * @group search
 */
class SearchMatchTest extends KernelTestBase {

  // The search index can contain different types of content. Typically the type
  // is 'node'. Here we test with _test_, _test2_ and _test3_ as the type.
  const SEARCH_TYPE = '_test_';
  const SEARCH_TYPE_2 = '_test2_';
  const SEARCH_TYPE_JPN = '_test3_';

  /**
   * Modules to enable.
   *
   * @var array
   */
  public static $modules = ['search'];

  /**
   * {@inheritdoc}
   */
  protected function setUp() {
    parent::setUp();
    $this->installSchema('search', ['search_index', 'search_dataset', 'search_total']);
    $this->installConfig(['search']);
  }

  /**
   * Tests search indexing and matching.
   *
   * Builds the test index first, then runs every predefined query against it.
   */
  public function testMatching() {
    $this->_setup();
    $this->_testQueries();
  }

  /**
   * Sets up a small index of items to test against.
   *
   * Items 1-7 are indexed under SEARCH_TYPE, items 8-12 under SEARCH_TYPE_2
   * and items 13-15 under SEARCH_TYPE_JPN.
   */
  public function _setup() {
    $this->config('search.settings')->set('index.minimum_word_size', 3)->save();

    for ($i = 1; $i <= 7; ++$i) {
      search_index(static::SEARCH_TYPE, $i, LanguageInterface::LANGCODE_NOT_SPECIFIED, $this->getText($i));
    }
    for ($i = 1; $i <= 5; ++$i) {
      // Offset the IDs by 7 so they continue after the SEARCH_TYPE items.
      search_index(static::SEARCH_TYPE_2, $i + 7, LanguageInterface::LANGCODE_NOT_SPECIFIED, $this->getText2($i));
    }
    // No getText builder function for Japanese text; just a simple array.
    $japanese = [
      13 => '以呂波耳・ほへとち。リヌルヲ。',
      14 => 'ドルーパルが大好きよ!',
      15 => 'コーヒーとケーキ',
    ];
    foreach ($japanese as $i => $jpn) {
      search_index(static::SEARCH_TYPE_JPN, $i, LanguageInterface::LANGCODE_NOT_SPECIFIED, $jpn);
    }
    search_update_totals();
  }

  /**
   * _test_: Helper method for generating snippets of content.
   *
   * Snippet $n consists of (up to) $n consecutive words of the sample text,
   * starting at its $n-th word.
   *
   * Generated items to test against (shown lowercased, without punctuation):
   *   1  ipsum
   *   2  dolore sit
   *   3  sit am ut
   *   4  am ut enim am
   *   5  ut enim am minim veniam
   *   6  enim am minim veniam es cillum
   *   7  am minim veniam es cillum dolore eu
   *
   * @param int $n
   *   The 1-based snippet number.
   *
   * @return string
   *   The words of the snippet, joined by single spaces.
   */
  public function getText($n) {
    $words = explode(' ', "Ipsum dolore sit am. Ut enim am minim veniam. Es cillum dolore eu.");
    return implode(' ', array_slice($words, $n - 1, $n));
  }

  /**
   * _test2_: Helper method for generating snippets of content.
   *
   * Snippet $n consists of (up to) $n consecutive words of the sample text,
   * starting at its $n-th word. Snippet 5 has only four words because the
   * sample text ends there.
   *
   * Generated items to test against (shown lowercased, without punctuation):
   *   8  dear
   *   9  king philip
   *   10 philip came over
   *   11 came over from germany
   *   12 over from germany swimming
   *
   * @param int $n
   *   The 1-based snippet number.
   *
   * @return string
   *   The words of the snippet, joined by single spaces.
   */
  public function getText2($n) {
    $words = explode(' ', "Dear King Philip came over from Germany swimming.");
    return implode(' ', array_slice($words, $n - 1, $n));
  }

  /**
   * Runs predefined queries looking for indexed terms.
   *
   * Each query map is keyed by the search expression; the value is the list
   * of item IDs the expression is expected to match.
   */
  public function _testQueries() {
    // Note: OR queries that include short words in OR groups are only accepted
    // if the ORed terms are ANDed with at least one long word in the rest of
    // the query. Examples:
    //   enim dolore OR ut = enim (dolore OR ut) = (enim dolore) OR (enim ut)
    // is good, and
    //   dolore OR ut = (dolore) OR (ut)
    // is bad. This is a design limitation to avoid full table scans.
    $queries = [
      // Simple AND queries.
      'ipsum' => [1],
      'enim' => [4, 5, 6],
      'xxxxx' => [],
      'enim minim' => [5, 6],
      'enim xxxxx' => [],
      'dolore eu' => [7],
      'dolore xx' => [],
      'ut minim' => [5],
      'xx minim' => [],
      'enim veniam am minim ut' => [5],
      // Simple OR and AND/OR queries.
      'dolore OR ipsum' => [1, 2, 7],
      'dolore OR xxxxx' => [2, 7],
      'dolore OR ipsum OR enim' => [1, 2, 4, 5, 6, 7],
      'ipsum OR dolore sit OR cillum' => [2, 7],
      'minim dolore OR ipsum' => [7],
      'dolore OR ipsum veniam' => [7],
      'minim dolore OR ipsum OR enim' => [5, 6, 7],
      'dolore xx OR yy' => [],
      'xxxxx dolore OR ipsum' => [],
      // Sequence of OR queries.
      'minim' => [5, 6, 7],
      'minim OR xxxx' => [5, 6, 7],
      'minim OR xxxx OR minim' => [5, 6, 7],
      'minim OR xxxx minim' => [5, 6, 7],
      'minim OR xxxx minim OR yyyy' => [5, 6, 7],
      'minim OR xxxx minim OR cillum' => [6, 7, 5],
      'minim OR xxxx minim OR xxxx' => [5, 6, 7],
      // Negative queries.
      'dolore -sit' => [7],
      'dolore -eu' => [2],
      'dolore -xxxxx' => [2, 7],
      'dolore -xx' => [2, 7],
      // Phrase queries.
      '"dolore sit"' => [2],
      '"sit dolore"' => [],
      '"am minim veniam es"' => [6, 7],
      '"minim am veniam es"' => [],
      // Mixed queries.
      '"am minim veniam es" OR dolore' => [2, 6, 7],
      '"minim am veniam es" OR "dolore sit"' => [2],
      '"minim am veniam es" OR "sit dolore"' => [],
      '"am minim veniam es" -eu' => [6],
      '"am minim veniam" -"cillum dolore"' => [5, 6],
      '"am minim veniam" -"dolore cillum"' => [5, 6, 7],
      'xxxxx "minim am veniam es" OR dolore' => [],
      'xx "minim am veniam es" OR dolore' => [],
    ];
    $this->checkQueries(static::SEARCH_TYPE, $queries);

    // These queries are run against the second index type, SEARCH_TYPE_2.
    $queries = [
      // Simple AND queries.
      'ipsum' => [],
      'enim' => [],
      'enim minim' => [],
      'dear' => [8],
      'germany' => [11, 12],
    ];
    $this->checkQueries(static::SEARCH_TYPE_2, $queries);

    // These queries are run against the third index type, SEARCH_TYPE_JPN.
    $queries = [
      // Simple AND queries.
      '呂波耳' => [13],
      '以呂波耳' => [13],
      'ほへと ヌルヲ' => [13],
      'とちリ' => [],
      'ドルーパル' => [14],
      'パルが大' => [14],
      'コーヒー' => [15],
      'ヒーキ' => [],
    ];
    $this->checkQueries(static::SEARCH_TYPE_JPN, $queries);
  }

  /**
   * Executes a set of search expressions against one index type.
   *
   * For every expression the result rows are checked both for the expected
   * item IDs and for well-formed scores.
   *
   * @param string $type
   *   The search index type to query, e.g. static::SEARCH_TYPE.
   * @param array $queries
   *   Expected item IDs (array of integers), keyed by search expression.
   */
  protected function checkQueries($type, array $queries) {
    $connection = Database::getConnection();
    foreach ($queries as $query => $results) {
      $result = $connection->select('search_index', 'i')
        ->extend('Drupal\search\SearchQuery')
        ->searchExpression($query, $type)
        ->execute();

      // A falsy return value from execute() is treated as "no rows".
      $set = $result ? $result->fetchAll() : [];
      $this->_testQueryMatching($query, $set, $results);
      $this->_testQueryScores($query, $set, $results);
    }
  }

  /**
   * Tests the matching abilities of the engine.
   *
   * Verifies that a query produces exactly the expected items, regardless of
   * the order in which they are returned.
   *
   * @param string $query
   *   The search expression that was executed; used in the failure message.
   * @param array $set
   *   The result rows; each row is an object with a 'sid' property.
   * @param array $results
   *   The item IDs the query is expected to return.
   */
  public function _testQueryMatching($query, $set, $results) {
    // Get result IDs.
    $found = array_map(function ($item) {
      return $item->sid;
    }, $set);

    // Compare $results and $found, ignoring order.
    sort($found);
    sort($results);
    $this->assertEquals($results, $found, "Query matching '$query'");
  }

  /**
   * Tests the scoring abilities of the engine.
   *
   * Verifies that a query produces normalized scores, returned in
   * non-increasing order.
   *
   * @param string $query
   *   The search expression that was executed; used in the failure messages.
   * @param array $set
   *   The result rows; each row is an object with a 'calculated_score'
   *   property.
   * @param array $results
   *   The item IDs the query is expected to return. Not used by this check.
   */
  public function _testQueryScores($query, $set, $results) {
    // Get result scores.
    $scores = array_map(function ($item) {
      return $item->calculated_score;
    }, $set);

    // Check order: the scores must already be sorted from highest to lowest.
    $sorted = $scores;
    sort($sorted);
    $this->assertEquals(array_reverse($sorted), $scores, "Query order '$query'");

    // Check range: every score must lie in (0, 1], with a small tolerance on
    // the upper bound. An empty result set passes trivially.
    $in_range = empty($scores) || (min($scores) > 0.0 && max($scores) <= 1.0001);
    $this->assertTrue($in_range, "Query scoring '$query'");
  }

}