annotate core/modules/search/tests/src/Kernel/SearchExcerptTest.php @ 19:fa3358dc1485 tip

Add ndrum files
author Chris Cannam
date Wed, 28 Aug 2019 13:14:47 +0100
parents 4c8ae668cc8c
children
rev   line source
Chris@0 1 <?php
Chris@0 2
Chris@0 3 namespace Drupal\Tests\search\Kernel;
Chris@0 4
Chris@0 5 use Drupal\KernelTests\KernelTestBase;
Chris@0 6
Chris@0 7 /**
Chris@0 8 * Tests the search_excerpt() function.
Chris@0 9 *
Chris@0 10 * @group search
Chris@0 11 */
Chris@0 12 class SearchExcerptTest extends KernelTestBase {
Chris@0 13
Chris@0 14 /**
Chris@0 15 * Modules to enable for the tests in this class.
Chris@0 16 *
Chris@0 17 * @var string[]
Chris@0 18 */
Chris@0 19 public static $modules = ['search', 'search_langcode_test'];
Chris@0 20
Chris@0 21 /**
Chris@0 22 * Tests search_excerpt() with several simulated search keywords.
Chris@0 23 *
Chris@0 24 * Passes keywords and a sample marked-up string, "The quick brown fox
Chris@0 25 * &amp; jumps over the lazy dog", through doSearchExcerpt() and compares
Chris@0 26 * the result with the expected excerpt: matched keywords wrapped in
Chris@0 27 * <strong>, or the text stripped of its HTML tags when no keyword
Chris@0 28 * matches. Long inputs, HTML entities and multi-byte text are covered too.
Chris@0 29 */
Chris@0 30 public function testSearchExcerpt() {
Chris@0 31 // Make some text with entities and tags.
Chris@0 32 $text = 'The <strong>quick</strong> <a href="#">brown</a> fox &amp; jumps <h2>over</h2> the lazy dog';
Chris@0 33 $expected = 'The quick brown fox &amp; jumps over the lazy dog';
Chris@0 34 $result = $this->doSearchExcerpt('nothing', $text);
Chris@0 35 $this->assertEqual(preg_replace('| +|', ' ', $result), $expected, 'Entire string, stripped of HTML tags, is returned when keyword is not found in short string');
Chris@0 36
Chris@0 37 $result = $this->doSearchExcerpt('fox', $text);
Chris@0 38 $this->assertEqual($result, 'The quick brown <strong>fox</strong> &amp; jumps over the lazy dog', 'Found keyword is highlighted');
Chris@0 39 // NOTE(review): doSearchExcerpt() already collapses runs of spaces, so the preg_replace() calls in this method look redundant.
Chris@0 40 $expected = '<strong>The</strong> quick brown fox &amp; jumps over <strong>the</strong> lazy dog';
Chris@0 41 $result = $this->doSearchExcerpt('The', $text);
Chris@0 42 $this->assertEqual(preg_replace('| +|', ' ', $result), $expected, 'Keyword is highlighted at beginning of short string');
Chris@0 43
Chris@0 44 $expected = 'The quick brown fox &amp; jumps over the lazy <strong>dog</strong>';
Chris@0 45 $result = $this->doSearchExcerpt('dog', $text);
Chris@0 46 $this->assertEqual(preg_replace('| +|', ' ', $result), $expected, 'Keyword is highlighted at end of short string');
Chris@0 47 // Put the only copy of the text containing "brown" between ten "silver" copies and ten "pink" copies.
Chris@0 48 $longtext = str_repeat(str_replace('brown', 'silver', $text) . ' ', 10) . $text . str_repeat(' ' . str_replace('brown', 'pink', $text), 10);
Chris@0 49 $result = $this->doSearchExcerpt('brown', $longtext);
Chris@0 50 $expected = '… silver fox &amp; jumps over the lazy dog The quick <strong>brown</strong> fox &amp; jumps over the lazy dog The quick …';
Chris@0 51 $this->assertEqual($result, $expected, 'Snippet around keyword in long text is correctly capped');
Chris@0 52
Chris@0 53 $longtext = str_repeat($text . ' ', 10);
Chris@0 54 $result = $this->doSearchExcerpt('nothing', $longtext);
Chris@0 55 $expected = 'The quick brown fox &amp; jumps over the lazy dog';
Chris@0 56 $this->assertTrue(strpos($result, $expected) === 0, 'When keyword is not found in long string, return value starts as expected');
Chris@0 57 // Twenty repetitions of a word written with HTML entities; the excerpt should contain the decoded characters instead.
Chris@0 58 $entities = str_repeat('k&eacute;sz&iacute;t&eacute;se ', 20);
Chris@0 59 $result = $this->doSearchExcerpt('nothing', $entities);
Chris@0 60 $this->assertFalse(strpos($result, '&'), 'Entities are not present in excerpt');
Chris@0 61 $this->assertTrue(strpos($result, 'í') > 0, 'Entities are converted in excerpt');
Chris@0 62
Chris@0 63 // The node body that will produce this rendered $text is:
Chris@0 64 // 123456789 HTMLTest +123456789+&lsquo; +&lsquo; +&lsquo; +&lsquo; +12345678 &nbsp;&nbsp; +&lsquo; +&lsquo; +&lsquo; &lsquo;
Chris@0 65 $text = "<div class=\"field field--name-body field--type-text-with-summary field--label-hidden\"><div class=\"field__items\"><div class=\"field__item even\" property=\"content:encoded\"><p>123456789 HTMLTest +123456789+‘ +‘ +‘ +‘ +12345678    +‘ +‘ +‘ ‘</p>\n</div></div></div> ";
Chris@0 66 $result = $this->doSearchExcerpt('HTMLTest', $text);
Chris@0 67 $this->assertFalse(empty($result), 'Rendered Multi-byte HTML encodings are not corrupted in search excerpts');
Chris@0 68 }
Chris@0 69
Chris@0 70 /**
Chris@0 71 * Tests search_excerpt() with search keywords matching simplified words.
Chris@0 72 *
Chris@0 73 * Excerpting should handle keywords that are matched only after going through
Chris@0 74 * search_simplify() or hook_search_preprocess(). This test passes keywords that
Chris@0 75 * match simplified words and checks that the original unsimplified word is highlighted in the excerpt.
Chris@0 76 */
Chris@0 77 public function testSearchExcerptSimplified() {
Chris@0 78 $start_time = microtime(TRUE);
Chris@0 79
Chris@0 80 $lorem1 = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Etiam vitae arcu at leo cursus laoreet. Curabitur dui tortor, adipiscing malesuada tempor in, bibendum ac diam. Cras non tellus a libero pellentesque condimentum. What is a Drupalism? Suspendisse ac lacus libero. Ut non est vel nisl faucibus interdum nec sed leo. Pellentesque sem risus, vulputate eu semper eget, auctor in libero.';
Chris@0 81 $lorem2 = 'Ut fermentum est vitae metus convallis scelerisque. Phasellus pellentesque rhoncus tellus, eu dignissim purus posuere id. Quisque eu fringilla ligula. Morbi ullamcorper, lorem et mattis egestas, tortor neque pretium velit, eget eleifend odio turpis eu purus. Donec vitae metus quis leo pretium tincidunt a pulvinar sem. Morbi adipiscing laoreet mauris vel placerat. Nullam elementum, nisl sit amet scelerisque malesuada, dolor nunc hendrerit quam, eu ultrices erat est in orci.';
Chris@0 82
Chris@0 83 // Make some text with some keywords that will get simplified.
Chris@0 84 $text = $lorem1 . ' Number: 123456.7890 Hyphenated: one-two abc,def ' . $lorem2;
Chris@0 85 // Note: doSearchExcerpt() collapses the extra spaces that search_excerpt()
Chris@0 86 // adds, so the assertions below can look for single-spaced fragments.
Chris@0 87 $result = $this->doSearchExcerpt('123456.7890', $text);
Chris@0 88 $this->assertTrue(strpos($result, 'Number: <strong>123456.7890</strong>') !== FALSE, 'Numeric keyword is highlighted with exact match');
Chris@0 89
Chris@0 90 $result = $this->doSearchExcerpt('1234567890', $text);
Chris@0 91 $this->assertTrue(strpos($result, 'Number: <strong>123456.7890</strong>') !== FALSE, 'Numeric keyword is highlighted with simplified match');
Chris@0 92
Chris@0 93 $result = $this->doSearchExcerpt('Number 1234567890', $text);
Chris@0 94 $this->assertTrue(strpos($result, '<strong>Number</strong>: <strong>123456.7890</strong>') !== FALSE, 'Punctuated and numeric keyword is highlighted with simplified match');
Chris@0 95
Chris@0 96 $result = $this->doSearchExcerpt('"Number 1234567890"', $text);
Chris@0 97 $this->assertTrue(strpos($result, '<strong>Number: 123456.7890</strong>') !== FALSE, 'Phrase with punctuated and numeric keyword is highlighted with simplified match');
Chris@0 98
Chris@0 99 $result = $this->doSearchExcerpt('"Hyphenated onetwo"', $text);
Chris@0 100 $this->assertTrue(strpos($result, '<strong>Hyphenated: one-two</strong>') !== FALSE, 'Phrase with punctuated and hyphenated keyword is highlighted with simplified match');
Chris@0 101
Chris@0 102 $result = $this->doSearchExcerpt('"abc def"', $text);
Chris@0 103 $this->assertTrue(strpos($result, '<strong>abc,def</strong>') !== FALSE, 'Phrase with keyword simplified into two separate words is highlighted with simplified match');
Chris@0 104
Chris@0 105 // Test phrases with characters which are being truncated.
Chris@0 106 $result = $this->doSearchExcerpt('"ipsum _"', $text);
Chris@0 107 $this->assertTrue(strpos($result, '<strong>ipsum</strong>') !== FALSE, 'Only valid part of the phrase is highlighted and invalid part containing "_" is ignored.');
Chris@0 108
Chris@0 109 $result = $this->doSearchExcerpt('"ipsum 0000"', $text);
Chris@0 110 $this->assertTrue(strpos($result, '<strong>ipsum</strong>') !== FALSE, 'Only valid part of the phrase is highlighted and invalid part "0000" is ignored.');
Chris@0 111
Chris@0 112 // Test combination of the valid keyword and keyword containing only
Chris@0 113 // characters which are being truncated during simplification.
Chris@0 114 $result = $this->doSearchExcerpt('ipsum _', $text);
Chris@0 115 $this->assertTrue(strpos($result, '<strong>ipsum</strong>') !== FALSE, 'Only valid keyword is highlighted and invalid keyword "_" is ignored.');
Chris@0 116
Chris@0 117 $result = $this->doSearchExcerpt('ipsum 0000', $text);
Chris@0 118 $this->assertTrue(strpos($result, '<strong>ipsum</strong>') !== FALSE, 'Only valid keyword is highlighted and invalid keyword "0000" is ignored.');
Chris@0 119
Chris@0 120 // Test using the hook_search_preprocess() from the test module; the 'ex'
Chris@0 121 // langcode passed below presumably activates it (TODO confirm). The hook
Chris@0 122 // replaces "finding" or "finds" with "find", so searching for "find",
Chris@0 123 // "finds" or "finding" should highlight "finding".
Chris@0 124 $text = "this tests finding a string";
Chris@0 125 $result = $this->doSearchExcerpt('finds', $text, 'ex');
Chris@0 126 $this->assertTrue(strpos($result, '<strong>finding</strong>') !== FALSE, 'Search excerpt works with preprocess hook, search for finds');
Chris@0 127 $result = $this->doSearchExcerpt('find', $text, 'ex');
Chris@0 128 $this->assertTrue(strpos($result, '<strong>finding</strong>') !== FALSE, 'Search excerpt works with preprocess hook, search for find');
Chris@0 129
Chris@0 130 // Just to be sure, test with the replacement at the beginning and end.
Chris@0 131 $text = "finding at the beginning";
Chris@0 132 $result = $this->doSearchExcerpt('finds', $text, 'ex');
Chris@0 133 $this->assertTrue(strpos($result, '<strong>finding</strong>') !== FALSE, 'Search excerpt works with preprocess hook, text at start');
Chris@0 134
Chris@0 135 $text = "at the end finding";
Chris@0 136 $result = $this->doSearchExcerpt('finds', $text, 'ex');
Chris@0 137 $this->assertTrue(strpos($result, '<strong>finding</strong>') !== FALSE, 'Search excerpt works with preprocess hook, text at end');
Chris@0 138
Chris@0 139 // Testing with a one-to-many replacement: the test module replaces DIC
Chris@0 140 // with Dependency Injection Container.
Chris@0 141 $text = "something about the DIC is happening";
Chris@0 142 $result = $this->doSearchExcerpt('Dependency', $text, 'ex');
Chris@0 143 $this->assertTrue(strpos($result, '<strong>DIC</strong>') !== FALSE, 'Search excerpt works with preprocess hook, acronym first word');
Chris@0 144
Chris@0 145 $result = $this->doSearchExcerpt('Injection', $text, 'ex');
Chris@0 146 $this->assertTrue(strpos($result, '<strong>DIC</strong>') !== FALSE, 'Search excerpt works with preprocess hook, acronym second word');
Chris@0 147
Chris@0 148 $result = $this->doSearchExcerpt('Container', $text, 'ex');
Chris@0 149 $this->assertTrue(strpos($result, '<strong>DIC</strong>') !== FALSE, 'Search excerpt works with preprocess hook, acronym third word');
Chris@0 150
Chris@0 151 // Testing with a many-to-one replacement: the test module replaces
Chris@0 152 // hypertext markup language with HTML.
Chris@0 153 $text = "we always use hypertext markup language to describe things";
Chris@0 154 $result = $this->doSearchExcerpt('html', $text, 'ex');
Chris@0 155 $this->assertTrue(strpos($result, '<strong>hypertext markup language</strong>') !== FALSE, 'Search excerpt works with preprocess hook, acronym many to one');
Chris@0 156
Chris@0 157 // Test with accents and caps in a longer piece of text with the target
Chris@0 158 // near the end.
Chris@0 159 $text = str_repeat($lorem2, 20) . ' ' . $lorem1;
Chris@0 160 $result = $this->doSearchExcerpt('Lìbêró', $text);
Chris@0 161 $this->assertTrue(strpos($result, '<strong>libero</strong>') !== FALSE, 'Search excerpt works with caps and accents in longer text');
Chris@0 162
Chris@0 163 // Test with an acronym provided by the hook, with the target text in the
Chris@0 164 // middle of a long string.
Chris@0 165 $text = str_repeat($lorem2, 10) . ' DIC ' . str_repeat($lorem2, 10);
Chris@0 166 $result = $this->doSearchExcerpt('Dependency', $text, 'ex');
Chris@0 167 $this->assertTrue(strpos($result, '<strong>DIC</strong>') !== FALSE, 'Search excerpt works with acronym in longer text');
Chris@0 168
Chris@0 169 // Test a long string with a lot of whitespace in it: every space in $lorem2 becomes 20 space-newline pairs.
Chris@0 170 $lorem3 = str_replace(' ', str_repeat(" \n", 20), $lorem2);
Chris@0 171 $text = str_repeat($lorem3, 20) . ' ' . $lorem1;
Chris@0 172 $result = $this->doSearchExcerpt('Lìbêró', $text);
Chris@0 173 $this->assertTrue(strpos($result, '<strong>libero</strong>') !== FALSE, 'Search excerpt works with caps and accents in longer text with whitespace');
Chris@0 174 // Pass the wall-clock time taken by this test method to $this->verbose().
Chris@0 175 $this->verbose('Elapsed time: ' . (microtime(TRUE) - $start_time));
Chris@0 176 }
Chris@0 177
Chris@0 178 /**
Chris@0 179 * Calls search_excerpt(), renders its output and collapses repeated spaces.
Chris@0 180 *
Chris@0 181 * @param string $keys
Chris@0 182 * A string containing a search query.
Chris@0 183 * @param string $render_array
Chris@0 184 * The text to extract fragments from. Passed in as a string; the variable is then reused for the value returned by search_excerpt(), which is handed to the renderer.
Chris@0 185 * @param string|null $langcode
Chris@0 186 * Language code for the language of $render_array, if known.
Chris@0 187 *
Chris@0 188 * @return string
Chris@0 189 * A string containing HTML for the excerpt, with each run of spaces reduced to a single space.
Chris@0 190 */
Chris@0 191 protected function doSearchExcerpt($keys, $render_array, $langcode = NULL) {
Chris@0 192 $render_array = search_excerpt($keys, $render_array, $langcode);
Chris@0 193 $text = \Drupal::service('renderer')->renderPlain($render_array);
Chris@0 194 // The search_excerpt() function adds some extra spaces -- not
Chris@0 195 // important for HTML formatting or this test. Remove these for comparison.
Chris@0 196 return preg_replace('| +|', ' ', $text);
Chris@0 197 }
Chris@0 198
Chris@0 199 }