Chris@0: $string) { Chris@0: if ($key % 2) { Chris@0: // Even line - should simplify down to a space. Chris@0: $simplified = search_simplify($string); Chris@0: $this->assertIdentical($simplified, ' ', "Line $key is excluded from the index"); Chris@0: } Chris@0: else { Chris@0: // Odd line, should be word characters. Chris@0: // Split this into 30-character chunks, so we don't run into limits Chris@0: // of truncation in search_simplify(). Chris@0: $start = 0; Chris@0: while ($start < Unicode::strlen($string)) { Chris@0: $newstr = Unicode::substr($string, $start, 30); Chris@0: // Special case: leading zeros are removed from numeric strings, Chris@0: // and there's one string in this file that is numbers starting with Chris@0: // zero, so prepend a 1 on that string. Chris@0: if (preg_match('/^[0-9]+$/', $newstr)) { Chris@0: $newstr = '1' . $newstr; Chris@0: } Chris@0: $strings[] = $newstr; Chris@0: $start += 30; Chris@0: } Chris@0: } Chris@0: } Chris@0: foreach ($strings as $key => $string) { Chris@0: $simplified = search_simplify($string); Chris@0: $this->assertTrue(Unicode::strlen($simplified) >= Unicode::strlen($string), "Nothing is removed from string $key."); Chris@0: } Chris@0: Chris@0: // Test the low-numbered ASCII control characters separately. They are not Chris@0: // in the text file because they are problematic for diff, especially \0. Chris@0: $string = ''; Chris@0: for ($i = 0; $i < 32; $i++) { Chris@0: $string .= chr($i); Chris@0: } Chris@0: $this->assertIdentical(' ', search_simplify($string), 'Search simplify works for ASCII control characters.'); Chris@0: } Chris@0: Chris@0: /** Chris@0: * Tests that search_simplify() does the right thing with punctuation. 
*/
  public function testSearchSimplifyPunctuation() {
    // Each case is: [input text, expected simplified text, assertion message].
    $cases = [
      ['20.03/94-28,876', '20039428876', 'Punctuation removed from numbers'],
      ['great...drupal--module', 'great drupal module', 'Multiple dot and dashes are word boundaries'],
      ['very_great-drupal.module', 'verygreatdrupalmodule', 'Single dot, dash, underscore are removed'],
      ['regular,punctuation;word', 'regular punctuation word', 'Punctuation is a word boundary'],
    ];

    foreach ($cases as $case) {
      list($input, $expected, $message) = $case;

      // Leading/trailing whitespace is not what this test is about, so it is
      // trimmed away before comparing.
      $out = trim(search_simplify($input));

      // Compare strictly. A loose == comparison treats two numeric strings as
      // numbers, so a result such as '020039428876' would wrongly be accepted
      // for the all-digit case above.
      $this->assertIdentical($out, $expected, $message);
    }
  }

}