Mercurial > hg > vamp-website
diff forum/Sources/SearchAPI-Fulltext.php @ 76:e3e11437ecea website
Add forum code
author | Chris Cannam |
---|---|
date | Sun, 07 Jul 2013 11:25:48 +0200 |
parents | |
children |
line wrap: on
line diff
<?php

/**
 * Simple Machines Forum (SMF)
 *
 * @package SMF
 * @author Simple Machines http://www.simplemachines.org
 * @copyright 2011 Simple Machines
 * @license http://www.simplemachines.org/about/smf/license.php BSD
 *
 * @version 2.0
 */

if (!defined('SMF'))
	die('Hacking attempt...');

/*
	int searchSort(string $wordA, string $wordB)
		- callback function for usort used to sort the fulltext results.
		- the order of sorting is: large words, small words, large words that
		  are excluded from the search, small words that are excluded.
*/

/**
 * Search API that builds message queries using fulltext MATCH ... AGAINST
 * clauses on the message body.
 */
class fulltext_search
{
	// This is the last version of SMF that this was tested on, to protect against API changes.
	public $version_compatible = 'SMF 2.0';
	// This won't work with versions of SMF less than this.
	public $min_smf_version = 'SMF 2.0 Beta 2';
	// Is it supported?
	public $is_supported = true;

	// Can we do a boolean search - tested on construct.
	protected $canDoBooleanSearch = false;
	// What words are banned?
	protected $bannedWords = array();
	// What is the minimum word length?
	protected $min_word_length = 4;
	// What databases support the fulltext index?
	protected $supported_databases = array('mysql');

	/**
	 * Works out whether this API can be used with the current database and,
	 * if so, loads the banned word list and the minimum indexed word length.
	 *
	 * When $db_type is not listed in $supported_databases the object is
	 * flagged as unsupported and no database call is made.
	 */
	public function __construct()
	{
		global $smcFunc, $db_connection, $modSettings, $db_type;

		// Is this database supported?
		if (!in_array($db_type, $this->supported_databases))
		{
			$this->is_supported = false;
			return;
		}

		// Some MySQL versions are superior to others :P.
		$this->canDoBooleanSearch = version_compare($smcFunc['db_server_info']($db_connection), '4.0.1', '>=');

		$this->bannedWords = empty($modSettings['search_banned_words']) ? array() : explode(',', $modSettings['search_banned_words']);
		$this->min_word_length = $this->_getMinWordLength();
	}

	/**
	 * Check whether the method can be performed by this API.
	 *
	 * @param string $methodName Name of the API method being asked about.
	 * @param mixed $query_params Not used by this implementation.
	 * @return bool True for searchSort, prepareIndexes and indexedWordQuery; false otherwise.
	 */
	public function supportsMethod($methodName, $query_params = null)
	{
		return in_array($methodName, array('searchSort', 'prepareIndexes', 'indexedWordQuery'), true);
	}

	/**
	 * What is the minimum word length full text supports?
	 *
	 * Reads the ft_min_word_len server variable; falls back to 4 when the
	 * query fails or does not return exactly one row.
	 *
	 * @return int The minimum word length.
	 */
	protected function _getMinWordLength()
	{
		global $smcFunc;

		// 4 is the MySQL default...
		$min_word_length = 4;

		// Try to determine the minimum number of letters for a fulltext search.
		$request = $smcFunc['db_search_query']('max_fulltext_length', '
			SHOW VARIABLES
			LIKE {string:fulltext_minimum_word_length}',
			array(
				'fulltext_minimum_word_length' => 'ft_min_word_len',
			)
		);
		if ($request === false)
			return $min_word_length;

		if ($smcFunc['db_num_rows']($request) == 1)
		{
			// The row is (variable name, value); only the value is of interest.
			list (, $value) = $smcFunc['db_fetch_row']($request);
			if ((int) $value > 0)
				$min_word_length = (int) $value;
		}

		// Release the result whatever it contained, not only on the one-row path.
		$smcFunc['db_free_result']($request);

		return $min_word_length;
	}

	/**
	 * Comparison callback: longer words sort first, and any word found in
	 * the global $excludedWords list is pushed behind all other words.
	 *
	 * @param string $a First word.
	 * @param string $b Second word.
	 * @return int -1 if $a sorts before $b, 1 if after, 0 if they rank equally.
	 */
	public function searchSort($a, $b)
	{
		global $excludedWords;

		// The global may not have been populated; treat that as "nothing excluded".
		$excluded = is_array($excludedWords) ? $excludedWords : array();

		// Subtracting 1000 ranks an excluded word below every non-excluded one.
		$x = strlen($a) - (in_array($a, $excluded) ? 1000 : 0);
		$y = strlen($b) - (in_array($b, $excluded) ? 1000 : 0);

		return $x < $y ? 1 : ($x > $y ? -1 : 0);
	}

	/**
	 * Do we have to do some work with the words we are searching for to prepare them?
	 *
	 * Adds the word (or its subwords) to $wordsSearch['indexed_words'] and,
	 * when the word is excluded, to $wordsExclude as well.
	 *
	 * @param string $word The word or phrase being searched for.
	 * @param array &$wordsSearch Receives 'indexed_words' and 'words' entries.
	 * @param array &$wordsExclude Receives the indexed form of excluded words.
	 * @param bool $isExcluded Whether $word is excluded from the search.
	 */
	public function prepareIndexes($word, &$wordsSearch, &$wordsExclude, $isExcluded)
	{
		// $smcFunc is needed for the strlen call below.
		global $modSettings, $smcFunc;

		// text2words() is defined elsewhere; presumably it splits the phrase into single words.
		$subwords = text2words($word, null, false);

		if (!$this->canDoBooleanSearch && count($subwords) > 1 && empty($modSettings['search_force_index']))
			$wordsSearch['words'][] = $word;

		if ($this->canDoBooleanSearch)
		{
			// A phrase is quoted so that it is handed to the index as one term.
			$fulltextWord = count($subwords) === 1 ? $word : '"' . $word . '"';
			$wordsSearch['indexed_words'][] = $fulltextWord;
			if ($isExcluded)
				$wordsExclude[] = $fulltextWord;
		}
		// Excluded phrases don't benefit from being split into subwords.
		elseif (count($subwords) > 1 && $isExcluded)
			return;
		else
		{
			$relyOnIndex = true;
			foreach ($subwords as $subword)
			{
				$isBanned = in_array($subword, $this->bannedWords);

				if ($smcFunc['strlen']($subword) >= $this->min_word_length && !$isBanned)
				{
					$wordsSearch['indexed_words'][] = $subword;
					if ($isExcluded)
						$wordsExclude[] = $subword;
				}
				// Too short for the index (and not banned): the index alone cannot match it.
				elseif (!$isBanned)
					$relyOnIndex = false;
			}

			// NOTE(review): canDoBooleanSearch is always false in this branch, so this
			// condition can never hold. Kept as-is; confirm the intended behaviour.
			if ($this->canDoBooleanSearch && !$relyOnIndex && empty($modSettings['search_force_index']))
				$wordsSearch['words'][] = $word;
		}
	}

	/**
	 * Builds the value bound to a LIKE or RLIKE comparison for one word or phrase.
	 *
	 * @param string $text The word or phrase to match.
	 * @param bool $useLike True to build a LIKE pattern, false for an RLIKE pattern.
	 * @return string The escaped pattern.
	 */
	protected function _buildMatchPattern($text, $useLike)
	{
		// LIKE: escape the wildcard characters and match anywhere in the text.
		if ($useLike)
			return '%' . strtr($text, array('_' => '\\_', '%' => '\\%')) . '%';

		// RLIKE: bracket the regex metacharacters and anchor on word boundaries.
		return '[[:<:]]' . addcslashes(preg_replace(array('/([\[\]$.+*?|{}()])/'), array('[$1]'), $text), '\\\'') . '[[:>:]]';
	}

	/**
	 * Search for indexed words.
	 *
	 * Assembles the WHERE conditions for the given words and passes the
	 * resulting query to $smcFunc['db_search_query'].
	 *
	 * @param array $words Uses the 'words' and 'indexed_words' lists.
	 * @param array $search_data Uses 'params', 'insert_into', 'max_results',
	 *		'indexed_results' and, if the caller supplies it, 'no_regexp'.
	 * @return mixed Whatever $smcFunc['db_search_query'] returns.
	 */
	public function indexedWordQuery($words, $search_data)
	{
		global $modSettings, $smcFunc;

		$query_select = array(
			'id_msg' => 'm.id_msg',
		);
		$query_where = array();
		$query_params = $search_data['params'];

		// Use LIKE unless whole-word matching is enabled and regular expressions are allowed.
		$useLike = empty($modSettings['search_match_words']) || !empty($search_data['no_regexp']);
		// The surrounding spaces keep the operator apart from the column and the value.
		$operator = $useLike ? ' LIKE ' : ' RLIKE ';

		$indexedWords = empty($words['indexed_words']) ? array() : $words['indexed_words'];
		$excludedWords = empty($query_params['excluded_words']) ? array() : $query_params['excluded_words'];
		$excludedIndexWords = empty($query_params['excluded_index_words']) ? array() : $query_params['excluded_index_words'];

		if (!empty($query_params['id_search']))
			$query_select['id_search'] = '{int:id_search}';

		// Words that are matched directly against the message body.
		$count = 0;
		if (empty($modSettings['search_simple_fulltext']) && !empty($words['words']))
			foreach ($words['words'] as $regularWord)
			{
				$query_where[] = 'm.body' . (in_array($regularWord, $excludedWords) ? ' NOT' : '') . $operator . '{string:complex_body_' . $count . '}';
				$query_params['complex_body_' . $count++] = $this->_buildMatchPattern($regularWord, $useLike);
			}

		if (!empty($query_params['user_query']))
			$query_where[] = '{raw:user_query}';
		if (!empty($query_params['board_query']))
			$query_where[] = 'm.id_board {raw:board_query}';

		if (!empty($query_params['topic']))
			$query_where[] = 'm.id_topic = {int:topic}';
		if (!empty($query_params['min_msg_id']))
			$query_where[] = 'm.id_msg >= {int:min_msg_id}';
		if (!empty($query_params['max_msg_id']))
			$query_where[] = 'm.id_msg <= {int:max_msg_id}';

		$count = 0;
		if (!empty($query_params['excluded_phrases']) && empty($modSettings['search_force_index']))
			foreach ($query_params['excluded_phrases'] as $phrase)
			{
				$query_where[] = 'subject NOT' . $operator . '{string:exclude_subject_phrase_' . $count . '}';
				$query_params['exclude_subject_phrase_' . $count++] = $this->_buildMatchPattern($phrase, $useLike);
			}

		$count = 0;
		if (!empty($query_params['excluded_subject_words']) && empty($modSettings['search_force_index']))
			foreach ($query_params['excluded_subject_words'] as $excludedWord)
			{
				$query_where[] = 'subject NOT' . $operator . '{string:exclude_subject_words_' . $count . '}';
				$query_params['exclude_subject_words_' . $count++] = $this->_buildMatchPattern($excludedWord, $useLike);
			}

		if (!empty($modSettings['search_simple_fulltext']))
		{
			// One natural-language match on every word that is not excluded.
			$query_where[] = 'MATCH (body) AGAINST ({string:body_match})';
			$query_params['body_match'] = implode(' ', array_diff($indexedWords, $excludedIndexWords));
		}
		elseif ($this->canDoBooleanSearch)
		{
			// Prefix each term with + (must be present) or - (must be absent).
			$terms = array();
			foreach ($indexedWords as $fulltextWord)
				$terms[] = (in_array($fulltextWord, $excludedIndexWords) ? '-' : '+') . $fulltextWord;
			$query_params['boolean_match'] = implode(' ', $terms);

			$query_where[] = 'MATCH (body) AGAINST ({string:boolean_match} IN BOOLEAN MODE)';
		}
		else
		{
			// No boolean mode: one MATCH condition per word, negated for excluded words.
			$count = 0;
			foreach ($indexedWords as $fulltextWord)
			{
				$query_where[] = (in_array($fulltextWord, $excludedIndexWords) ? 'NOT ' : '') . 'MATCH (body) AGAINST ({string:fulltext_match_' . $count . '})';
				$query_params['fulltext_match_' . $count++] = $fulltextWord;
			}
		}

		$ignoreRequest = $smcFunc['db_search_query']('insert_into_log_messages_fulltext', ($smcFunc['db_support_ignore'] ? ( '
			INSERT IGNORE INTO {db_prefix}' . $search_data['insert_into'] . '
				(' . implode(', ', array_keys($query_select)) . ')') : '') . '
			SELECT ' . implode(', ', $query_select) . '
			FROM {db_prefix}messages AS m
			WHERE ' . implode('
				AND ', $query_where) . (empty($search_data['max_results']) ? '' : '
			LIMIT ' . ($search_data['max_results'] - $search_data['indexed_results'])),
			$query_params
		);

		return $ignoreRequest;
	}
}

?>