comparison forum/Sources/SearchAPI-Fulltext.php @ 76:e3e11437ecea website

Add forum code
author Chris Cannam
date Sun, 07 Jul 2013 11:25:48 +0200
parents
children
comparison
equal deleted inserted replaced
75:72f59aa7e503 76:e3e11437ecea
1 <?php
2
3 /**
4 * Simple Machines Forum (SMF)
5 *
6 * @package SMF
7 * @author Simple Machines http://www.simplemachines.org
8 * @copyright 2011 Simple Machines
9 * @license http://www.simplemachines.org/about/smf/license.php BSD
10 *
11 * @version 2.0
12 */
13
// Stop right here unless the SMF constant has been defined.
if (defined('SMF') === false)
{
	exit('Hacking attempt...');
}
16
17 /*
18 int searchSort(string $wordA, string $wordB)
19 - callback function for usort used to sort the fulltext results.
20 - the order of sorting is: large words, small words, large words that
21 are excluded from the search, small words that are excluded.
22 */
23
/**
 * Search API that answers forum searches through a MySQL FULLTEXT index on
 * the message body.
 *
 * The class reads its configuration from the globals $modSettings, $db_type,
 * $db_connection and talks to the database exclusively through the $smcFunc
 * callback table.
 */
class fulltext_search
{
	// This is the last version of SMF that this was tested on, to protect against API changes.
	public $version_compatible = 'SMF 2.0';
	// This won't work with versions of SMF less than this.
	public $min_smf_version = 'SMF 2.0 Beta 2';
	// Is it supported?
	public $is_supported = true;

	// Can we do a boolean search - tested on construct.
	protected $canDoBooleanSearch = false;
	// What words are banned?
	protected $bannedWords = array();
	// What is the minimum word length?
	protected $min_word_length = 4;
	// What databases support the fulltext index?
	protected $supported_databases = array('mysql');

	/**
	 * Detects what the current database can do.
	 *
	 * When $db_type is not listed in $supported_databases the API flags itself
	 * as unsupported and performs no database calls at all. Otherwise it checks
	 * the server version for boolean-mode support, loads the comma separated
	 * banned word list from $modSettings['search_banned_words'] and asks the
	 * server for its minimum fulltext word length.
	 */
	public function __construct()
	{
		global $smcFunc, $db_connection, $modSettings, $db_type;

		// Is this database supported?
		if (!in_array($db_type, $this->supported_databases))
		{
			$this->is_supported = false;
			return;
		}

		// Some MySQL versions are superior to others :P.
		$this->canDoBooleanSearch = version_compare($smcFunc['db_server_info']($db_connection), '4.0.1', '>=');

		$this->bannedWords = empty($modSettings['search_banned_words']) ? array() : explode(',', $modSettings['search_banned_words']);
		$this->min_word_length = $this->_getMinWordLength();
	}

	/**
	 * Check whether the method can be performed by this API.
	 *
	 * @param string $methodName The name of the method being asked about.
	 * @param mixed $query_params Not used by this API.
	 * @return bool True for searchSort, prepareIndexes and indexedWordQuery, false otherwise.
	 */
	public function supportsMethod($methodName, $query_params = null)
	{
		switch ($methodName)
		{
			case 'searchSort':
			case 'prepareIndexes':
			case 'indexedWordQuery':
				return true;

			default:
				return false;
		}
	}

	/**
	 * What is the minimum word length full text supports?
	 *
	 * Reads the ft_min_word_len server variable. Falls back to 4 when the query
	 * fails or does not return exactly one row.
	 *
	 * @return mixed The value of the second column of the returned row, or the integer 4.
	 */
	protected function _getMinWordLength()
	{
		global $smcFunc;

		// Try to determine the minimum number of letters for a fulltext search.
		$request = $smcFunc['db_search_query']('max_fulltext_length', '
			SHOW VARIABLES
			LIKE {string:fulltext_minimum_word_length}',
			array(
				'fulltext_minimum_word_length' => 'ft_min_word_len',
			)
		);

		// 4 is the MySQL default...
		$min_word_length = 4;

		if ($request !== false)
		{
			if ($smcFunc['db_num_rows']($request) == 1)
				list (, $min_word_length) = $smcFunc['db_fetch_row']($request);

			// Release the handle whether or not the row count was usable.
			$smcFunc['db_free_result']($request);
		}

		return $min_word_length;
	}

	/**
	 * This function compares the length of two strings plus a little.
	 *
	 * Longer words sort first; any word found in the global $excludedWords is
	 * penalised by 1000 so that it sorts after every word that is not excluded.
	 *
	 * @param string $a First word.
	 * @param string $b Second word.
	 * @return int -1, 0 or 1.
	 */
	public function searchSort($a, $b)
	{
		global $excludedWords;

		// Treat a missing exclusion list as "nothing excluded" rather than feeding null to in_array().
		$excluded = is_array($excludedWords) ? $excludedWords : array();

		$x = strlen($a) - (in_array($a, $excluded) ? 1000 : 0);
		$y = strlen($b) - (in_array($b, $excluded) ? 1000 : 0);

		return $x < $y ? 1 : ($x > $y ? -1 : 0);
	}

	/**
	 * Do we have to do some work with the words we are searching for to prepare them?
	 *
	 * Sorts one search term into the 'indexed_words' list (answered by the
	 * fulltext index) and/or the 'words' list (answered by LIKE/RLIKE).
	 *
	 * @param string $word The word or phrase being searched for.
	 * @param array &$wordsSearch Receives entries under 'words' and 'indexed_words'.
	 * @param array &$wordsExclude Receives the indexed terms that are excluded.
	 * @param bool $isExcluded Whether $word is an excluded term.
	 */
	public function prepareIndexes($word, &$wordsSearch, &$wordsExclude, $isExcluded)
	{
		// $smcFunc is needed for the length check further down.
		global $modSettings, $smcFunc;

		$subwords = text2words($word, null, false);

		if (!$this->canDoBooleanSearch && count($subwords) > 1 && empty($modSettings['search_force_index']))
			$wordsSearch['words'][] = $word;

		if ($this->canDoBooleanSearch)
		{
			// Phrases are quoted so boolean mode treats them as one unit.
			$fulltextWord = count($subwords) === 1 ? $word : '"' . $word . '"';
			$wordsSearch['indexed_words'][] = $fulltextWord;
			if ($isExcluded)
				$wordsExclude[] = $fulltextWord;
		}
		// Excluded phrases don't benefit from being split into subwords.
		elseif (count($subwords) > 1 && $isExcluded)
			return;
		else
		{
			$relyOnIndex = true;
			foreach ($subwords as $subword)
			{
				if (($smcFunc['strlen']($subword) >= $this->min_word_length) && !in_array($subword, $this->bannedWords))
				{
					$wordsSearch['indexed_words'][] = $subword;
					if ($isExcluded)
						$wordsExclude[] = $subword;
				}
				elseif (!in_array($subword, $this->bannedWords))
					$relyOnIndex = false;
			}

			// NOTE(review): this branch is only reached when canDoBooleanSearch is false,
			// so the condition below can never hold. Left untouched pending a decision on
			// whether "!$this->canDoBooleanSearch" was intended.
			if ($this->canDoBooleanSearch && !$relyOnIndex && empty($modSettings['search_force_index']))
				$wordsSearch['words'][] = $word;
		}
	}

	/**
	 * Builds the right-hand operand for a LIKE or RLIKE comparison.
	 *
	 * @param string $term The raw word or phrase.
	 * @param bool $useLike True for a LIKE pattern, false for an RLIKE whole-word pattern.
	 * @return string The pattern to bind as a string parameter.
	 */
	protected function _getMatchPattern($term, $useLike)
	{
		// LIKE: escape its two wildcards and allow anything on either side.
		if ($useLike)
			return '%' . strtr($term, array('_' => '\\_', '%' => '\\%')) . '%';

		// RLIKE: neutralise regexp metacharacters and anchor on word boundaries.
		return '[[:<:]]' . addcslashes(preg_replace(array('/([\[\]$.+*?|{}()])/'), array('[$1]'), $term), '\\\'') . '[[:>:]]';
	}

	/**
	 * Search for indexed words.
	 *
	 * Assembles a query against {db_prefix}messages and hands it to
	 * $smcFunc['db_search_query']. When $smcFunc['db_support_ignore'] is truthy
	 * the SELECT is prefixed with INSERT IGNORE INTO $search_data['insert_into'].
	 *
	 * @param array $words Lists under 'words' (LIKE/RLIKE terms) and 'indexed_words' (fulltext terms).
	 * @param array $search_data Uses 'params', 'insert_into', 'max_results', 'indexed_results'
	 *		and, optionally, 'no_regexp'.
	 * @return mixed Whatever $smcFunc['db_search_query'] returns.
	 */
	public function indexedWordQuery($words, $search_data)
	{
		global $modSettings, $smcFunc;

		$query_select = array(
			'id_msg' => 'm.id_msg',
		);
		$query_where = array();
		$query_params = $search_data['params'];

		// NOTE(review): assumes the caller supplies the flag as $search_data['no_regexp'] - confirm
		// against the caller. When the key is absent this behaves exactly as before (regexp matching
		// whenever search_match_words is enabled).
		$useLike = empty($modSettings['search_match_words']) || !empty($search_data['no_regexp']);
		// Both operators carry their own spaces so they never run into the column name or NOT.
		$operator = $useLike ? ' LIKE ' : ' RLIKE ';

		if ($query_params['id_search'])
			$query_select['id_search'] = '{int:id_search}';

		$count = 0;
		if (empty($modSettings['search_simple_fulltext']))
			foreach ($words['words'] as $regularWord)
			{
				$query_where[] = 'm.body' . (in_array($regularWord, $query_params['excluded_words']) ? ' NOT' : '') . $operator . '{string:complex_body_' . $count . '}';
				$query_params['complex_body_' . $count++] = $this->_getMatchPattern($regularWord, $useLike);
			}

		if ($query_params['user_query'])
			$query_where[] = '{raw:user_query}';
		if ($query_params['board_query'])
			$query_where[] = 'm.id_board {raw:board_query}';

		if ($query_params['topic'])
			$query_where[] = 'm.id_topic = {int:topic}';
		if ($query_params['min_msg_id'])
			$query_where[] = 'm.id_msg >= {int:min_msg_id}';
		if ($query_params['max_msg_id'])
			$query_where[] = 'm.id_msg <= {int:max_msg_id}';

		$count = 0;
		if (!empty($query_params['excluded_phrases']) && empty($modSettings['search_force_index']))
			foreach ($query_params['excluded_phrases'] as $phrase)
			{
				$query_where[] = 'subject NOT' . $operator . '{string:exclude_subject_phrase_' . $count . '}';
				$query_params['exclude_subject_phrase_' . $count++] = $this->_getMatchPattern($phrase, $useLike);
			}
		$count = 0;
		if (!empty($query_params['excluded_subject_words']) && empty($modSettings['search_force_index']))
			foreach ($query_params['excluded_subject_words'] as $excludedWord)
			{
				$query_where[] = 'subject NOT' . $operator . '{string:exclude_subject_words_' . $count . '}';
				$query_params['exclude_subject_words_' . $count++] = $this->_getMatchPattern($excludedWord, $useLike);
			}

		if (!empty($modSettings['search_simple_fulltext']))
		{
			// Natural language mode: simply leave the excluded terms out.
			$query_where[] = 'MATCH (body) AGAINST ({string:body_match})';
			$query_params['body_match'] = implode(' ', array_diff($words['indexed_words'], $query_params['excluded_index_words']));
		}
		elseif ($this->canDoBooleanSearch)
		{
			// Boolean mode: one MATCH with every term prefixed by + (required) or - (excluded).
			$booleanTerms = array();
			foreach ($words['indexed_words'] as $fulltextWord)
				$booleanTerms[] = (in_array($fulltextWord, $query_params['excluded_index_words']) ? '-' : '+') . $fulltextWord;
			$query_params['boolean_match'] = implode(' ', $booleanTerms);

			$query_where[] = 'MATCH (body) AGAINST ({string:boolean_match} IN BOOLEAN MODE)';
		}
		else
		{
			// No boolean mode: one MATCH per term, negated for excluded terms.
			$count = 0;
			foreach ($words['indexed_words'] as $fulltextWord)
			{
				$query_where[] = (in_array($fulltextWord, $query_params['excluded_index_words']) ? 'NOT ' : '') . 'MATCH (body) AGAINST ({string:fulltext_match_' . $count . '})';
				$query_params['fulltext_match_' . $count++] = $fulltextWord;
			}
		}

		$ignoreRequest = $smcFunc['db_search_query']('insert_into_log_messages_fulltext', ($smcFunc['db_support_ignore'] ? ( '
			INSERT IGNORE INTO {db_prefix}' . $search_data['insert_into'] . '
				(' . implode(', ', array_keys($query_select)) . ')') : '') . '
			SELECT ' . implode(', ', $query_select) . '
			FROM {db_prefix}messages AS m
			WHERE ' . implode('
				AND ', $query_where) . (empty($search_data['max_results']) ? '' : '
			LIMIT ' . ($search_data['max_results'] - $search_data['indexed_results'])),
			$query_params
		);

		return $ignoreRequest;
	}
}
240
241 ?>