Mercurial > hg > vamp-website
comparison forum/Sources/SearchAPI-Fulltext.php @ 76:e3e11437ecea website
Add forum code
author | Chris Cannam |
---|---|
date | Sun, 07 Jul 2013 11:25:48 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
75:72f59aa7e503 | 76:e3e11437ecea |
---|---|
1 <?php | |
2 | |
3 /** | |
4 * Simple Machines Forum (SMF) | |
5 * | |
6 * @package SMF | |
7 * @author Simple Machines http://www.simplemachines.org | |
8 * @copyright 2011 Simple Machines | |
9 * @license http://www.simplemachines.org/about/smf/license.php BSD | |
10 * | |
11 * @version 2.0 | |
12 */ | |
13 | |
14 if (!defined('SMF')) | |
15 die('Hacking attempt...'); | |
16 | |
17 /* | |
18 int searchSort(string $wordA, string $wordB) | |
19 - callback function for usort used to sort the fulltext results. | |
20 - the order of sorting is: large words, small words, large words that | |
21 are excluded from the search, small words that are excluded. | |
22 */ | |
23 | |
/**
 * Search API that answers forum searches through a MySQL FULLTEXT index on
 * the messages table.
 *
 * The class reads its configuration from the globals $modSettings, $db_type,
 * $db_connection and talks to the database only through the $smcFunc
 * callback table.
 */
class fulltext_search
{
	// This is the last version of SMF that this was tested on, to protect against API changes.
	public $version_compatible = 'SMF 2.0';
	// This won't work with versions of SMF less than this.
	public $min_smf_version = 'SMF 2.0 Beta 2';
	// Is it supported?
	public $is_supported = true;

	// Can we do a boolean search - tested on construct.
	protected $canDoBooleanSearch = false;
	// What words are banned?
	protected $bannedWords = array();
	// What is the minimum word length?
	protected $min_word_length = 4;
	// What databases support the fulltext index?
	protected $supported_databases = array('mysql');

	/**
	 * Detects whether the current database can be used and, if so, loads the
	 * boolean-mode capability, the banned word list and the minimum indexed
	 * word length.
	 */
	public function __construct()
	{
		global $smcFunc, $db_connection, $modSettings, $db_type;

		// Is this database supported? If not, leave every other property at its default.
		if (!in_array($db_type, $this->supported_databases))
		{
			$this->is_supported = false;
			return;
		}

		// Some MySQL versions are superior to others :P.
		$this->canDoBooleanSearch = version_compare($smcFunc['db_server_info']($db_connection), '4.0.1', '>=');

		$this->bannedWords = empty($modSettings['search_banned_words']) ? array() : explode(',', $modSettings['search_banned_words']);
		$this->min_word_length = $this->_getMinWordLength();
	}

	/**
	 * Check whether the method can be performed by this API.
	 *
	 * @param string $methodName Name of the search API method being asked about.
	 * @param mixed $query_params Unused here; accepted for interface compatibility.
	 * @return bool True for searchSort, prepareIndexes and indexedWordQuery, false otherwise.
	 */
	public function supportsMethod($methodName, $query_params = null)
	{
		switch ($methodName)
		{
			case 'searchSort':
			case 'prepareIndexes':
			case 'indexedWordQuery':
				return true;

			default:
				return false;
		}
	}

	/**
	 * What is the minimum word length full text supports?
	 *
	 * Asks the server for its ft_min_word_len variable and falls back to 4
	 * (the MySQL default) when the query fails or does not return one row.
	 *
	 * @return int Minimum word length.
	 */
	protected function _getMinWordLength()
	{
		global $smcFunc;

		// 4 is the MySQL default...
		$min_word_length = 4;

		// Try to determine the minimum number of letters for a fulltext search.
		$request = $smcFunc['db_search_query']('max_fulltext_length', '
			SHOW VARIABLES
			LIKE {string:fulltext_minimum_word_length}',
			array(
				'fulltext_minimum_word_length' => 'ft_min_word_len',
			)
		);

		if ($request !== false)
		{
			if ($smcFunc['db_num_rows']($request) == 1)
			{
				// The row is (variable name, value); only the value matters.
				$row = $smcFunc['db_fetch_row']($request);
				if (isset($row[1]))
					$min_word_length = (int) $row[1];
			}

			// Always release the result set, even when the row count was unexpected.
			$smcFunc['db_free_result']($request);
		}

		return $min_word_length;
	}

	/**
	 * usort() callback: compares the length of two strings plus a little.
	 *
	 * Longer words sort first; any word listed in the global $excludedWords
	 * is penalised by 1000 so that it sorts after every non-excluded word.
	 *
	 * @param string $a First word.
	 * @param string $b Second word.
	 * @return int -1, 0 or 1 as expected by usort().
	 */
	public function searchSort($a, $b)
	{
		global $excludedWords;

		// Tolerate the global not having been populated yet.
		$excluded = is_array($excludedWords) ? $excludedWords : array();

		$x = strlen($a) - (in_array($a, $excluded) ? 1000 : 0);
		$y = strlen($b) - (in_array($b, $excluded) ? 1000 : 0);

		if ($x < $y)
			return 1;

		return $x > $y ? -1 : 0;
	}

	/**
	 * Do we have to do some work with the words we are searching for to prepare them?
	 *
	 * Sorts one search term into the 'indexed_words' list (answered by the
	 * fulltext index) and/or the 'words' list (answered by LIKE/RLIKE).
	 *
	 * @param string $word The search term, possibly a multi-word phrase.
	 * @param array $wordsSearch Receives entries under 'words' and 'indexed_words' (by reference).
	 * @param array $wordsExclude Receives the indexed form of excluded terms (by reference).
	 * @param bool $isExcluded Whether the term is to be excluded from the results.
	 * @return void
	 */
	public function prepareIndexes($word, &$wordsSearch, &$wordsExclude, $isExcluded)
	{
		// $smcFunc is needed for the multibyte-aware strlen below; it was missing
		// from the global list, which made the non-boolean path fail.
		global $modSettings, $smcFunc;

		$subwords = text2words($word, null, false);
		$isPhrase = count($subwords) > 1;

		// Without boolean mode a phrase cannot be matched by the index alone.
		if (!$this->canDoBooleanSearch && $isPhrase && empty($modSettings['search_force_index']))
			$wordsSearch['words'][] = $word;

		if ($this->canDoBooleanSearch)
		{
			// Boolean mode understands quoted phrases, so hand it the term whole.
			$fulltextWord = count($subwords) === 1 ? $word : '"' . $word . '"';
			$wordsSearch['indexed_words'][] = $fulltextWord;
			if ($isExcluded)
				$wordsExclude[] = $fulltextWord;

			return;
		}

		// Excluded phrases don't benefit from being split into subwords.
		if ($isPhrase && $isExcluded)
			return;

		$relyOnIndex = true;
		foreach ($subwords as $subword)
		{
			$isBanned = in_array($subword, $this->bannedWords);

			if ($smcFunc['strlen']($subword) >= $this->min_word_length && !$isBanned)
			{
				$wordsSearch['indexed_words'][] = $subword;
				if ($isExcluded)
					$wordsExclude[] = $subword;
			}
			// Too short to be in the index, so the index alone cannot answer this term.
			elseif (!$isBanned)
				$relyOnIndex = false;
		}

		// NOTE(review): this condition can never hold here, because this point is only
		// reached when boolean search is unavailable. It is kept unchanged to preserve
		// behaviour - confirm the intended condition before altering it.
		if ($this->canDoBooleanSearch && !$relyOnIndex && empty($modSettings['search_force_index']))
			$wordsSearch['words'][] = $word;
	}

	/**
	 * Search for indexed words.
	 *
	 * Builds the WHERE clause from the regular words, the caller-supplied
	 * filters, the subject exclusions and the fulltext MATCH condition(s),
	 * then runs the resulting INSERT ... SELECT through
	 * $smcFunc['db_search_query'].
	 *
	 * @param array $words Lists under 'words' (LIKE/RLIKE) and 'indexed_words' (fulltext).
	 * @param array $search_data Reads 'params', 'insert_into', 'max_results',
	 *		'indexed_results' and the optional 'no_regexp' flag.
	 * @return mixed Whatever $smcFunc['db_search_query'] returns.
	 */
	public function indexedWordQuery($words, $search_data)
	{
		global $modSettings, $smcFunc;

		$query_select = array(
			'id_msg' => 'm.id_msg',
		);
		$query_where = array();
		$query_params = $search_data['params'];

		// Whole-word (regexp) matching is used only when the setting is on and the
		// caller has not vetoed it. The original read an undefined local here, so
		// the veto could never take effect; an absent flag behaves as before.
		$useRegexp = !empty($modSettings['search_match_words']) && empty($search_data['no_regexp']);
		$operator = $this->_matchOperator($useRegexp);

		$regularWords = empty($words['words']) ? array() : $words['words'];
		$indexedWords = empty($words['indexed_words']) ? array() : $words['indexed_words'];
		$excludedWords = empty($query_params['excluded_words']) ? array() : $query_params['excluded_words'];
		$excludedIndexWords = empty($query_params['excluded_index_words']) ? array() : $query_params['excluded_index_words'];

		if (!empty($query_params['id_search']))
			$query_select['id_search'] = '{int:id_search}';

		// Words the index cannot answer are matched directly against the body.
		if (empty($modSettings['search_simple_fulltext']))
		{
			$count = 0;
			foreach ($regularWords as $regularWord)
			{
				$query_where[] = 'm.body' . (in_array($regularWord, $excludedWords) ? ' NOT' : '') . $operator . '{string:complex_body_' . $count . '}';
				$query_params['complex_body_' . $count++] = $this->_matchPattern($regularWord, $useRegexp);
			}
		}

		if (!empty($query_params['user_query']))
			$query_where[] = '{raw:user_query}';
		if (!empty($query_params['board_query']))
			$query_where[] = 'm.id_board {raw:board_query}';

		if (!empty($query_params['topic']))
			$query_where[] = 'm.id_topic = {int:topic}';
		if (!empty($query_params['min_msg_id']))
			$query_where[] = 'm.id_msg >= {int:min_msg_id}';
		if (!empty($query_params['max_msg_id']))
			$query_where[] = 'm.id_msg <= {int:max_msg_id}';

		// Subject exclusions are applied only when the index is not forced.
		if (empty($modSettings['search_force_index']))
		{
			$count = 0;
			if (!empty($query_params['excluded_phrases']))
				foreach ($query_params['excluded_phrases'] as $phrase)
				{
					$query_where[] = 'subject NOT' . $operator . '{string:exclude_subject_phrase_' . $count . '}';
					$query_params['exclude_subject_phrase_' . $count++] = $this->_matchPattern($phrase, $useRegexp);
				}

			$count = 0;
			if (!empty($query_params['excluded_subject_words']))
				foreach ($query_params['excluded_subject_words'] as $excludedWord)
				{
					$query_where[] = 'subject NOT' . $operator . '{string:exclude_subject_words_' . $count . '}';
					$query_params['exclude_subject_words_' . $count++] = $this->_matchPattern($excludedWord, $useRegexp);
				}
		}

		if (!empty($modSettings['search_simple_fulltext']))
		{
			// Natural-language mode: one MATCH over every non-excluded word.
			$query_where[] = 'MATCH (body) AGAINST ({string:body_match})';
			$query_params['body_match'] = implode(' ', array_diff($indexedWords, $excludedIndexWords));
		}
		elseif ($this->canDoBooleanSearch)
		{
			// Boolean mode: prefix each word with + (required) or - (excluded).
			$terms = array();
			foreach ($indexedWords as $fulltextWord)
				$terms[] = (in_array($fulltextWord, $excludedIndexWords) ? '-' : '+') . $fulltextWord;

			// implode() yields '' for an empty list, where substr('', 0, -1) could yield false.
			$query_params['boolean_match'] = implode(' ', $terms);

			$query_where[] = 'MATCH (body) AGAINST ({string:boolean_match} IN BOOLEAN MODE)';
		}
		else
		{
			// No boolean mode: one MATCH (or NOT MATCH) condition per word.
			$count = 0;
			foreach ($indexedWords as $fulltextWord)
			{
				$query_where[] = (in_array($fulltextWord, $excludedIndexWords) ? 'NOT ' : '') . 'MATCH (body) AGAINST ({string:fulltext_match_' . $count . '})';
				$query_params['fulltext_match_' . $count++] = $fulltextWord;
			}
		}

		// The INSERT IGNORE prefix is only emitted when the database layer supports it.
		$insertClause = $smcFunc['db_support_ignore'] ? ('
			INSERT IGNORE INTO {db_prefix}' . $search_data['insert_into'] . '
				(' . implode(', ', array_keys($query_select)) . ')') : '';

		$limitClause = empty($search_data['max_results']) ? '' : '
			LIMIT ' . ($search_data['max_results'] - $search_data['indexed_results']);

		$ignoreRequest = $smcFunc['db_search_query']('insert_into_log_messages_fulltext', $insertClause . '
			SELECT ' . implode(', ', $query_select) . '
			FROM {db_prefix}messages AS m
			WHERE ' . implode('
				AND ', $query_where) . $limitClause,
			$query_params
		);

		return $ignoreRequest;
	}

	/**
	 * Returns the SQL comparison operator for text matching, padded with
	 * spaces so it can be concatenated between a column and a placeholder.
	 *
	 * @param bool $useRegexp True for whole-word regexp matching, false for substring matching.
	 * @return string ' RLIKE ' or ' LIKE '.
	 */
	protected function _matchOperator($useRegexp)
	{
		return $useRegexp ? ' RLIKE ' : ' LIKE ';
	}

	/**
	 * Builds the right-hand operand that goes with _matchOperator().
	 *
	 * @param string $text The word or phrase to look for.
	 * @param bool $useRegexp True to build a word-boundary regexp, false for a LIKE pattern.
	 * @return string The pattern to bind as a string parameter.
	 */
	protected function _matchPattern($text, $useRegexp)
	{
		// LIKE: escape its two wildcards and match anywhere in the text.
		if (!$useRegexp)
			return '%' . strtr($text, array('_' => '\\_', '%' => '\\%')) . '%';

		// RLIKE: neutralise regexp metacharacters by wrapping each in a bracket
		// expression, then anchor the result on word boundaries.
		// NOTE(review): the [[:<:]] / [[:>:]] markers are the legacy MySQL regexp
		// syntax; confirm the target server version still accepts them.
		return '[[:<:]]' . addcslashes(preg_replace(array('/([\[\]$.+*?|{}()])/'), array('[$1]'), $text), '\\\'') . '[[:>:]]';
	}
}
240 | |
241 ?> |