Mercurial > hg > vamp-website
comparison forum/Sources/SearchAPI-Custom.php @ 76:e3e11437ecea website
Add forum code
author | Chris Cannam |
---|---|
date | Sun, 07 Jul 2013 11:25:48 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
75:72f59aa7e503 | 76:e3e11437ecea |
---|---|
1 <?php | |
2 | |
3 /** | |
4 * Simple Machines Forum (SMF) | |
5 * | |
6 * @package SMF | |
7 * @author Simple Machines http://www.simplemachines.org | |
8 * @copyright 2011 Simple Machines | |
9 * @license http://www.simplemachines.org/about/smf/license.php BSD | |
10 * | |
11 * @version 2.0 | |
12 */ | |
13 | |
// Refuse to run when the SMF constant is not defined, i.e. when this file is
// requested directly (presumably the forum's entry script defines it - confirm).
if (!defined('SMF'))
{
	die('Hacking attempt...');
}
16 | |
17 /* | |
18 int searchSort(string $wordA, string $wordB) | |
19 - callback function for usort used to sort the fulltext results. | |
20 - the order of sorting is: large words, small words, large words that | |
21 are excluded from the search, small words that are excluded. | |
22 */ | |
23 | |
/**
 * Search API implementation that queries the custom word index table
 * ({db_prefix}log_search_words) joined against {db_prefix}messages.
 *
 * Relies on the SMF globals $modSettings, $db_type and $smcFunc, and on the
 * text2words() helper, all of which are defined outside this file.
 */
class custom_search
{
	// This is the last version of SMF that this was tested on, to protect against API changes.
	public $version_compatible = 'SMF 2.0';
	// This won't work with versions of SMF less than this.
	public $min_smf_version = 'SMF 2.0 Beta 2';
	// Is it supported?
	public $is_supported = true;

	// Decoded contents of $modSettings['search_custom_index_config'] (empty array when unavailable).
	protected $indexSettings = array();
	// What words are banned?
	protected $bannedWords = array();
	// What is the minimum word length? (null until the index configuration has been loaded)
	protected $min_word_length = null;
	// What databases support the custom index?
	protected $supported_databases = array('mysql', 'postgresql', 'sqlite');

	/**
	 * Loads the custom index configuration and the stop-word list from $modSettings.
	 *
	 * Marks the API as unsupported when $db_type is not one of $supported_databases.
	 * Leaves all defaults in place when no index configuration is stored.
	 */
	public function __construct()
	{
		global $modSettings, $db_type;

		// Is this database supported?
		if (!in_array($db_type, $this->supported_databases))
		{
			$this->is_supported = false;
			return;
		}

		if (empty($modSettings['search_custom_index_config']))
			return;

		// NOTE(review): unserialize() must only ever see trusted data; this value is assumed
		// to come from the forum's own settings storage - confirm.
		$indexSettings = unserialize($modSettings['search_custom_index_config']);

		// A corrupt setting makes unserialize() return false; keep the empty default then.
		if (is_array($indexSettings))
			$this->indexSettings = $indexSettings;

		$this->bannedWords = empty($modSettings['search_stopwords']) ? array() : explode(',', $modSettings['search_stopwords']);
		$this->min_word_length = isset($this->indexSettings['bytes_per_word']) ? $this->indexSettings['bytes_per_word'] : null;
	}

	/**
	 * Check whether the search can be performed by this API.
	 *
	 * @param string $methodName Name of the API method being asked about.
	 * @param mixed $query_params Unused here; kept for interface compatibility.
	 * @return bool True for the four methods this class implements, false otherwise.
	 */
	public function supportsMethod($methodName, $query_params = null)
	{
		switch ($methodName)
		{
			case 'isValid':
			case 'searchSort':
			case 'prepareIndexes':
			case 'indexedWordQuery':
				return true;

			default:
				// All other methods, too bad dunno you.
				return false;
		}
	}

	/**
	 * If the settings don't exist we can't continue.
	 *
	 * @return bool True when $modSettings['search_custom_index_config'] is non-empty.
	 */
	public function isValid()
	{
		global $modSettings;

		return !empty($modSettings['search_custom_index_config']);
	}

	/**
	 * Comparison callback: compares the byte length of two words, with words found in
	 * the global $excludedWords list weighted 1000 lower than any regular word.
	 *
	 * @param string $a First word.
	 * @param string $b Second word.
	 * @return int 1 when $a weighs more than $b, -1 when it weighs less, 0 when equal.
	 */
	public function searchSort($a, $b)
	{
		global $excludedWords;

		// The global may not have been populated by the caller; treat that as "nothing excluded".
		$excluded = is_array($excludedWords) ? $excludedWords : array();

		$x = strlen($a) - (in_array($a, $excluded) ? 1000 : 0);
		$y = strlen($b) - (in_array($b, $excluded) ? 1000 : 0);

		return $y < $x ? 1 : ($y > $x ? -1 : 0);
	}

	/**
	 * Do we have to do some work with the words we are searching for to prepare them?
	 *
	 * Appends $word to $wordsSearch['words'] (unless search_force_index is set) and
	 * appends every qualifying subword returned by text2words() to
	 * $wordsSearch['indexed_words'] (and to $wordsExclude when $isExcluded is true).
	 *
	 * @param string $word The word or phrase to prepare.
	 * @param array &$wordsSearch Receives entries under the 'words' and 'indexed_words' keys.
	 * @param array &$wordsExclude Receives the subwords of excluded single words.
	 * @param bool $isExcluded Whether $word is to be excluded from the results.
	 */
	public function prepareIndexes($word, &$wordsSearch, &$wordsExclude, $isExcluded)
	{
		global $modSettings, $smcFunc;

		// text2words() lives elsewhere in SMF; the third argument presumably requests
		// the index (hashed) form of each subword - TODO confirm.
		$subwords = text2words($word, $this->min_word_length, true);

		if (empty($modSettings['search_force_index']))
			$wordsSearch['words'][] = $word;

		// Excluded phrases don't benefit from being split into subwords.
		// (This used to be a "continue" outside of any loop, which is a fatal error.)
		if (count($subwords) > 1 && $isExcluded)
			return;

		foreach ($subwords as $subword)
		{
			if ($smcFunc['strlen']($subword) >= $this->min_word_length && !in_array($subword, $this->bannedWords))
			{
				$wordsSearch['indexed_words'][] = $subword;
				if ($isExcluded)
					$wordsExclude[] = $subword;
			}
		}
	}

	/**
	 * Builds the right-hand operand for a body/subject text match.
	 *
	 * @param string $text The raw word or phrase.
	 * @param bool $useLike True to build a LIKE pattern, false to build an RLIKE word-boundary pattern.
	 * @return string The pattern to bind as a {string:...} query parameter.
	 */
	protected function buildMatchPattern($text, $useLike)
	{
		// LIKE: escape the LIKE wildcards and match anywhere in the text.
		if ($useLike)
			return '%' . strtr($text, array('_' => '\\_', '%' => '\\%')) . '%';

		// RLIKE: neutralise regex metacharacters by wrapping each in a bracket expression,
		// escape backslashes and single quotes, and anchor on word boundaries.
		return '[[:<:]]' . addcslashes(preg_replace(array('/([\[\]$.+*?|{}()])/'), array('[$1]'), $text), '\\\'') . '[[:>:]]';
	}

	/**
	 * Search for indexed words.
	 *
	 * Assembles an (optionally INSERT IGNORE ...) SELECT over {db_prefix}messages joined to
	 * {db_prefix}log_search_words once per indexed word, and hands it to
	 * $smcFunc['db_search_query'].
	 *
	 * @param array $words Uses the 'words' (plain words matched against m.body) and
	 *                     'indexed_words' (values compared to id_word) keys.
	 * @param array $search_data Uses the 'params', 'no_regexp', 'insert_into', 'max_results'
	 *                           and 'indexed_results' keys.
	 * @return mixed Whatever $smcFunc['db_search_query'] returns.
	 */
	public function indexedWordQuery($words, $search_data)
	{
		global $modSettings, $smcFunc;

		$query_select = array(
			'id_msg' => 'm.id_msg',
		);
		$query_inner_join = array();
		$query_left_join = array();
		$query_where = array();
		$query_params = $search_data['params'];

		// Plain LIKE matching unless whole-word matching is enabled and regexps are allowed.
		$useLike = empty($modSettings['search_match_words']) || !empty($search_data['no_regexp']);
		$operator = $useLike ? ' LIKE ' : ' RLIKE ';
		$forceIndex = !empty($modSettings['search_force_index']);

		if (!empty($query_params['id_search']))
			$query_select['id_search'] = '{int:id_search}';

		$excludedBodyWords = empty($query_params['excluded_words']) ? array() : $query_params['excluded_words'];
		$count = 0;
		if (!empty($words['words']))
		{
			foreach ($words['words'] as $regularWord)
			{
				$query_where[] = 'm.body' . (in_array($regularWord, $excludedBodyWords) ? ' NOT' : '') . $operator . '{string:complex_body_' . $count . '}';
				$query_params['complex_body_' . $count++] = $this->buildMatchPattern($regularWord, $useLike);
			}
		}

		if (!empty($query_params['user_query']))
			$query_where[] = '{raw:user_query}';
		if (!empty($query_params['board_query']))
			$query_where[] = 'm.id_board {raw:board_query}';

		if (!empty($query_params['topic']))
			$query_where[] = 'm.id_topic = {int:topic}';
		if (!empty($query_params['min_msg_id']))
			$query_where[] = 'm.id_msg >= {int:min_msg_id}';
		if (!empty($query_params['max_msg_id']))
			$query_where[] = 'm.id_msg <= {int:max_msg_id}';

		// Subject exclusions are only applied when the index is not being forced.
		$count = 0;
		if (!empty($query_params['excluded_phrases']) && !$forceIndex)
		{
			foreach ($query_params['excluded_phrases'] as $phrase)
			{
				$query_where[] = 'subject NOT ' . $operator . '{string:exclude_subject_phrase_' . $count . '}';
				$query_params['exclude_subject_phrase_' . $count++] = $this->buildMatchPattern($phrase, $useLike);
			}
		}

		$count = 0;
		if (!empty($query_params['excluded_subject_words']) && !$forceIndex)
		{
			foreach ($query_params['excluded_subject_words'] as $excludedWord)
			{
				$query_where[] = 'subject NOT ' . $operator . '{string:exclude_subject_words_' . $count . '}';
				$query_params['exclude_subject_words_' . $count++] = $this->buildMatchPattern($excludedWord, $useLike);
			}
		}

		$excludedIndexWords = empty($query_params['excluded_index_words']) ? array() : $query_params['excluded_index_words'];
		$numTables = 0;
		$prev_join = 0;
		if (!empty($words['indexed_words']))
		{
			// NOTE(review): $indexedWord is concatenated into the SQL unquoted, so it must be
			// numeric. It is presumably always an integer id produced by text2words() - confirm.
			foreach ($words['indexed_words'] as $indexedWord)
			{
				$numTables++;
				if (in_array($indexedWord, $excludedIndexWords))
				{
					// Excluded word: LEFT JOIN and require that no matching index row exists.
					$query_left_join[] = '{db_prefix}log_search_words AS lsw' . $numTables . ' ON (lsw' . $numTables . '.id_word = ' . $indexedWord . ' AND lsw' . $numTables . '.id_msg = m.id_msg)';
					$query_where[] = '(lsw' . $numTables . '.id_word IS NULL)';
				}
				else
				{
					// Required word: chain each INNER JOIN onto the previous one (or onto messages).
					$query_inner_join[] = '{db_prefix}log_search_words AS lsw' . $numTables . ' ON (lsw' . $numTables . '.id_msg = ' . ($prev_join === 0 ? 'm' : 'lsw' . $prev_join) . '.id_msg)';
					$query_where[] = 'lsw' . $numTables . '.id_word = ' . $indexedWord;
					$prev_join = $numTables;
				}
			}
		}

		$ignoreRequest = $smcFunc['db_search_query']('insert_into_log_messages_fulltext', ($smcFunc['db_support_ignore'] ? ( '
			INSERT IGNORE INTO {db_prefix}' . $search_data['insert_into'] . '
				(' . implode(', ', array_keys($query_select)) . ')') : '') . '
			SELECT ' . implode(', ', $query_select) . '
			FROM {db_prefix}messages AS m' . (empty($query_inner_join) ? '' : '
				INNER JOIN ' . implode('
				INNER JOIN ', $query_inner_join)) . (empty($query_left_join) ? '' : '
				LEFT JOIN ' . implode('
				LEFT JOIN ', $query_left_join)) . '
			WHERE ' . implode('
				AND ', $query_where) . (empty($search_data['max_results']) ? '' : '
			LIMIT ' . ($search_data['max_results'] - $search_data['indexed_results'])),
			$query_params
		);

		return $ignoreRequest;
	}
}
213 | |
214 ?> |