comparison forum/Sources/SearchAPI-Custom.php @ 76:e3e11437ecea website

Add forum code
author Chris Cannam
date Sun, 07 Jul 2013 11:25:48 +0200
parents
children
comparison
equal deleted inserted replaced
75:72f59aa7e503 76:e3e11437ecea
1 <?php
2
3 /**
4 * Simple Machines Forum (SMF)
5 *
6 * @package SMF
7 * @author Simple Machines http://www.simplemachines.org
8 * @copyright 2011 Simple Machines
9 * @license http://www.simplemachines.org/about/smf/license.php BSD
10 *
11 * @version 2.0
12 */
13
// Abort immediately unless the SMF constant has already been defined.
if (!defined('SMF'))
{
	die('Hacking attempt...');
}
16
17 /*
18 int searchSort(string $wordA, string $wordB)
19 - callback function for usort used to sort the fulltext results.
20 - the order of sorting is: large words, small words, large words that
21 are excluded from the search, small words that are excluded.
22 */
23
/**
 * Search API backed by the custom word index stored in {db_prefix}log_search_words.
 *
 * Configuration is read from $modSettings:
 *  - search_custom_index_config: serialized index settings (must contain 'bytes_per_word').
 *  - search_stopwords: comma separated list of words that are never indexed.
 *  - search_force_index: when set, only the index is consulted (no body LIKE/RLIKE matching).
 *  - search_match_words: when set (and regular expressions are allowed), whole words are matched.
 */
class custom_search
{
	// This is the last version of SMF that this was tested on, to protect against API changes.
	public $version_compatible = 'SMF 2.0';
	// This won't work with versions of SMF less than this.
	public $min_smf_version = 'SMF 2.0 Beta 2';
	// Is it supported?
	public $is_supported = true;

	// The unserialized custom index configuration.
	protected $indexSettings = array();
	// What words are banned?
	protected $bannedWords = array();
	// What is the minimum word length?
	protected $min_word_length = null;
	// What databases support the custom index?
	protected $supported_databases = array('mysql', 'postgresql', 'sqlite');

	/**
	 * Checks database support and loads the index configuration from $modSettings.
	 *
	 * Sets $is_supported to false when $db_type is not in $supported_databases.
	 * When no index configuration is stored the object is left with its defaults.
	 */
	public function __construct()
	{
		global $modSettings, $db_type;

		// Is this database supported?
		if (!in_array($db_type, $this->supported_databases))
		{
			$this->is_supported = false;
			return;
		}

		if (empty($modSettings['search_custom_index_config']))
			return;

		// A corrupt setting makes unserialize() return false; fall back to an empty configuration
		// instead of indexing into a boolean.
		$settings = unserialize($modSettings['search_custom_index_config']);
		$this->indexSettings = is_array($settings) ? $settings : array();

		$this->bannedWords = empty($modSettings['search_stopwords']) ? array() : explode(',', $modSettings['search_stopwords']);
		$this->min_word_length = isset($this->indexSettings['bytes_per_word']) ? $this->indexSettings['bytes_per_word'] : null;
	}

	/**
	 * Check whether the search can be performed by this API.
	 *
	 * @param string $methodName The name of the API method being asked about.
	 * @param mixed $query_params Unused here; kept for interface compatibility.
	 * @return bool True for the methods this class implements, false otherwise.
	 */
	public function supportsMethod($methodName, $query_params = null)
	{
		switch ($methodName)
		{
			case 'isValid':
			case 'searchSort':
			case 'prepareIndexes':
			case 'indexedWordQuery':
				return true;

			default:
				// All other methods, too bad dunno you.
				return false;
		}
	}

	/**
	 * If the settings don't exist we can't continue.
	 *
	 * @return bool True when a custom index configuration is stored in $modSettings.
	 */
	public function isValid()
	{
		global $modSettings;

		return !empty($modSettings['search_custom_index_config']);
	}

	/**
	 * Comparison callback: orders words by length, pushing excluded words to the front.
	 *
	 * Each word is weighted by its byte length; words found in the global
	 * $excludedWords list are penalised by 1000 so they sort before every other word.
	 *
	 * @param string $a First word.
	 * @param string $b Second word.
	 * @return int 1 when $a weighs more than $b, -1 when it weighs less, 0 when equal.
	 */
	public function searchSort($a, $b)
	{
		global $excludedWords;

		// The global may not have been populated; in_array() needs a real array.
		$excluded = is_array($excludedWords) ? $excludedWords : array();

		$x = strlen($a) - (in_array($a, $excluded) ? 1000 : 0);
		$y = strlen($b) - (in_array($b, $excluded) ? 1000 : 0);

		if ($y < $x)
			return 1;

		return $y > $x ? -1 : 0;
	}

	/**
	 * Do we have to do some work with the words we are searching for to prepare them?
	 *
	 * Splits $word into index subwords via text2words() and records them for the query.
	 *
	 * @param string $word The word or phrase being searched for.
	 * @param array &$wordsSearch Receives the raw word under 'words' (unless the index is forced)
	 *                            and the usable subwords under 'indexed_words'.
	 * @param array &$wordsExclude Receives the subwords of an excluded word.
	 * @param bool $isExcluded Whether $word is being excluded from the search.
	 */
	public function prepareIndexes($word, &$wordsSearch, &$wordsExclude, $isExcluded)
	{
		global $modSettings, $smcFunc;

		$subwords = text2words($word, $this->min_word_length, true);

		if (empty($modSettings['search_force_index']))
			$wordsSearch['words'][] = $word;

		// Excluded phrases don't benefit from being split into subwords.
		// (This used to be a "continue" outside of any loop, which is a fatal error.)
		if (count($subwords) > 1 && $isExcluded)
			return;

		foreach ($subwords as $subword)
		{
			// Skip anything too short to be indexed, and anything on the stop word list.
			if ($smcFunc['strlen']($subword) < $this->min_word_length || in_array($subword, $this->bannedWords))
				continue;

			$wordsSearch['indexed_words'][] = $subword;
			if ($isExcluded)
				$wordsExclude[] = $subword;
		}
	}

	/**
	 * Search for indexed words.
	 *
	 * Builds and runs a query that selects matching message ids from {db_prefix}messages
	 * (inserting them into $search_data['insert_into'] when INSERT IGNORE is supported).
	 *
	 * @param array $words 'words' holds raw words matched against the message body,
	 *                     'indexed_words' holds the ids joined against log_search_words.
	 * @param array $search_data Uses 'params', 'no_regexp', 'insert_into', 'max_results'
	 *                           and 'indexed_results'.
	 * @return mixed Whatever $smcFunc['db_search_query'] returns.
	 */
	public function indexedWordQuery($words, $search_data)
	{
		global $modSettings, $smcFunc;

		$query_select = array(
			'id_msg' => 'm.id_msg',
		);
		$query_inner_join = array();
		$query_left_join = array();
		$query_where = array();
		$query_params = $search_data['params'];

		if ($query_params['id_search'])
			$query_select['id_search'] = '{int:id_search}';

		// Plain substring matching or whole-word regular expressions? Same answer for every term.
		$useLike = empty($modSettings['search_match_words']) || $search_data['no_regexp'];
		$operator = $useLike ? ' LIKE ' : ' RLIKE ';

		// The raw words are matched (or rejected) against the message body.
		if (!empty($words['words']))
		{
			$count = 0;
			foreach ($words['words'] as $regularWord)
			{
				$query_where[] = 'm.body' . (in_array($regularWord, $query_params['excluded_words']) ? ' NOT' : '') . $operator . '{string:complex_body_' . $count . '}';
				$query_params['complex_body_' . $count++] = $this->buildMatchPattern($regularWord, $useLike);
			}
		}

		if ($query_params['user_query'])
			$query_where[] = '{raw:user_query}';
		if ($query_params['board_query'])
			$query_where[] = 'm.id_board {raw:board_query}';

		if ($query_params['topic'])
			$query_where[] = 'm.id_topic = {int:topic}';
		if ($query_params['min_msg_id'])
			$query_where[] = 'm.id_msg >= {int:min_msg_id}';
		if ($query_params['max_msg_id'])
			$query_where[] = 'm.id_msg <= {int:max_msg_id}';

		// Subject exclusions only apply when the body/subject text is being consulted at all.
		if (empty($modSettings['search_force_index']))
		{
			if (!empty($query_params['excluded_phrases']))
			{
				$count = 0;
				foreach ($query_params['excluded_phrases'] as $phrase)
				{
					$query_where[] = 'subject NOT ' . $operator . '{string:exclude_subject_phrase_' . $count . '}';
					$query_params['exclude_subject_phrase_' . $count++] = $this->buildMatchPattern($phrase, $useLike);
				}
			}

			if (!empty($query_params['excluded_subject_words']))
			{
				$count = 0;
				foreach ($query_params['excluded_subject_words'] as $excludedWord)
				{
					$query_where[] = 'subject NOT ' . $operator . '{string:exclude_subject_words_' . $count . '}';
					$query_params['exclude_subject_words_' . $count++] = $this->buildMatchPattern($excludedWord, $useLike);
				}
			}
		}

		// Every indexed word gets its own alias of the word table.
		$numTables = 0;
		$prev_join = 0;
		if (!empty($words['indexed_words']))
		{
			foreach ($words['indexed_words'] as $indexedWord)
			{
				$numTables++;
				$alias = 'lsw' . $numTables;

				// id_word is spliced into the SQL unquoted, so make sure it can only ever be a number.
				// NOTE(review): assumes text2words() hands back integer word ids - confirm.
				$wordId = (int) $indexedWord;

				if (in_array($indexedWord, $query_params['excluded_index_words']))
				{
					// Excluded word: left join and require that no matching row exists.
					$query_left_join[] = '{db_prefix}log_search_words AS ' . $alias . ' ON (' . $alias . '.id_word = ' . $wordId . ' AND ' . $alias . '.id_msg = m.id_msg)';
					$query_where[] = '(' . $alias . '.id_word IS NULL)';
				}
				else
				{
					// Required word: chain the inner joins from the previous word table (or the messages table).
					$query_inner_join[] = '{db_prefix}log_search_words AS ' . $alias . ' ON (' . $alias . '.id_msg = ' . ($prev_join === 0 ? 'm' : 'lsw' . $prev_join) . '.id_msg)';
					$query_where[] = $alias . '.id_word = ' . $wordId;
					$prev_join = $numTables;
				}
			}
		}

		// Assemble the statement piece by piece.
		$sql = '';
		if ($smcFunc['db_support_ignore'])
			$sql .= '
				INSERT IGNORE INTO {db_prefix}' . $search_data['insert_into'] . '
					(' . implode(', ', array_keys($query_select)) . ')';

		$sql .= '
				SELECT ' . implode(', ', $query_select) . '
				FROM {db_prefix}messages AS m';

		if (!empty($query_inner_join))
			$sql .= '
					INNER JOIN ' . implode('
					INNER JOIN ', $query_inner_join);

		if (!empty($query_left_join))
			$sql .= '
					LEFT JOIN ' . implode('
					LEFT JOIN ', $query_left_join);

		$sql .= '
				WHERE ' . implode('
					AND ', $query_where);

		// Only ask for as many rows as are still needed to reach the result limit.
		if (!empty($search_data['max_results']))
			$sql .= '
				LIMIT ' . ($search_data['max_results'] - $search_data['indexed_results']);

		$ignoreRequest = $smcFunc['db_search_query']('insert_into_log_messages_fulltext', $sql, $query_params);

		return $ignoreRequest;
	}

	/**
	 * Builds the right-hand side of a LIKE or RLIKE comparison for a search term.
	 *
	 * @param string $text The word or phrase to match.
	 * @param bool $useLike True for a LIKE pattern, false for an RLIKE whole-word pattern.
	 * @return string The pattern to bind as a string parameter.
	 */
	protected function buildMatchPattern($text, $useLike)
	{
		// LIKE: escape the wildcard characters and match the term anywhere in the text.
		if ($useLike)
			return '%' . strtr($text, array('_' => '\\_', '%' => '\\%')) . '%';

		// RLIKE: neutralise regex metacharacters and anchor the term on word boundaries.
		return '[[:<:]]' . addcslashes(preg_replace(array('/([\[\]$.+*?|{}()])/'), array('[$1]'), $text), '\\\'') . '[[:>:]]';
	}
}
213
214 ?>