danielebarchiesi@4
|
1 <?php
|
danielebarchiesi@4
|
2 /**
|
danielebarchiesi@4
|
3 * ARC2 RDF Store LOAD Query Handler
|
danielebarchiesi@4
|
4 *
|
danielebarchiesi@4
|
5 * @author Benjamin Nowack
|
danielebarchiesi@4
|
6 * @license <http://arc.semsol.org/license>
|
danielebarchiesi@4
|
7 * @homepage <http://arc.semsol.org/>
|
danielebarchiesi@4
|
8 * @package ARC2
|
danielebarchiesi@4
|
9 */
|
danielebarchiesi@4
|
10
|
danielebarchiesi@4
|
11 ARC2::inc('StoreQueryHandler');
|
danielebarchiesi@4
|
12
|
danielebarchiesi@4
|
13 class ARC2_StoreLoadQueryHandler extends ARC2_StoreQueryHandler {
|
danielebarchiesi@4
|
14
|
danielebarchiesi@4
|
/**
 * Creates the LOAD query handler.
 *
 * Both arguments are handed straight to the parent constructor.
 *
 * @param array  $a      Configuration array.
 * @param object $caller Calling component; according to the original note it has to be a store.
 */
function __construct($a, &$caller) {
  parent::__construct($a, $caller);
}
|
danielebarchiesi@4
|
18
|
danielebarchiesi@4
|
/**
 * Initialises the handler after construction.
 *
 * Runs the parent initialisation, keeps the caller as $this->store and reads
 * three optional settings from the configuration array via v():
 *  - store_write_buffer      (default 2500) -> $this->write_buffer_size
 *  - store_split_threshold   (default 0)    -> $this->split_threshold
 *  - store_strip_mb_comp_str (default 0)    -> $this->strip_mb_comp_str
 */
function __init() {
  parent::__init();
  $config = $this->a;
  $this->store = $this->caller;
  $this->strip_mb_comp_str = $this->v('store_strip_mb_comp_str', 0, $config);
  $this->split_threshold = $this->v('store_split_threshold', 0, $config);
  $this->write_buffer_size = $this->v('store_write_buffer', 2500, $config);
}
|
danielebarchiesi@4
|
26
|
danielebarchiesi@4
|
27 /* */
|
danielebarchiesi@4
|
28
|
danielebarchiesi@4
|
/**
 * Runs a LOAD query: reads a document, parses it with the matching format
 * loader and writes the resulting triples to the store.
 *
 * The loader calls back into addT() for each triple; addT() buffers SQL
 * that is flushed by checkSQLBuffers().
 *
 * @param array  $infos          Parsed query infos; reads $infos['query']['url'] and the
 *                               optional $infos['query']['target_graph'].
 * @param string $data           Inline data handed to the reader/loader together with the URL.
 * @param int    $keep_bnode_ids When truthy, addT() leaves blank node IDs untouched.
 *
 * @return mixed On success an array with 't_count' and 'load_time' (plus 'inserts' and
 *               'insert_times' when the store_log_inserts setting is on). When no loader is
 *               available or the write lock cannot be obtained, the return value of addError().
 */
function runQuery($infos, $data = '', $keep_bnode_ids = 0) {
  /* addT() checks this flag; keep it defined and cleared until the lock is really held */
  $this->has_lock = 0;
  $url = $infos['query']['url'];
  /* target_graph is optional: fall back to the document URL when it is absent */
  $graph = isset($infos['query']['target_graph']) ? $infos['query']['target_graph'] : '';
  $this->target_graph = $graph ? $this->calcURI($graph) : $this->calcURI($url);
  $this->fixed_target_graph = $graph ? $this->target_graph : '';
  $this->keep_bnode_ids = $keep_bnode_ids;
  /* reader */
  ARC2::inc('Reader');
  $reader = new ARC2_Reader($this->a, $this);
  $reader->activate($url, $data);
  /* format detection */
  $mappings = array(
    'rdfxml' => 'RDFXML',
    'sparqlxml' => 'SPOG',
    'turtle' => 'Turtle',
    'ntriples' => 'Turtle',
    'rss' => 'RSS',
    'atom' => 'Atom',
    'n3' => 'Turtle',
    'html' => 'SemHTML',
    'sgajson' => 'SGAJSON',
    'cbjson' => 'CBJSON'
  );
  $format = $reader->getFormat();
  if (!$format || !isset($mappings[$format])) {
    return $this->addError('No loader available for "' .$url. '": ' . $format);
  }
  /* format loader */
  $suffix = 'Store' . $mappings[$format] . 'Loader';
  ARC2::inc($suffix);
  $cls = 'ARC2_' . $suffix;
  $loader = new $cls($this->a, $this);
  $loader->setReader($reader);
  /* lock */
  if (!$this->store->getLock()) {
    $l_name = $this->a['db_name'] . '.' . $this->store->getTablePrefix() . '.write_lock';
    return $this->addError('Could not get lock in "runQuery" (' . $l_name . ')');
  }
  $this->has_lock = 1;
  /* logging */
  $this->t_count = 0;
  $this->t_start = ARC2::mtime();
  $this->log_inserts = $this->v('store_log_inserts', 0, $this->a);
  if ($this->log_inserts) {
    @unlink("arc_insert_log.txt");
    $this->inserts = array();
    $this->insert_times = array();
    $this->t_prev = $this->t_start;
    $this->t_count_prev = 0 ;
  }
  /* load and parse */
  $this->max_term_id = $this->getMaxTermID();
  $this->max_triple_id = $this->getMaxTripleID();
  $this->column_type = $this->store->getColumnType();
  $this->term_ids = array();
  $this->triple_ids = array();
  $this->sql_buffers = array();
  /* the parse result itself is not used; triples arrive through addT() */
  $loader->parse($url, $data);
  /* done: flush whatever is still buffered */
  $this->checkSQLBuffers(1);
  if ($this->log_inserts) {
    $this->logInserts();
  }
  $this->store->releaseLock();
  /* clear the flag so that late addT() calls cannot buffer triples without the lock */
  $this->has_lock = 0;
  /* optimize the tables on roughly one load in a hundred */
  if ((rand(1, 100) == 1)) $this->store->optimizeTables();
  $t2 = ARC2::mtime();
  $dur = round($t2 - $this->t_start, 4);
  $r = array(
    't_count' => $this->t_count,
    'load_time' => $dur,
  );
  if ($this->log_inserts) {
    $r['inserts'] = $this->inserts;
    $r['insert_times'] = $this->insert_times;
  }
  return $r;
}
|
danielebarchiesi@4
|
108
|
danielebarchiesi@4
|
109 /* */
|
danielebarchiesi@4
|
110
|
danielebarchiesi@4
|
/**
 * Buffers one triple (and its graph association) for insertion.
 *
 * Resolves or allocates term IDs for graph, subject, predicate, object and the
 * combined datatype/language value, buffers a triple row unless getTripleID()
 * reports that the triple already exists, always buffers a g2t row, and flushes
 * the SQL buffers every $this->write_buffer_size triples.
 *
 * @param string $s      Subject value.
 * @param string $p      Predicate URI.
 * @param string $o      Object value.
 * @param string $s_type Subject type: 'uri', 'bnode' or 'literal'.
 * @param string $o_type Object type: 'uri', 'bnode' or 'literal'.
 * @param string $o_dt   Object datatype (optional).
 * @param string $o_lang Object language tag (optional).
 *
 * @return int|null 0 when the handler does not hold the write lock or a term type is
 *                  not supported; otherwise nothing is returned.
 */
function addT($s, $p, $o, $s_type, $o_type, $o_dt = '', $o_lang = '') {
  /* empty() also covers the case where runQuery() never initialised the flag */
  if (empty($this->has_lock)) return 0;
  $type_ids = array ('uri' => '0', 'bnode' => '1' , 'literal' => '2');
  /* an unknown type would put empty fragments into the buffered SQL and break the whole batch */
  if (!isset($type_ids[$s_type]) || !isset($type_ids[$o_type])) {
    $this->addError('Unsupported term type in "addT" (' . $s_type . ', ' . $o_type . ')');
    return 0;
  }
  $g = $this->getStoredTermID($this->target_graph, '0', 'id');
  /* unless bnode IDs are kept, derive a new ID from the graph's term ID and the original ID */
  if (!$this->keep_bnode_ids) {
    if ($s_type == 'bnode') {
      $s = '_:b' . abs(crc32($g . $s)) . '_' . (strlen($s) > 12 ? substr(substr($s, 2) , -10) : substr($s, 2));
    }
    if ($o_type == 'bnode') {
      $o = '_:b' . abs(crc32($g . $o)) . '_' . (strlen($o) > 12 ? substr(substr($o, 2), -10) : substr($o, 2));
    }
  }
  /* triple */
  $t = array(
    's' => $this->getStoredTermID($s, $type_ids[$s_type], 's'),
    'p' => $this->getStoredTermID($p, '0', 'id'),
    'o' => $this->getStoredTermID($o, $type_ids[$o_type], 'o'),
    'o_lang_dt' => $this->getStoredTermID($o_dt . $o_lang, $o_dt ? '0' : '2', 'id'),
    'o_comp' => $this->getOComp($o),
    's_type' => $type_ids[$s_type],
    'o_type' => $type_ids[$o_type],
  );
  $t['t'] = $this->getTripleID($t);
  if (is_array($t['t'])) {/* t exists already */
    $t['t'] = $t['t'][0];
  }
  else {
    $this->bufferTripleSQL($t);
  }
  /* g2t */
  $g2t = array('g' => $g, 't' => $t['t']);
  $this->bufferGraphSQL($g2t);
  $this->t_count++;
  /* check buffers; a configured size of 0 (or less) must not cause a modulo by zero */
  $buffer_size = max(1, (int) $this->write_buffer_size);
  if (($this->t_count % $buffer_size) == 0) {
    $force_write = 1;
    $reset_buffers = (($this->t_count % ($buffer_size * 2)) == 0);
    $refresh_lock = (($this->t_count % 25000) == 0);
    $split_tables = (($this->t_count % ($buffer_size * 10)) == 0);
    if ($this->log_inserts) $this->logInserts();
    $this->checkSQLBuffers($force_write, $reset_buffers, $refresh_lock, $split_tables);
  }
}
|
danielebarchiesi@4
|
148
|
danielebarchiesi@4
|
149 /* */
|
danielebarchiesi@4
|
150
|
danielebarchiesi@4
|
/**
 * Determines the next free term ID.
 *
 * Queries MAX(id) from the id2val, s2val and o2val tables in one UNION
 * statement and returns the largest value found plus one (1 when the query
 * fails or yields no usable value).
 *
 * @return int Next term ID to assign.
 */
function getMaxTermID() {
  $con = $this->store->getDBCon();
  $prefix = $this->store->getTablePrefix();
  $selects = array();
  foreach (array('id2val', 's2val', 'o2val') as $tbl) {
    $selects[] = "(SELECT MAX(id) as `id` FROM " . $prefix . $tbl . ')';
  }
  $sql = implode(' UNION ', $selects);
  $highest = 0;
  $rs = $this->queryDB($sql, $con);
  if ($rs && mysql_num_rows($rs)) {
    while ($row = mysql_fetch_array($rs)) {
      if ($highest < $row['id']) {
        $highest = $row['id'];
      }
    }
  }
  return $highest + 1;
}
|
danielebarchiesi@4
|
166
|
danielebarchiesi@4
|
/**
 * Determines the next free triple ID.
 *
 * @return int MAX(t) of the triple table plus one, or 1 when the query fails
 *             or returns no row.
 */
function getMaxTripleID() {
  $con = $this->store->getDBCon();
  $sql = "SELECT MAX(t) AS `id` FROM " . $this->store->getTablePrefix() . "triple";
  $rs = $this->queryDB($sql, $con);
  if (!$rs || !mysql_num_rows($rs)) {
    return 1;
  }
  $row = mysql_fetch_array($rs);
  if (!$row) {
    return 1;
  }
  return $row['id'] + 1;
}
|
danielebarchiesi@4
|
175
|
danielebarchiesi@4
|
/**
 * Returns the ID of a term value, allocating and buffering a new one if needed.
 *
 * Lookup order:
 *  1. the in-memory buffer $this->term_ids (an ID known for another table is
 *     reused and an insert for the requested table is buffered),
 *  2. the database tables id2val / s2val / o2val, starting with the table that
 *     belongs to $tbl (a hit in another table buffers an insert for $tbl),
 *  3. a fresh ID taken from $this->max_term_id.
 *
 * @param string $val     Term value.
 * @param string $type_id Value type ID handed on to bufferIDSQL().
 * @param string $tbl     Table key: 'id', 's' or 'o'.
 *
 * @return mixed The term ID.
 */
function getStoredTermID($val, $type_id, $tbl) {
  $con = $this->store->getDBCon();
  /* buffered */
  if (isset($this->term_ids[$val])) {
    if (!isset($this->term_ids[$val][$tbl])) {
      foreach (array('id', 's', 'o') as $other_tbl) {
        if (isset($this->term_ids[$val][$other_tbl])) {
          $this->term_ids[$val][$tbl] = $this->term_ids[$val][$other_tbl];
          $this->bufferIDSQL($tbl, $this->term_ids[$val][$tbl], $val, $type_id);
          break;
        }
      }
    }
    return $this->term_ids[$val][$tbl];
  }
  /* db */
  $tbl_prefix = $this->store->getTablePrefix();
  $sub_tbls = ($tbl == 'id') ? array('id2val', 's2val', 'o2val') : ($tbl == 's' ? array('s2val', 'id2val', 'o2val') : array('o2val', 'id2val', 's2val'));
  foreach ($sub_tbls as $sub_tbl) {
    $id = 0;
    /* via hash */
    if (preg_match('/^(s2val|o2val)$/', $sub_tbl) && $this->hasHashColumn($sub_tbl)) {
      $sql = "SELECT id AS `id`, val AS `val` FROM " . $tbl_prefix . $sub_tbl . " WHERE val_hash = BINARY '" . $this->getValueHash($val) . "'";
      if (($rs = $this->queryDB($sql, $con)) && mysql_num_rows($rs)) {
        while ($row = mysql_fetch_array($rs)) {
          /* rows sharing a hash must be told apart by exact string identity;
             a loose == would treat e.g. "1e3" and "1000" as the same term */
          if ((string) $row['val'] === (string) $val) {
            $id = $row['id'];
            break;
          }
        }
      }
    }
    else {
      $sql = "SELECT id AS `id` FROM " . $tbl_prefix . $sub_tbl . " WHERE val = BINARY '" . mysql_real_escape_string($val, $con) . "'";
      if (($rs = $this->queryDB($sql . ' LIMIT 1', $con)) && mysql_num_rows($rs)) {
        $row = mysql_fetch_array($rs);
        $id = $row['id'];
      }
    }
    if ($id) {
      $this->term_ids[$val] = array($tbl => $id);
      /* found in a different table: the requested table still needs its own row */
      if ($sub_tbl != $tbl . '2val') {
        $this->bufferIDSQL($tbl, $id, $val, $type_id);
      }
      break;
    }
  }
  /* new */
  if (!isset($this->term_ids[$val])) {
    $this->term_ids[$val] = array($tbl => $this->max_term_id);
    $this->bufferIDSQL($tbl, $this->max_term_id, $val, $type_id);
    $this->max_term_id++;
    /* upgrade tables ? */
    if (($this->column_type == 'mediumint') && ($this->max_term_id >= 16750000)) {
      $this->store->extendColumns();
      $this->column_type = 'int';
    }
  }
  return $this->term_ids[$val][$tbl];
}
|
danielebarchiesi@4
|
237
|
danielebarchiesi@4
|
/**
 * Looks up or allocates the ID of a triple.
 *
 * The return type doubles as an "already exists" signal for addT():
 *  - array($id) when the triple is already known (buffer or database), meaning
 *    no triple row has to be inserted,
 *  - a plain ID when a new one was allocated from $this->max_triple_id.
 *
 * @param array $t Triple with the keys 's', 'p', 'o', 'o_lang_dt', 's_type' and 'o_type'
 *                 used for the database lookup; the whole array is serialized as buffer key.
 *
 * @return mixed array($id) for an existing triple, otherwise the new ID.
 */
function getTripleID($t) {
  $con = $this->store->getDBCon();
  $val = serialize($t);
  /* buffered */
  if (isset($this->triple_ids[$val])) {
    return array($this->triple_ids[$val]);/* hack for "don't insert this triple" */
  }
  /* db */
  $sql = "SELECT t FROM " . $this->store->getTablePrefix() . "triple WHERE
    s = " . $t['s'] . " AND p = " . $t['p'] . " AND o = " . $t['o'] . " AND o_lang_dt = " . $t['o_lang_dt'] . " AND s_type = " . $t['s_type'] . " AND o_type = " . $t['o_type'] . "
    LIMIT 1
  ";
  if (($rs = $this->queryDB($sql, $con)) && mysql_num_rows($rs) && ($row = mysql_fetch_array($rs))) {
    $this->triple_ids[$val] = $row['t'];/* hack for "don't insert this triple" */
    return array($row['t']);/* hack for "don't insert this triple" */
  }
  /* new */
  $this->triple_ids[$val] = $this->max_triple_id;
  $this->max_triple_id++;
  /* table splitting was hard-disabled here ("if (0 && ...)"); the unreachable branch was dropped */
  /* upgrade tables ? // Thanks to patch by Mark Fichtner (https://github.com/Knurg) */
  if (($this->column_type == 'mediumint') && ($this->max_triple_id >= 16750000)) {
    $this->store->extendColumns();
    $this->column_type = 'int';
  }
  return $this->triple_ids[$val];
}
|
danielebarchiesi@4
|
272
|
danielebarchiesi@4
|
/**
 * Builds the comparable ("o_comp") form of an object value.
 *
 * Handled in this order:
 *  1. "21 August 2007" style dates -> "Y-m-d\TH:i:s",
 *  2. xsd-style date-times: a numeric timezone offset is applied and the result
 *     is written with a trailing "Z"; values with "Z" or without any timezone
 *     part get "Z" appended,
 *  3. other strings that start with four digits and consist only of date
 *     characters are returned unchanged,
 *  4. numeric values become a signed, zero-padded fixed-width string,
 *  5. anything else: tags stripped, whitespace/quote runs replaced by "-",
 *     shortened to the first and last 17 characters when longer than 35 bytes.
 *
 * @param string $val Object value.
 *
 * @return string Comparable representation.
 */
function getOComp($val) {
  /* try date (e.g. 21 August 2007) */
  if (preg_match('/^[0-9]{1,2}\s+[a-z]+\s+[0-9]{4}/i', $val) && ($uts = strtotime($val)) && ($uts !== -1)) {
    return date("Y-m-d\TH:i:s", $uts);
  }
  /* xsd date (e.g. 2009-05-28T18:03:38+09:00 2009-05-28T18:03:38GMT) */
  if (preg_match('/^([0-9]{4}\-[0-9]{2}\-[0-9]{2}\T)([0-9\:]+)?([0-9\+\-\:\Z]+)?(\s*[a-z]{2,3})?$/si', $val, $m)) {
    /* yyyy-mm-dd */
    $val = $m[1];
    /* hh:ss (the group is absent from $m when the value has no time part) */
    if (!empty($m[2])) {
      $val .= $m[2];
      /* timezone offset */
      if (isset($m[3]) && ($m[3] != 'Z')) {
        if (preg_match('/([\+\-])([0-9]{2})\:?([0-9]{2})$/', $m[3], $sub_m)) {
          $uts = strtotime(str_replace('T', ' ', $val));
          /* leave the value alone when the time part cannot be parsed */
          if ($uts !== false) {
            /* offset in seconds: hours * 3600 plus minutes * 60 */
            $offset_secs = (3600 * (int) $sub_m[2]) + (60 * (int) $sub_m[3]);
            /* a positive offset is ahead of UTC, so it has to be subtracted */
            $uts = ($sub_m[1] == '-') ? $uts + $offset_secs : $uts - $offset_secs;
            $val = date('Y-m-d\TH:i:s\Z', $uts);
          }
        }
      }
      else {
        $val .= 'Z';
      }
    }
    return $val;
  }
  /* fallback & backup w/o UTC calculation, to be removed in later revision */
  /* NOTE(review): plain integers with five or more digits also match here and skip the numeric padding below */
  if (preg_match('/^[0-9]{4}[0-9\-\:\T\Z\+]+([a-z]{2,3})?$/i', $val)) {
    return $val;
  }
  if (is_numeric($val)) {
    $val = sprintf("%f", $val);
    /* signed value: keep the sign, pad integer part to 18 and fraction to 15 digits */
    if (preg_match("/([\-\+])([0-9]*)\.([0-9]*)/", $val, $m)) {
      return $m[1] . sprintf("%018s", $m[2]) . "." . sprintf("%-015s", $m[3]);
    }
    if (preg_match("/([0-9]*)\.([0-9]*)/", $val, $m)) {
      return "+" . sprintf("%018s", $m[1]) . "." . sprintf("%-015s", $m[2]);
    }
    return $val;
  }
  /* any other string: remove tags, linebreaks etc., but keep MB-chars */
  /* ([\PL\s]+, i.e. non-letters, would also kill digits, so the plain pattern is used) */
  $re = '/[\s\'\"\´\`]+/is';
  $val = trim(preg_replace($re, '-', strip_tags($val)));
  if (strlen($val) > 35) {
    $fnc = function_exists("mb_substr") ? 'mb_substr' : 'substr';
    $val = $fnc($val, 0, 17) . '-' . $fnc($val, -17);
  }
  if ($this->strip_mb_comp_str) {
    $val = urldecode(preg_replace('/\%[0-9A-F]{2}/', '', urlencode($val)));
  }
  return $this->toUTF8($val);
}
|
danielebarchiesi@4
|
329
|
danielebarchiesi@4
|
330 /* */
|
danielebarchiesi@4
|
331
|
danielebarchiesi@4
|
/**
 * Appends one row to the buffered multi-row INSERT for the triple table.
 *
 * The first row of a batch starts the "INSERT IGNORE" statement; later rows
 * are appended comma-separated.
 *
 * @param array $t Triple with the keys 't', 's', 'p', 'o', 'o_lang_dt', 'o_comp',
 *                 's_type' and 'o_type'; only 'o_comp' is escaped and quoted.
 */
function bufferTripleSQL($t) {
  $con = $this->store->getDBCon();
  $tbl = 'triple';
  $fields = array(
    $t['t'],
    $t['s'],
    $t['p'],
    $t['o'],
    $t['o_lang_dt'],
    "'" . mysql_real_escape_string($t['o_comp'], $con) . "'",
    $t['s_type'],
    $t['o_type'],
  );
  $row = "(" . implode(", ", $fields) . ")";
  if (isset($this->sql_buffers[$tbl])) {
    $this->sql_buffers[$tbl] .= ", " . $row;
  }
  else {
    $head = "INSERT IGNORE INTO " . $this->store->getTablePrefix() . $tbl . " (t, s, p, o, o_lang_dt, o_comp, s_type, o_type) VALUES";
    $this->sql_buffers[$tbl] = $head . " " . $row;
  }
}
|
danielebarchiesi@4
|
342
|
danielebarchiesi@4
|
/**
 * Appends one graph-to-triple row to the buffered INSERT for the g2t table.
 *
 * @param array $g2t Array with the keys 'g' (graph term ID) and 't' (triple ID).
 */
function bufferGraphSQL($g2t) {
  $tbl = 'g2t';
  $row = "(" . $g2t['g'] . ", " . $g2t['t'] . ")";
  if (isset($this->sql_buffers[$tbl])) {
    $this->sql_buffers[$tbl] .= ", " . $row;
  }
  else {
    $head = "INSERT IGNORE INTO " . $this->store->getTablePrefix() . $tbl . " (g, t) VALUES";
    $this->sql_buffers[$tbl] = $head . " " . $row;
  }
}
|
danielebarchiesi@4
|
352
|
danielebarchiesi@4
|
/**
 * Appends one term row to the buffered INSERT of a value table.
 *
 * The column set depends on the target table:
 *  - id2val: id, val, val_type
 *  - s2val / o2val with a hash column: id, val_hash, val
 *  - otherwise: id, val
 *
 * @param string $tbl      Table key ('id', 's' or 'o'); '2val' is appended to form the table name.
 * @param mixed  $id       Term ID.
 * @param string $val      Term value (escaped and quoted here).
 * @param string $val_type Value type ID, only written to id2val.
 */
function bufferIDSQL($tbl, $id, $val, $val_type) {
  $con = $this->store->getDBCon();
  $target = $tbl . '2val';
  $quoted_val = "'" . mysql_real_escape_string($val, $con) . "'";
  if ($target == 'id2val') {
    $cols = "id, val, val_type";
    $fields = array($id, $quoted_val, $val_type);
  }
  elseif (in_array($target, array('s2val', 'o2val'), true) && $this->hasHashColumn($target)) {
    $cols = "id, val_hash, val";
    $fields = array($id, "'" . $this->getValueHash($val) . "'", $quoted_val);
  }
  else {
    $cols = "id, val";
    $fields = array($id, $quoted_val);
  }
  $row = "(" . implode(", ", $fields) . ")";
  if (isset($this->sql_buffers[$target])) {
    $this->sql_buffers[$target] .= ", " . $row;
  }
  else {
    $this->sql_buffers[$target] = "INSERT IGNORE INTO " . $this->store->getTablePrefix() . $target . "(" . $cols . ") VALUES " . $row;
  }
}
|
danielebarchiesi@4
|
378
|
danielebarchiesi@4
|
379 /* */
|
danielebarchiesi@4
|
380
|
danielebarchiesi@4
|
/**
 * Writes the buffered INSERT statements to the database.
 *
 * Buffers are flushed in the fixed order triple, g2t, id2val, s2val, o2val.
 * Only after all of them have been written are the in-memory ID buffers reset
 * and the write lock refreshed, each at most once per call, so that the lock
 * is not given up in the middle of a batch.
 *
 * @param int $force_write      Buffers are only written when this is truthy.
 * @param int $reset_id_buffers When truthy (and something was written), clears
 *                              $this->term_ids and $this->triple_ids.
 * @param int $refresh_lock     When truthy (and something was written), releases the store
 *                              lock, sleeps one second and obtains it again.
 * @param int $split_tables     Accepted for compatibility; not used by this method.
 *
 * @return mixed 1 on success; the return value of addError() when the lock
 *               cannot be obtained again.
 */
function checkSQLBuffers($force_write = 0, $reset_id_buffers = 0, $refresh_lock = 0, $split_tables = 0) {
  $con = $this->store->getDBCon();
  if (!$this->keep_time_limit) @set_time_limit($this->v('time_limit', 60, $this->a));
  $flushed = 0;
  foreach (array('triple', 'g2t', 'id2val', 's2val', 'o2val') as $tbl) {
    if (!$force_write || !isset($this->sql_buffers[$tbl])) {
      continue;
    }
    $t1 = ARC2::mtime();
    $this->queryDB($this->sql_buffers[$tbl], $con);
    /* table error */
    if ($er = mysql_error($con)) {
      $this->autoRepairTable($er, $con, $this->sql_buffers[$tbl]);
    }
    unset($this->sql_buffers[$tbl]);
    $flushed = 1;
    if ($this->log_inserts) {
      $t2 = ARC2::mtime();
      $this->inserts[$tbl] = $this->v($tbl, 0, $this->inserts) + max(0, mysql_affected_rows($con));
      $dur = round($t2 - $t1, 4);
      /* the first sample starts with sum 0 so that it is not counted twice below */
      $prev = isset($this->insert_times[$tbl]) ? $this->insert_times[$tbl] : array('min' => $dur, 'max' => $dur, 'sum' => 0);
      $this->insert_times[$tbl] = array(
        'min' => min($dur, $prev['min']),
        'max' => max($dur, $prev['max']),
        'sum' => $dur + $prev['sum'],
      );
    }
  }
  if ($flushed) {
    /* reset term id buffers */
    if ($reset_id_buffers) {
      $this->term_ids = array();
      $this->triple_ids = array();
    }
    /* refresh lock */
    if ($refresh_lock) {
      $this->store->releaseLock();
      $this->has_lock = 0;
      sleep(1);
      if (!$this->store->getLock(5)) return $this->addError('Could not re-obtain lock in "checkSQLBuffers"');
      $this->has_lock = 1;
    }
  }
  return 1;
}
|
danielebarchiesi@4
|
418
|
danielebarchiesi@4
|
/**
 * Records a MySQL error and tries to repair a crashed table.
 *
 * The error is always logged via addError(). When the message names a table
 * and contains "crashed" or "repair", a REPAIR TABLE statement is issued. If
 * the first result row reports Msg_type "error" (or there is no result), the
 * store is dropped and set up again when the store_reset_on_table_crash
 * setting is on; otherwise a second error is logged.
 *
 * @param string   $er  MySQL error message.
 * @param resource $con Database connection.
 * @param string   $sql Statement that triggered the error (only used in the log message).
 */
function autoRepairTable($er, $con, $sql = '') {
  $this->addError('MySQL error: ' . $er . ' (' . $sql . ')');
  if (!preg_match('/Table \'[^\']+\/([a-z0-9\_\-]+)\' .*(crashed|repair)/i', $er, $m)) {
    return;
  }
  $table = rawurlencode($m[1]);
  $rs = $this->queryDB('REPAIR TABLE ' . $table, $con);
  $msg = $rs ? mysql_fetch_array($rs) : array();
  if ($this->v('Msg_type', 'error', $msg) != 'error') {
    return;
  }
  /* auto-reset */
  if ($this->v('store_reset_on_table_crash', 0, $this->a)) {
    $this->store->drop();
    $this->store->setUp();
    return;
  }
  $reason = $this->v('Msg_text', 'unknown error', $msg);
  $this->addError('Auto-repair failed on ' . $table . ': ' . $reason);
}
|
danielebarchiesi@4
|
438
|
danielebarchiesi@4
|
439 /* speed log */
|
danielebarchiesi@4
|
440
|
danielebarchiesi@4
|
/**
 * Appends one throughput line to "arc_insert_log.txt".
 *
 * The line reports the triples added since the previous call and since the
 * start of the load, each with elapsed time and triples per second. Afterwards
 * $this->t_prev and $this->t_count_prev are moved to the current values.
 * Logging is best effort: a log file that cannot be opened is skipped silently.
 */
function logInserts() {
  $t_now = ARC2::mtime();
  $tc_now = $this->t_count;
  $tc_diff = $tc_now - $this->t_count_prev;

  $dur_full = $t_now - $this->t_start;
  $dur_diff = $t_now - $this->t_prev;

  /* two calls within the timer resolution give a zero duration: report 0 t/s instead of dividing by zero */
  $speed_full = ($dur_full > 0) ? round($tc_now / $dur_full) : 0;
  $speed_now = ($dur_diff > 0) ? round($tc_diff / $dur_diff) : 0;

  $r = $tc_diff . ' in ' . round($dur_diff, 5) . ' = ' . $speed_now . ' t/s (' .$tc_now. ' in ' . round($dur_full, 5). ' = ' . $speed_full . ' t/s )';
  $fp = @fopen("arc_insert_log.txt", "a");
  /* only touch the handle when the file could actually be opened */
  if ($fp) {
    @fwrite($fp, $r . "\r\n");
    @fclose($fp);
  }

  $this->t_prev = $t_now;
  $this->t_count_prev = $tc_now;
}
|
danielebarchiesi@4
|
463
|
danielebarchiesi@4
|
464 }
|