Mercurial > hg > rr-repo
comparison sites/all/libraries/ARC2/arc/parsers/ARC2_TurtleParser.php @ 4:ce11bbd8f642
added modules
author | danieleb <danielebarchiesi@me.com> |
---|---|
date | Thu, 19 Sep 2013 10:38:44 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
3:b28be78d8160 | 4:ce11bbd8f642 |
---|---|
1 <?php | |
2 /** | |
3 * ARC2 SPARQL-enhanced Turtle Parser | |
4 * | |
5 * @author Benjamin Nowack | |
6 * @license <http://arc.semsol.org/license> | |
7 * @homepage <http://arc.semsol.org/> | |
8 * @package ARC2 | |
9 * @version 2010-11-16 | |
10 */ | |
11 | |
12 ARC2::inc('RDFParser'); | |
13 | |
14 class ARC2_TurtleParser extends ARC2_RDFParser { | |
15 | |
/**
 * Creates the parser; all set-up is delegated to the parent constructor.
 *
 * @param mixed $a      Configuration, passed through to the parent unchanged.
 * @param mixed $caller Calling component, taken by reference and passed through.
 */
function __construct($a, &$caller) {
  parent::__construct($a, $caller);
}
19 | |
/**
 * Initialises parser state after the parent's own initialisation:
 * the state counter, the three well-known namespace URIs, the
 * namespace-to-prefix map, the unparsed-code buffer and the loop limit
 * (option "turtle_max_parsing_loops", default 500).
 */
function __init() {
  parent::__init();
  $this->state = 0;
  $this->xml = 'http://www.w3.org/XML/1998/namespace';
  $this->rdf = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
  $this->xsd = 'http://www.w3.org/2001/XMLSchema#';
  /* map each namespace URI to its conventional prefix, in xml/rdf/xsd order */
  $ns_to_prefix = array();
  foreach (array('xml', 'rdf', 'xsd') as $prefix) {
    $ns_to_prefix[$this->$prefix] = $prefix;
  }
  $this->nsp = $ns_to_prefix;
  $this->unparsed_code = '';
  $this->max_parsing_loops = $this->v('turtle_max_parsing_loops', 500, $this->a);
}
30 | |
31 /* */ | |
32 | |
/**
 * Pre-processes $v and then delegates the actual match to ARC2::x().
 *
 * A leading run of \xA0 / \xC2 bytes is replaced by one space, and any
 * leading "#" comments (up to the end of their line) are dropped.
 *
 * NOTE(review): judging by the callers in this class, ARC2::x() returns a
 * falsy value on no match, otherwise an array of captures whose last
 * element is the unconsumed remainder - confirm against ARC2::x().
 *
 * @param string $re      Pattern fragment handed to ARC2::x().
 * @param string $v       Input text.
 * @param string $options Regex modifiers handed to ARC2::x().
 * @return mixed          Whatever ARC2::x() returns.
 */
function x($re, $v, $options = 'si') {
  $v = preg_replace('/^[\xA0\xC2]+/', ' ', $v);
  /* comment removal */
  for (;;) {
    if (!preg_match('/^\s*(\#[^\xd\xa]*)(.*)$/si', $v, $m)) {
      break;
    }
    $v = $m[2];
  }
  return ARC2::x($re, $v, $options);
}
41 | |
/**
 * Increments the blank node counter and returns a new blank node label.
 *
 * @return string "_:" followed by the configured bnode prefix and the counter.
 */
function createBnodeID(){
  $this->bnode_id++;
  $label = $this->bnode_prefix . $this->bnode_id;
  return '_:' . $label;
}
46 | |
47 /* */ | |
48 | |
/**
 * Appends a triple to the result list and bumps the triple counter.
 *
 * When duplicate skipping is enabled, an md5 of the serialized triple is
 * used as fingerprint and already-seen triples are ignored.
 *
 * @param array $t Triple structure to store.
 */
function addT($t) {
  if ($this->skip_dupes) {
    $fingerprint = md5(serialize($t));
    if (isset($this->added_triples[$fingerprint])) {
      return;/* already stored */
    }
    $this->added_triples[$fingerprint] = true;
  }
  $this->triples[$this->t_count] = $t;
  $this->t_count++;
}
63 | |
64 /* */ | |
65 | |
/**
 * Returns the stored triples, or an empty array as fallback.
 *
 * @return array
 */
function getTriples() {
  $triples = $this->v('triples', array());
  return $triples;
}
69 | |
/**
 * Returns the current value of the triple counter.
 *
 * @return int
 */
function countTriples() {
  $count = $this->t_count;
  return $count;
}
73 | |
74 /* */ | |
75 | |
/**
 * Returns the text left over after parsing, or an empty string as fallback.
 *
 * @return string
 */
function getUnparsedCode() {
  $leftover = $this->v('unparsed_code', '');
  return $leftover;
}
79 | |
80 /* */ | |
81 | |
/**
 * Resets the prefix table to rdf/rdfs/owl/xsd and then adds (or overrides
 * with) the prefixes from the "ns" configuration option, if any.
 * Keys are stored with a trailing colon.
 */
function setDefaultPrefixes() {
  $this->prefixes = array(
    'rdf:' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
    'rdfs:' => 'http://www.w3.org/2000/01/rdf-schema#',
    'owl:' => 'http://www.w3.org/2002/07/owl#',
    'xsd:' => 'http://www.w3.org/2001/XMLSchema#',
  );
  $configured = $this->v('ns', array(), $this->a);
  if (!$configured) {
    return;
  }
  foreach ($configured as $name => $uri) {
    $key = $name . ':';
    $this->prefixes[$key] = $uri;
  }
}
93 | |
94 | |
/**
 * Reads Turtle from $path (or from $data) in chunks and parses it into triples.
 *
 * The stream is consumed in 8192-byte chunks. Each chunk is appended to a
 * buffer, the prologue (base/prefix declarations) is parsed first, then
 * triples blocks are parsed until no further progress is made. Text that
 * could not be consumed is kept for the next chunk. If more than
 * $this->max_parsing_loops consecutive chunks yield no progress, an error
 * is recorded and reading stops.
 *
 * After the stream ends, pending triples of the last (unterminated) statement
 * are added, the leftover text is stored in $this->unparsed_code, and an
 * error is recorded if non-blank text remains and no other error exists.
 *
 * @param string $path         Location handed to the reader.
 * @param string $data         Optional inline data handed to the reader.
 * @param bool   $iso_fallback Not used by this method.
 * @return mixed               Result of $this->done().
 */
function parse($path, $data = '', $iso_fallback = false) {
  $this->setDefaultPrefixes();
  /* reader */
  if (!$this->v('reader')) {
    ARC2::inc('Reader');
    $this->reader = new ARC2_Reader($this->a, $this);
  }
  $this->reader->setAcceptHeader('Accept: application/x-turtle; q=0.9, */*; q=0.1');
  $this->reader->activate($path, $data);
  $this->base = $this->v1('base', $this->reader->base, $this->a);
  $this->r = array('vars' => array());
  /* parse */
  $buffer = '';
  $more_triples = array();
  $sub_v = '';
  $sub_v2 = '';
  $loops = 0;
  $prologue_done = 0;
  while ($d = $this->reader->readStream(0, 8192)) {
    $buffer .= $d;
    $sub_v = $buffer;
    do {
      $proceed = 0;
      if (!$prologue_done) {
        $proceed = 1;
        if ((list($sub_r, $sub_v) = $this->xPrologue($sub_v)) && $sub_r) {
          $loops = 0;
          $sub_v .= $this->reader->readStream(0, 128);
          /* we might have missed the final DOT in the previous prologue loop */
          if ($sub_r = $this->x('\.', $sub_v)) $sub_v = $sub_r[1];
          if ($this->x("\@?(base|prefix)", $sub_v)) {/* more prologue to come, use outer loop */
            $proceed = 0;
          }
        }
        else {
          $prologue_done = 1;
        }
      }
      if ($prologue_done && (list($sub_r, $sub_v, $more_triples, $sub_v2) = $this->xTriplesBlock($sub_v)) && is_array($sub_r)) {
        $proceed = 1;
        $loops = 0;
        foreach ($sub_r as $t) {
          $this->addT($t);
        }
      }
    } while ($proceed);
    $loops++;
    $buffer = $sub_v;
    if ($loops > $this->max_parsing_loops) {/* most probably a parser or code bug, might also be a huge object value, though */
      $this->addError('too many loops: ' . $loops . '. Could not parse "' . substr($buffer, 0, 200) . '..."');
      break;
    }
  }
  /* triples of the last statement that was not closed by a DOT */
  foreach ($more_triples as $t) {
    $this->addT($t);
  }
  $sub_v = count($more_triples) ? $sub_v2 : $sub_v;
  $buffer = $sub_v;
  $this->unparsed_code = $buffer;
  $this->reader->closeStream();
  unset($this->reader);
  /* remove trailing comments */
  while (preg_match('/^\s*(\#[^\xd\xa]*)(.*)$/si', $this->unparsed_code, $m)) $this->unparsed_code = $m[2];
  if ($this->unparsed_code && !$this->getErrors()) {
    /* flatten line breaks only; the former class [\x0a|\x0d] also replaced literal "|" characters */
    $rest = preg_replace('/[\x0a\x0d]/', ' ', substr($this->unparsed_code, 0, 30));
    if (trim($rest)) $this->addError('Could not parse "' . $rest . '"');
  }
  return $this->done();
}
164 | |
/**
 * Parses an optional base declaration followed by any number of prefix
 * declarations. Only attempted while no triple has been added yet.
 *
 * Side effects: sets $this->base and registers entries in $this->prefixes.
 *
 * @param string $v Input text.
 * @return array    array(1 if anything was declared else 0, remaining text).
 */
function xPrologue($v) {
  if ($this->t_count) {
    return array(0, $v);
  }
  $found = 0;
  list($base, $v) = $this->xBaseDecl($v);
  if ($base) {
    $this->base = $base;
    $found = 1;
  }
  for (;;) {
    list($decl, $v) = $this->xPrefixDecl($v);
    if (!$decl) {
      break;
    }
    $this->prefixes[$decl['prefix']] = $decl['uri'];
    $found = 1;
  }
  return array($found, $v);
}
179 | |
180 /* 3 */ | |
181 | |
/**
 * Parses a base declaration: optional "@", the keyword "base", whitespace,
 * an IRI reference and an optional trailing DOT.
 *
 * @param string $v Input text.
 * @return array    array(IRI reference as returned by xIRI_REF, remaining text)
 *                  or array(0, $v) when no base declaration is found.
 */
function xBaseDecl($v) {
  $keyword = $this->x("\@?base\s+", $v);
  if (!$keyword) {
    return array(0, $v);
  }
  list($iri, $rest) = $this->xIRI_REF($keyword[1]);
  if (!$iri) {
    return array(0, $v);
  }
  /* the closing DOT is optional here */
  $dot = $this->x('\.', $rest);
  if ($dot) {
    $rest = $dot[1];
  }
  return array($iri, $rest);
}
193 | |
194 /* 4 */ | |
195 | |
/**
 * Parses a prefix declaration: optional "@", the keyword "prefix",
 * whitespace, a namespace prefix ending in ":", an IRI reference and an
 * optional trailing DOT.
 *
 * @param string $v Input text.
 * @return array    array(array('prefix', 'uri_ref', 'uri'), remaining text)
 *                  where 'uri' is the reference resolved against $this->base,
 *                  or array(0, $v) when no prefix declaration is found.
 */
function xPrefixDecl($v) {
  $keyword = $this->x("\@?prefix\s+", $v);
  if (!$keyword) {
    return array(0, $v);
  }
  list($prefix, $rest) = $this->xPNAME_NS($keyword[1]);
  if (!$prefix) {
    return array(0, $v);
  }
  list($iri, $rest) = $this->xIRI_REF($rest);
  if (!$iri) {
    return array(0, $v);
  }
  $uri = $this->calcURI($iri, $this->base);
  /* the closing DOT is optional here */
  $dot = $this->x('\.', $rest);
  if ($dot) {
    $rest = $dot[1];
  }
  return array(array('prefix' => $prefix, 'uri_ref' => $iri, 'uri' => $uri), $rest);
}
211 | |
212 /* 21.., 32.. */ | |
213 | |
/**
 * Parses as many triple statements as possible from $v.
 *
 * Implemented as a small state machine:
 *   1 = expecting subject, 2 = expecting predicate,
 *   3 = expecting object,  4 = expecting "." / ";" / "," / "}".
 * Triples of a statement are collected in a pending list and only moved to
 * the result once the statement is closed (DOT, "}" or a following keyword).
 *
 * The triple template $t is re-used across "," and ";" continuations, so
 * every object branch must overwrite all object fields. The collection and
 * blank-node-property-list branches therefore reset o_lang as well as
 * o_datatype; otherwise the language tag of a preceding literal object
 * would leak onto the bnode-object triple.
 *
 * @param string $v Input text.
 * @return array    array(
 *                    completed triples (or 0 when there are none),
 *                    text after the last completed statement,
 *                    pending triples of a statement that is not closed yet,
 *                    text after those pending triples
 *                  )
 */
function xTriplesBlock($v) {
  $pre_r = array();
  $r = array();
  $state = 1;
  $sub_v = $v;
  $buffer = $sub_v;
  do {
    $proceed = 0;
    if ($state == 1) {/* expecting subject */
      $t = array('type' => 'triple', 's' => '', 'p' => '', 'o' => '', 's_type' => '', 'p_type' => '', 'o_type' => '', 'o_datatype' => '', 'o_lang' => '');
      if ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
        $t['s'] = $sub_r['value'];
        $t['s_type'] = $sub_r['type'];
        $state = 2;
        $proceed = 1;
        if ($sub_r = $this->x('(\}|\.)', $sub_v)) {
          if ($t['s_type'] == 'placeholder') {
            $state = 4;
          }
          else {
            $this->addError('"' . $sub_r[1]. '" after subject found.');
          }
        }
      }
      elseif ((list($sub_r, $sub_v) = $this->xCollection($sub_v)) && $sub_r) {
        $t['s'] = $sub_r['id'];
        $t['s_type'] = $sub_r['type'];
        $pre_r = array_merge($pre_r, $sub_r['triples']);
        $state = 2;
        $proceed = 1;
        if ($sub_r = $this->x('\.', $sub_v)) {
          $this->addError('DOT after subject found.');
        }
      }
      elseif ((list($sub_r, $sub_v) = $this->xBlankNodePropertyList($sub_v)) && $sub_r) {
        $t['s'] = $sub_r['id'];
        $t['s_type'] = $sub_r['type'];
        $pre_r = array_merge($pre_r, $sub_r['triples']);
        $state = 2;
        $proceed = 1;
      }
      elseif ($sub_r = $this->x('\.', $sub_v)) {
        $this->addError('Subject expected, DOT found.' . $sub_v);
      }
    }
    if ($state == 2) {/* expecting predicate */
      if ($sub_r = $this->x('a\s+', $sub_v)) {/* "a" is shorthand for rdf:type */
        $sub_v = $sub_r[1];
        $t['p'] = $this->rdf . 'type';
        $t['p_type'] = 'uri';
        $state = 3;
        $proceed = 1;
      }
      elseif ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
        if ($sub_r['type'] == 'bnode') {
          $this->addError('Blank node used as triple predicate');
        }
        $t['p'] = $sub_r['value'];
        $t['p_type'] = $sub_r['type'];
        $state = 3;
        $proceed = 1;
      }
      elseif ($sub_r = $this->x('\.', $sub_v)) {
        $state = 4;
      }
      elseif ($sub_r = $this->x('\}', $sub_v)) {
        $buffer = $sub_v;
        $r = array_merge($r, $pre_r);
        $pre_r = array();
        $proceed = 0;
      }
    }
    if ($state == 3) {/* expecting object */
      if ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
        $t['o'] = $sub_r['value'];
        $t['o_type'] = $sub_r['type'];
        $t['o_lang'] = $this->v('lang', '', $sub_r);
        $t['o_datatype'] = $this->v('datatype', '', $sub_r);
        $pre_r[] = $t;
        $state = 4;
        $proceed = 1;
      }
      elseif ((list($sub_r, $sub_v) = $this->xCollection($sub_v)) && $sub_r) {
        $t['o'] = $sub_r['id'];
        $t['o_type'] = $sub_r['type'];
        $t['o_datatype'] = '';
        $t['o_lang'] = '';/* drop any language tag left over from a previous object */
        $pre_r = array_merge($pre_r, array($t), $sub_r['triples']);
        $state = 4;
        $proceed = 1;
      }
      elseif ((list($sub_r, $sub_v) = $this->xBlankNodePropertyList($sub_v)) && $sub_r) {
        $t['o'] = $sub_r['id'];
        $t['o_type'] = $sub_r['type'];
        $t['o_datatype'] = '';
        $t['o_lang'] = '';/* drop any language tag left over from a previous object */
        $pre_r = array_merge($pre_r, array($t), $sub_r['triples']);
        $state = 4;
        $proceed = 1;
      }
    }
    if ($state == 4) {/* expecting . or ; or , or } */
      if ($sub_r = $this->x('\.', $sub_v)) {
        /* statement complete: commit the pending triples */
        $sub_v = $sub_r[1];
        $buffer = $sub_v;
        $r = array_merge($r, $pre_r);
        $pre_r = array();
        $state = 1;
        $proceed = 1;
      }
      elseif ($sub_r = $this->x('\;', $sub_v)) {
        $sub_v = $sub_r[1];
        $state = 2;
        $proceed = 1;
      }
      elseif ($sub_r = $this->x('\,', $sub_v)) {
        $sub_v = $sub_r[1];
        $state = 3;
        $proceed = 1;
        if ($sub_r = $this->x('\}', $sub_v)) {
          $this->addError('Object expected, } found.');
        }
      }
      /* a brace or one of these keywords ends the block; it is left unconsumed */
      if ($sub_r = $this->x('(\}|\{|OPTIONAL|FILTER|GRAPH)', $sub_v)) {
        $buffer = $sub_v;
        $r = array_merge($r, $pre_r);
        $pre_r = array();
        $proceed = 0;
      }
    }
  } while ($proceed);
  return count($r) ? array($r, $buffer, $pre_r, $sub_v) : array(0, $buffer, $pre_r, $sub_v);
}
345 | |
346 /* 39.. */ | |
347 | |
/**
 * Parses a blank node property list: "[" predicate-object list "]".
 *
 * A fresh blank node id is created as soon as "[" is seen (also when the
 * list later turns out to be unparsable). Uses the same state numbers as
 * xTriplesBlock: 2 = predicate, 3 = object, 4 = separator or "]".
 *
 * The triple template $t is re-used for every predicate/object pair, so the
 * collection and nested-property-list branches reset o_lang as well as
 * o_datatype; otherwise the language tag of a preceding literal object
 * would leak onto the bnode-object triple.
 *
 * @param string $v Input text.
 * @return array    array(array('id', 'type' => 'bnode', 'triples'), remaining text)
 *                  or array(0, $v) when $v does not start with a closed list.
 */
function xBlankNodePropertyList($v) {
  if ($sub_r = $this->x('\[', $v)) {
    $sub_v = $sub_r[1];
    $s = $this->createBnodeID();
    $r = array('id' => $s, 'type' => 'bnode', 'triples' => array());
    $t = array('type' => 'triple', 's' => $s, 'p' => '', 'o' => '', 's_type' => 'bnode', 'p_type' => '', 'o_type' => '', 'o_datatype' => '', 'o_lang' => '');
    $state = 2;
    $closed = 0;
    do {
      $proceed = 0;
      if ($state == 2) {/* expecting predicate */
        if ($sub_r = $this->x('a\s+', $sub_v)) {/* "a" is shorthand for rdf:type */
          $sub_v = $sub_r[1];
          $t['p'] = $this->rdf . 'type';
          $t['p_type'] = 'uri';
          $state = 3;
          $proceed = 1;
        }
        elseif ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
          $t['p'] = $sub_r['value'];
          $t['p_type'] = $sub_r['type'];
          $state = 3;
          $proceed = 1;
        }
      }
      if ($state == 3) {/* expecting object */
        if ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
          $t['o'] = $sub_r['value'];
          $t['o_type'] = $sub_r['type'];
          $t['o_lang'] = $this->v('lang', '', $sub_r);
          $t['o_datatype'] = $this->v('datatype', '', $sub_r);
          $r['triples'][] = $t;
          $state = 4;
          $proceed = 1;
        }
        elseif ((list($sub_r, $sub_v) = $this->xCollection($sub_v)) && $sub_r) {
          $t['o'] = $sub_r['id'];
          $t['o_type'] = $sub_r['type'];
          $t['o_datatype'] = '';
          $t['o_lang'] = '';/* drop any language tag left over from a previous object */
          $r['triples'] = array_merge($r['triples'], array($t), $sub_r['triples']);
          $state = 4;
          $proceed = 1;
        }
        elseif((list($sub_r, $sub_v) = $this->xBlankNodePropertyList($sub_v)) && $sub_r) {
          $t['o'] = $sub_r['id'];
          $t['o_type'] = $sub_r['type'];
          $t['o_datatype'] = '';
          $t['o_lang'] = '';/* drop any language tag left over from a previous object */
          $r['triples'] = array_merge($r['triples'], array($t), $sub_r['triples']);
          $state = 4;
          $proceed = 1;
        }
      }
      if ($state == 4) {/* expecting . or ; or , or ] */
        /* independent checks (not elseif): e.g. a ";" directly followed by "]" closes the list */
        if ($sub_r = $this->x('\.', $sub_v)) {
          $sub_v = $sub_r[1];
          $state = 1;
          $proceed = 1;
        }
        if ($sub_r = $this->x('\;', $sub_v)) {
          $sub_v = $sub_r[1];
          $state = 2;
          $proceed = 1;
        }
        if ($sub_r = $this->x('\,', $sub_v)) {
          $sub_v = $sub_r[1];
          $state = 3;
          $proceed = 1;
        }
        if ($sub_r = $this->x('\]', $sub_v)) {
          $sub_v = $sub_r[1];
          $proceed = 0;
          $closed = 1;
        }
      }
    } while ($proceed);
    if ($closed) {
      return array($r, $sub_v);
    }
    return array(0, $v);
  }
  return array(0, $v);
}
430 | |
431 /* 40.. */ | |
432 | |
/**
 * Parses a collection "( item item ... )" into rdf:first / rdf:rest triples.
 *
 * A fresh blank node id is created for the head as soon as "(" is seen and
 * one more for every further list cell. Items may be plain terms, nested
 * collections or blank node property lists; triples of nested structures
 * are appended right after the rdf:first triple that references them.
 *
 * @param string $v Input text.
 * @return array    array(array('id', 'type' => 'bnode', 'triples'), remaining text)
 *                  or array(0, $v) when $v does not start with a closed collection.
 */
function xCollection($v) {
  $open = $this->x('\(', $v);
  if (!$open) {
    return array (0, $v);
  }
  $rest = $open[1];
  $cell = $this->createBnodeID();
  $r = array('id' => $cell, 'type' => 'bnode', 'triples' => array());
  $closed = 0;
  while (true) {
    $nested = array();
    list($item, $rest) = $this->xVarOrTerm($rest);
    if ($item) {
      $o = $item['value'];
      $o_lang = $this->v('lang', '', $item);
      $o_datatype = $this->v('datatype', '', $item);
    }
    else {
      list($item, $rest) = $this->xCollection($rest);
      if (!$item) {
        list($item, $rest) = $this->xBlankNodePropertyList($rest);
      }
      if (!$item) {
        break;/* neither term nor nested structure: give up */
      }
      $o = $item['id'];
      $o_lang = '';
      $o_datatype = '';
      $nested = $item['triples'];
    }
    $r['triples'][] = array('type' => 'triple', 's' => $cell, 'p' => $this->rdf . 'first', 'o' => $o, 's_type' => 'bnode', 'p_type' => 'uri', 'o_type' => $item['type'], 'o_lang' => $o_lang, 'o_datatype' => $o_datatype);
    if ($nested) {
      $r['triples'] = array_merge($r['triples'], $nested);
    }
    $close = $this->x('\)', $rest);
    if ($close) {
      /* end of list: terminate with rdf:nil */
      $rest = $close[1];
      $r['triples'][] = array('type' => 'triple', 's' => $cell, 'p' => $this->rdf . 'rest', 'o' => $this->rdf . 'nil', 's_type' => 'bnode', 'p_type' => 'uri', 'o_type' => 'uri', 'o_lang' => '', 'o_datatype' => '');
      $closed = 1;
      break;
    }
    /* more items expected: link to a new list cell */
    $next_cell = $this->createBnodeID();
    $r['triples'][] = array('type' => 'triple', 's' => $cell, 'p' => $this->rdf . 'rest', 'o' => $next_cell, 's_type' => 'bnode', 'p_type' => 'uri', 'o_type' => 'bnode', 'o_lang' => '', 'o_datatype' => '');
    $cell = $next_cell;
  }
  if ($closed) {
    return array($r, $rest);
  }
  return array (0, $v);
}
475 | |
476 /* 42 */ | |
477 | |
/**
 * Parses either a variable or, failing that, a graph term.
 *
 * @param string $v Input text.
 * @return array    array(term structure, remaining text) or array(0, $v).
 */
function xVarOrTerm($v) {
  list($term, $rest) = $this->xVar($v);
  if ($term) {
    return array($term, $rest);
  }
  list($term, $rest) = $this->xGraphTerm($v);
  if ($term) {
    return array($term, $rest);
  }
  return array(0, $v);
}
487 | |
488 /* 44, 74.., 75.. */ | |
489 | |
/**
 * Parses a variable: "?" or "$" followed by a variable name.
 *
 * Each distinct variable name is recorded once in $this->r['vars']. The
 * membership test is strict, so numeric-looking names such as "1" and "01"
 * are kept apart (a loose comparison would treat them as equal).
 *
 * @param string $v Input text.
 * @return array    array(array('value' => name, 'type' => 'var'), remaining text)
 *                  or array(0, $v).
 */
function xVar($v) {
  if ($r = $this->x('(\?|\$)([^\s]+)', $v)) {
    if ((list($sub_r, $sub_v) = $this->xVARNAME($r[2])) && $sub_r) {
      if (!in_array($sub_r, $this->r['vars'], true)) {
        $this->r['vars'][] = $sub_r;
      }
      /* remainder = unconsumed part of the token plus the text after the token */
      return array(array('value' => $sub_r, 'type' => 'var'), $sub_v . $r[3]);
    }
  }
  return array(0, $v);
}
501 | |
502 /* 45 */ | |
503 | |
/**
 * Tries the graph term sub-parsers in a fixed order and returns the first hit.
 *
 * Each entry maps a sub-parser name (method "x" . name) to the type used
 * when the sub-parser's result carries no type of its own. Scalar results
 * are wrapped as array('value' => result).
 *
 * @param string $v Input text.
 * @return array    array(term structure with 'type' set, remaining text)
 *                  or array(0, $v).
 */
function xGraphTerm($v) {
  $default_types = array(
    'IRIref' => 'uri',
    'RDFLiteral' => 'literal',
    'NumericLiteral' => 'literal',
    'BooleanLiteral' => 'literal',
    'BlankNode' => 'bnode',
    'NIL' => 'uri',
    'Placeholder' => 'placeholder'
  );
  foreach ($default_types as $term => $type) {
    $method = 'x' . $term;
    list($match, $rest) = $this->$method($v);
    if (!$match) {
      continue;
    }
    if (!is_array($match)) {
      $match = array('value' => $match);
    }
    $match['type'] = $this->v1('type', $type, $match);
    return array($match, $rest);
  }
  return array(0, $v);
}
525 | |
526 /* 60 */ | |
527 | |
/**
 * Parses a string literal with an optional language tag or "^^" datatype.
 *
 * The datatype is accepted when xIRIref() yields a (truthy) IRI. The former
 * code tested $sub_r[1], i.e. the second character of the IRI string (or an
 * offset into the integer 0 on failure), and dropped the "^^" from the
 * remaining text even when no datatype could be parsed. Now the text is
 * only consumed when a datatype was actually recognised.
 *
 * @param string $v Input text.
 * @return array    array(literal structure with optional 'lang' or 'datatype',
 *                  remaining text) or array(0, $v).
 */
function xRDFLiteral($v) {
  list($literal, $rest) = $this->xString($v);
  if (!$literal) {
    return array(0, $v);
  }
  $literal['value'] = $this->unescapeNtripleUTF($literal['value']);
  list($lang, $after_lang) = $this->xLANGTAG($rest);
  if ($lang) {
    $literal['lang'] = $lang;
    $rest = $after_lang;
  }
  elseif (!$this->x('\s', $rest) && ($marker = $this->x('\^\^', $rest))) {
    /* the "^^" must directly follow the string */
    list($datatype, $after_datatype) = $this->xIRIref($marker[1]);
    if ($datatype) {
      $literal['datatype'] = $datatype;
      $rest = $after_datatype;
    }
  }
  return array($literal, $rest);
}
542 | |
543 /* 61.., 62.., 63.., 64.. */ | |
544 | |
/**
 * Parses an optionally signed number, trying double, decimal and integer
 * syntax in that order.
 *
 * @param string $v Input text.
 * @return array    array(array('value' => sign . digits, 'type' => 'literal',
 *                  'datatype' => xsd type URI), remaining text) or array(0, $v).
 */
function xNumericLiteral($v) {
  /* the sign group is optional, so this match is used unconditionally */
  $sign_match = $this->x('(\-|\+)?', $v);
  $sign = $sign_match[1];
  $rest = $sign_match[2];
  $candidates = array('DOUBLE' => 'double', 'DECIMAL' => 'decimal', 'INTEGER' => 'integer');
  foreach ($candidates as $rule => $xsd_name) {
    $method = 'x' . $rule;
    list($digits, $rest) = $this->$method($rest);
    if ($digits === false) {
      continue;
    }
    $literal = array('value' => $sign . $digits, 'type' => 'literal', 'datatype' => $this->xsd . $xsd_name);
    return array($literal, $rest);
  }
  return array(0, $v);
}
558 | |
559 /* 65.. */ | |
560 | |
/**
 * Parses the keywords "true" / "false" at the start of $v.
 *
 * @param string $v Input text.
 * @return array    array(matched keyword, remaining text) or array(0, $v).
 */
function xBooleanLiteral($v) {
  $match = $this->x('(true|false)', $v);
  if (!$match) {
    return array(0, $v);
  }
  return array($match[1], $match[2]);
}
567 | |
568 /* 66.., 87.., 88.., 89.., 90.., 91.. */ | |
569 | |
/**
 * Parses a quoted string (single, double, or their triple-quoted long forms).
 *
 * Scans for the next occurrence of the opening delimiter. A delimiter
 * preceded by an odd number of backslashes counts as escaped and the scan
 * continues behind it.
 *
 * @param string $v Input text.
 * @return array    array(array('value' => content passed through toUTF8(),
 *                  'type' => 'literal', 'sub_type' => quoting style), remaining text)
 *                  or array(0, $v) when no closed string is found.
 */
function xString($v) {/* largely simplified, may need some tweaks in following revisions */
  if (!preg_match('/^\s*([\']{3}|\'|[\"]{3}|\")(.*)$/s', $v, $m)) return array(0, $v);
  $quote = $m[1];
  $body = $m[2];
  $sub_types = array("'''" => 'literal_long1', '"""' => 'literal_long2', "'" => 'literal1', '"' => 'literal2');
  $sub_type = $sub_types[$quote];
  $offset = 0;
  while (($hit = strpos($body, $quote, $offset)) !== false) {
    $tail = substr($body, $hit + strlen($quote));
    $content = substr($body, 0, $hit);
    /* odd number of trailing backslashes => the delimiter is escaped */
    $escaped = preg_match('/([\x5c]+)$/s', $content, $bs) && (strlen($bs[1]) % 2);
    if (!$escaped) {
      return array(array('value' => $this->toUTF8($content) , 'type' => 'literal', 'sub_type' => $sub_type), $tail);
    }
    $offset = $hit + 1;
  }
  return array(0, $v);
}
599 | |
600 /* 67 */ | |
601 | |
/**
 * Parses an IRI, either as "<...>" reference (resolved against $this->base)
 * or as prefixed name.
 *
 * @param string $v Input text.
 * @return array    array(IRI, remaining text) or array(0, $v).
 */
function xIRIref($v) {
  list($iri, $v) = $this->xIRI_REF($v);
  if ($iri) {
    return array($this->calcURI($iri, $this->base), $v);
  }
  list($iri, $v) = $this->xPrefixedName($v);
  if ($iri) {
    return array($iri, $v);
  }
  return array(0, $v);
}
611 | |
612 /* 68 */ | |
613 | |
/**
 * Parses a prefixed name: "prefix:local", or a bare "prefix:" which is
 * expanded to the namespace registered for that prefix.
 *
 * @param string $v Input text.
 * @return array    array(expanded IRI, remaining text) or array(0, $v).
 */
function xPrefixedName($v) {
  list($iri, $v) = $this->xPNAME_LN($v);
  if ($iri) {
    return array($iri, $v);
  }
  list($ns, $rest) = $this->xPNAME_NS($v);
  if ($ns) {
    if (isset($this->prefixes[$ns])) {
      return array($this->prefixes[$ns], $rest);
    }
    return array(0, $v);
  }
  return array(0, $v);
}
623 | |
624 /* 69.., 73.., 93, 94.. */ | |
625 | |
/**
 * Parses a blank node: a labelled one ("_:label") or the anonymous form
 * "[]" (brackets with only whitespace in between), which gets a new id.
 *
 * @param string $v Input text.
 * @return array    array(array('type' => 'bnode', 'value' => id), remaining text)
 *                  or array(0, $v).
 */
function xBlankNode($v) {
  $marker = $this->x('\_\:', $v);
  if ($marker) {
    list($label, $rest) = $this->xPN_LOCAL($marker[1]);
    if ($label) {
      return array(array('type' => 'bnode', 'value' => '_:' . $label), $rest);
    }
  }
  $anonymous = $this->x('\[[\x20\x9\xd\xa]*\]', $v);
  if ($anonymous) {
    return array(array('type' => 'bnode', 'value' => $this->createBnodeID()), $anonymous[1]);
  }
  return array(0, $v);
}
635 | |
636 /* 70.. @@sync with SPARQLParser */ | |
637 | |
/**
 * Parses an IRI reference in angle brackets.
 *
 * Three forms are recognised, in this order:
 *   - a placeholder wrapped in angle brackets, "<${...}>", returned verbatim,
 *   - the empty reference "<>", reported as true,
 *   - any other "<...>" not starting with whitespace and without nested
 *     angle brackets.
 *
 * The placeholder branch inspects the first element of xPlaceholder()'s
 * result. The array itself is always non-empty and therefore always truthy,
 * so testing the array never detected a failed placeholder parse.
 *
 * @param string $v Input text.
 * @return array    array(reference text or true, remaining text) or array(0, $v).
 */
function xIRI_REF($v) {
  //if ($r = $this->x('\<([^\<\>\"\{\}\|\^\'[:space:]]*)\>', $v)) {
  if (($r = $this->x('\<(\$\{[^\>]*\})\>', $v)) && (list($sub_r) = $this->xPlaceholder($r[1])) && $sub_r) {
    return array($r[1], $r[2]);
  }
  elseif ($r = $this->x('\<\>', $v)) {
    return array(true, $r[1]);
  }
  elseif ($r = $this->x('\<([^\s][^\<\>]*)\>', $v)) {
    return array($r[1] ? $r[1] : true, $r[2]);
  }
  return array(0, $v);
}
651 | |
652 /* 71 */ | |
653 | |
/**
 * Parses a namespace prefix: an optional prefix name followed by ":".
 *
 * @param string $v Input text.
 * @return array    array(prefix including the trailing colon, remaining text)
 *                  or array(0, $v) when no colon follows.
 */
function xPNAME_NS($v) {
  list($name, $rest) = $this->xPN_PREFIX($v);
  if (!$name) {
    $name = '';/* the prefix name itself may be empty (":") */
  }
  $colon = $this->x("\:", $rest);
  if ($colon) {
    return array($name . ':', $colon[1]);
  }
  return array(0, $v);
}
659 | |
660 /* 72 */ | |
661 | |
/**
 * Parses "prefix:local" and expands it with the registered namespace.
 *
 * The local part must follow the colon without whitespace, and the prefix
 * must be known in $this->prefixes.
 *
 * @param string $v Input text.
 * @return array    array(namespace . local name, remaining text) or array(0, $v).
 */
function xPNAME_LN($v) {
  list($ns, $rest) = $this->xPNAME_NS($v);
  if (!$ns) {
    return array(0, $v);
  }
  if ($this->x('\s', $rest)) {
    return array(0, $v);
  }
  list($local, $rest) = $this->xPN_LOCAL($rest);
  if (!$local) {
    return array(0, $v);
  }
  if (!isset($this->prefixes[$ns])) {
    return array(0, $v);
  }
  return array($this->prefixes[$ns] . $local, $rest);
}
673 | |
674 /* 76 */ | |
675 | |
/**
 * Parses a language tag ("@" plus letters, optionally followed by
 * "-subtag" parts) that starts directly at the beginning of $v.
 *
 * @param string $v Input text.
 * @return array    array(tag without "@", remaining text) or array(0, $v).
 */
function xLANGTAG($v) {
  /* no whitespace allowed between the string and its tag */
  if ($this->x('\s', $v)) {
    return array(0, $v);
  }
  $match = $this->x('\@([a-z]+(\-[a-z0-9]+)*)', $v);
  if (!$match) {
    return array(0, $v);
  }
  /* index 2 is the inner sub-tag group, so the remainder sits at index 3 */
  return array($match[1], $match[3]);
}
682 | |
683 /* 77.. */ | |
684 | |
/**
 * Parses an unsigned run of digits.
 *
 * @param string $v Input text.
 * @return array    array(digits, remaining text) or array(false, $v).
 */
function xINTEGER($v) {
  $match = $this->x('([0-9]+)', $v);
  if (!$match) {
    return array(false, $v);
  }
  return array($match[1], $match[2]);
}
691 | |
692 /* 78.. */ | |
693 | |
/**
 * Parses an unsigned decimal: digits with a dot ("1.", "1.5") or a leading
 * dot followed by digits (".5").
 *
 * @param string $v Input text.
 * @return array    array(number text, remaining text) or array(false, $v).
 */
function xDECIMAL($v) {
  $patterns = array('([0-9]+\.[0-9]*)', '(\.[0-9]+)');
  foreach ($patterns as $pattern) {
    $match = $this->x($pattern, $v);
    if ($match) {
      return array($match[1], $match[2]);
    }
  }
  return array(false, $v);
}
703 | |
704 /* 79.., 86.. */ | |
705 | |
/**
 * Parses an unsigned double, i.e. a number with an exponent part, in one of
 * the forms "1.5E3", ".5E3" or "1E3" (exponent sign optional).
 *
 * @param string $v Input text.
 * @return array    array(number text, remaining text) or array(false, $v).
 */
function xDOUBLE($v) {
  $patterns = array(
    '([0-9]+\.[0-9]*E[\+\-]?[0-9]+)',
    '(\.[0-9]+E[\+\-]?[0-9]+)',
    '([0-9]+E[\+\-]?[0-9]+)'
  );
  foreach ($patterns as $pattern) {
    $match = $this->x($pattern, $v);
    if ($match) {
      return array($match[1], $match[2]);
    }
  }
  return array(false, $v);
}
718 | |
719 /* 92 */ | |
720 | |
/**
 * Parses the empty collection "()" (parentheses with only whitespace in
 * between) and maps it to rdf:nil.
 *
 * @param string $v Input text.
 * @return array    array(array('type' => 'uri', 'value' => rdf:nil URI), remaining text)
 *                  or array(0, $v).
 */
function xNIL($v) {
  $match = $this->x('\([\x20\x9\xd\xa]*\)', $v);
  if (!$match) {
    return array(0, $v);
  }
  return array(array('type' => 'uri', 'value' => $this->rdf . 'nil'), $match[1]);
}
727 | |
728 /* 95.. */ | |
729 | |
/**
 * Parses a run of letters or a single backslash-u escape with one to four
 * hex digits.
 *
 * @param string $v Input text.
 * @return array    array(matched text, remaining text) or array(0, $v).
 */
function xPN_CHARS_BASE($v) {
  $match = $this->x("([a-z]+|\\\u[0-9a-f]{1,4})", $v);
  if (!$match) {
    return array(0, $v);
  }
  return array($match[1], $match[2]);
}
736 | |
737 /* 96 */ | |
738 | |
/**
 * Parses what xPN_CHARS_BASE accepts, or a single underscore.
 *
 * @param string $v Input text.
 * @return array    array(matched text, remaining text) or array(0, $v).
 */
function xPN_CHARS_U($v) {
  list($chars, $rest) = $this->xPN_CHARS_BASE($v);
  if ($chars) {
    return array($chars, $rest);
  }
  $underscore = $this->x("(_)", $v);
  if ($underscore) {
    return array($underscore[1], $underscore[2]);
  }
  return array(0, $v);
}
748 | |
749 /* 97.. */ | |
750 | |
/**
 * Collects a variable name from the start of $v: any sequence of digit
 * runs, xPN_CHARS_U matches and, once the name is non-empty, characters
 * from the extra class below.
 *
 * NOTE(review): the extra class is written as [\xb7\x300-\x36f]; without
 * the "u" modifier PCRE presumably reads \x300 as \x30 followed by "0"
 * rather than as U+0300 - confirm before relying on combining characters.
 *
 * @param string $v Input text.
 * @return array    array(collected name (possibly ''), remaining text).
 */
function xVARNAME($v) {
  $name = '';
  while (true) {
    $digits = $this->x('([0-9]+)', $v);
    if ($digits) {
      $name .= $digits[1];
      $v = $digits[2];
      continue;
    }
    list($chunk, $after) = $this->xPN_CHARS_U($v);
    if ($chunk) {
      $name .= $chunk;
      $v = $after;
      continue;
    }
    if ($name && ($extra = $this->x('([\xb7\x300-\x36f]+)', $v))) {
      $name .= $extra[1];
      $v = $extra[2];
      continue;
    }
    break;
  }
  return array($name, $v);
}
773 | |
774 /* 98.. */ | |
775 | |
/**
 * Parses what xPN_CHARS_U accepts, or a single character from the class
 * used below (hyphen, digits and the listed byte values).
 *
 * @param string $v Input text.
 * @return array    array(matched text, remaining text) or array(false, $v).
 */
function xPN_CHARS($v) {
  list($chars, $rest) = $this->xPN_CHARS_U($v);
  if ($chars) {
    return array($chars, $rest);
  }
  $single = $this->x('([\-0-9\xb7\x300-\x36f])', $v);
  if ($single) {
    return array($single[1], $single[2]);
  }
  return array(false, $v);
}
785 | |
786 /* 99 */ | |
787 | |
/**
 * Parses the name part of a namespace prefix.
 *
 * Fast path: everything up to the first whitespace, colon, bracket, brace,
 * semicolon or comma. Only when that matches nothing is the name assembled
 * piecewise from xPN_CHARS_BASE / xPN_CHARS matches and dots.
 *
 * @param string $v Input text.
 * @return array    array(prefix name or 0, remaining text).
 */
function xPN_PREFIX($v) {
  $fast = $this->x("([^\s\:\(\)\{\}\;\,]+)", $v, 's');/* accelerator */
  if ($fast) {
    return array($fast[1], $fast[2]);/* @@testing */
  }
  list($name, $rest) = $this->xPN_CHARS_BASE($v);
  if (!$name) {
    return array($name, $rest);
  }
  while (true) {
    list($chunk, $rest) = $this->xPN_CHARS($rest);
    if ($chunk !== false) {
      $name .= $chunk;
      continue;
    }
    $dot = $this->x("\.", $rest);
    if ($dot) {
      $name .= '.';
      $rest = $dot[1];
      continue;
    }
    break;
  }
  list($chunk, $rest) = $this->xPN_CHARS($rest);
  if ($chunk) {
    $name .= $chunk;
  }
  return array($name, $rest);
}
811 | |
812 /* 100 */ | |
813 | |
/**
 * Parses the local part of a prefixed name or blank node label.
 *
 * Fast path: everything up to the first whitespace, bracket, brace,
 * semicolon, comma or dot, provided the remainder does not start with a
 * dot. Otherwise the name is assembled piecewise: digits, xPN_CHARS_U
 * matches and, once the name is non-empty, an inner dot (not followed by
 * whitespace or "}") and xPN_CHARS matches. Leading whitespace ends the
 * name immediately.
 *
 * @param string $v Input text.
 * @return array    array(local name (possibly ''), remaining text).
 */
function xPN_LOCAL($v) {
  $fast = $this->x("([^\s\(\)\{\}\[\]\;\,\.]+)", $v, 's');/* accelerator */
  if ($fast && !preg_match('/^\./', $fast[2])) {
    return array($fast[1], $fast[2]);/* @@testing */
  }
  $local = '';
  $rest = $v;
  while (true) {
    if ($this->x('\s', $rest)) {
      return array($local, $rest);
    }
    $digit = $this->x('([0-9])', $rest);
    if ($digit) {
      $local .= $digit[1];
      $rest = $digit[2];
      continue;
    }
    list($chunk, $rest) = $this->xPN_CHARS_U($rest);
    if ($chunk) {
      $local .= $chunk;
      continue;
    }
    if (!$local) {
      break;
    }
    /* a dot belongs to the name only when more name characters follow */
    $dot = $this->x('(\.)', $rest);
    if ($dot && !preg_match('/^[\s\}]/s', $dot[2])) {
      $local .= $dot[1];
      $rest = $dot[2];
    }
    list($chunk, $rest) = $this->xPN_CHARS($rest);
    if (!$chunk) {
      break;
    }
    $local .= $chunk;
  }
  return array($local, $rest);
}
847 | |
848 /* */ | |
849 | |
/**
 * Decodes backslash escapes in a literal value.
 *
 * First the single-character escapes for tab, newline, carriage return,
 * double quote and single quote are replaced. Then backslash-U (8 hex
 * digits) and backslash-u (4 hex digits) escapes are replaced by the UTF-8
 * encoding of the code point; code points of 2097152 and above are removed.
 *
 * Hex digits are accepted in either case. The former patterns only allowed
 * upper-case digits, so an escape written with lower-case digits was left
 * undecoded in the value.
 *
 * NOTE(review): an escaped backslash is not among the mappings, so a
 * doubled backslash is not collapsed and its second backslash can pair
 * with a following t/n/r or quote - confirm whether that is intended.
 *
 * @param string $v Raw literal value.
 * @return string   Value with the escapes decoded.
 */
function unescapeNtripleUTF($v) {
  if (strpos($v, '\\') === false) return $v;
  $mappings = array('t' => "\t", 'n' => "\n", 'r' => "\r", '\"' => '"', '\'' => "'");
  foreach ($mappings as $in => $out) {
    $v = preg_replace('/\x5c([' . $in . '])/', $out, $v);
  }
  if (strpos(strtolower($v), '\u') === false) return $v;
  while (preg_match('/\\\(U)([0-9A-Fa-f]{8})/', $v, $m) || preg_match('/\\\(u)([0-9A-Fa-f]{4})/', $v, $m)) {
    $no = hexdec($m[2]);
    /* manual UTF-8 encoding of the code point */
    if ($no < 128) $char = chr($no);
    else if ($no < 2048) $char = chr(($no >> 6) + 192) . chr(($no & 63) + 128);
    else if ($no < 65536) $char = chr(($no >> 12) + 224) . chr((($no >> 6) & 63) + 128) . chr(($no & 63) + 128);
    else if ($no < 2097152) $char = chr(($no >> 18) + 240) . chr((($no >> 12) & 63) + 128) . chr((($no >> 6) & 63) + 128) . chr(($no & 63) + 128);
    else $char= '';
    /* replace every occurrence of exactly this escape (same letter case as matched) */
    $v = str_replace('\\' . $m[1] . $m[2], $char, $v);
  }
  return $v;
}
868 | |
869 /* */ | |
870 | |
/**
 * Parses a placeholder: "?" or "$" followed directly by a balanced
 * "{...}" group (nested braces allowed).
 *
 * Each distinct placeholder text is recorded once in
 * $this->r['placeholders']. The membership test is strict, so
 * numeric-looking placeholders such as "1" and "01" are kept apart (a loose
 * comparison would treat them as equal).
 *
 * @param string $v Input text.
 * @return array    array(array('value' => text between the outer braces,
 *                  'type' => 'placeholder'), remaining text) or array(0, $v).
 */
function xPlaceholder($v) {
  //if ($r = $this->x('(\?|\$)\{([^\}]+)\}', $v)) {
  if ($r = $this->x('(\?|\$)', $v)) {
    /* the balanced group must start right after the sigil */
    if (preg_match('/(\{(?:[^{}]+|(?R))*\})/', $r[2], $m) && strpos(trim($r[2]), $m[1]) === 0) {
      $ph = substr($m[1], 1, -1);
      $rest = substr(trim($r[2]), strlen($m[1]));
      if (!isset($this->r['placeholders'])) $this->r['placeholders'] = array();
      if (!in_array($ph, $this->r['placeholders'], true)) $this->r['placeholders'][] = $ph;
      return array(array('value' => $ph, 'type' => 'placeholder'), $rest);
    }
  }
  return array(0, $v);
}
884 | |
885 /* */ | |
886 } |