comparison sites/all/libraries/ARC2/arc/parsers/ARC2_TurtleParser.php @ 4:ce11bbd8f642

added modules
author danieleb <danielebarchiesi@me.com>
date Thu, 19 Sep 2013 10:38:44 +0100
parents
children
comparison
equal deleted inserted replaced
3:b28be78d8160 4:ce11bbd8f642
1 <?php
2 /**
3 * ARC2 SPARQL-enhanced Turtle Parser
4 *
5 * @author Benjamin Nowack
6 * @license <http://arc.semsol.org/license>
7 * @homepage <http://arc.semsol.org/>
8 * @package ARC2
9 * @version 2010-11-16
10 */
11
12 ARC2::inc('RDFParser');
13
14 class ARC2_TurtleParser extends ARC2_RDFParser {
15
/**
 * Creates the Turtle parser.
 *
 * Both arguments are passed straight through to the RDF parser base class.
 *
 * @param mixed $a      Configuration handed to the parent constructor.
 * @param mixed $caller Calling component, received by reference.
 */
function __construct($a, &$caller)
{
  parent::__construct($a, $caller);
}
19
/**
 * Resets parser state and registers the built-in namespaces.
 *
 * Runs the parent initialisation first, then stores the XML, RDF and XSD
 * namespace URIs as properties and in the URI => prefix map `nsp`.
 */
function __init() {
  parent::__init();
  $this->state = 0;
  $builtin = array(
    'xml' => 'http://www.w3.org/XML/1998/namespace',
    'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
    'xsd' => 'http://www.w3.org/2001/XMLSchema#',
  );
  $uri_to_prefix = array();
  foreach ($builtin as $prefix => $uri) {
    /* exposes $this->xml, $this->rdf and $this->xsd */
    $this->$prefix = $uri;
    $uri_to_prefix[$uri] = $prefix;
  }
  $this->nsp = $uri_to_prefix;
  $this->unparsed_code = '';
  /* loop limit used by parse(); read from the 'turtle_max_parsing_loops' setting with 500 passed as fallback */
  $this->max_parsing_loops = $this->v('turtle_max_parsing_loops', 500, $this->a);
}
30
31 /* */
32
/**
 * Matches a pattern at the start of $v after discarding leading noise.
 *
 * Leading 0xA0/0xC2 bytes are collapsed into a single space and any leading
 * "#..." comment lines are dropped before the call is delegated to ARC2::x().
 *
 * @param string $re      Pattern fragment handed to ARC2::x().
 * @param string $v       Remaining input.
 * @param string $options Regex modifiers handed to ARC2::x().
 * @return mixed Whatever ARC2::x() returns; callers in this class treat it as
 *               a falsy value or an array whose last element is the unmatched remainder.
 */
function x($re, $v, $options = 'si') {
  $v = preg_replace('/^[\xA0\xC2]+/', ' ', $v);
  /* comment removal: keep stripping while the input starts with a comment */
  for (;;) {
    if (!preg_match('/^\s*(\#[^\xd\xa]*)(.*)$/si', $v, $parts)) {
      break;
    }
    $v = $parts[2];
  }
  return ARC2::x($re, $v, $options);
}
41
/**
 * Allocates the next blank node identifier.
 *
 * Increments the bnode counter and returns "_:" followed by the bnode prefix
 * and the new counter value.
 *
 * @return string
 */
function createBnodeID() {
  $next = ++$this->bnode_id;
  return '_:' . $this->bnode_prefix . $next;
}
46
47 /* */
48
/**
 * Appends a triple to the result list.
 *
 * When `skip_dupes` is set, a triple whose serialised MD5 hash was already
 * recorded is silently ignored.
 *
 * @param array $t Triple structure to store.
 */
function addT($t) {
  if ($this->skip_dupes) {
    $hash = md5(serialize($t));
    if (isset($this->added_triples[$hash])) {
      return;
    }
    $this->triples[$this->t_count] = $t;
    $this->t_count++;
    $this->added_triples[$hash] = true;
    return;
  }
  $this->triples[$this->t_count] = $t;
  $this->t_count++;
}
63
64 /* */
65
/**
 * Returns the collected triples, looked up through v() with an empty array
 * passed as the fallback.
 *
 * @return array
 */
function getTriples() {
  $triples = $this->v('triples', array());
  return $triples;
}
69
/**
 * Returns the triple counter maintained by addT().
 *
 * @return int
 */
function countTriples() {
  $count = $this->t_count;
  return $count;
}
73
74 /* */
75
/**
 * Returns the input left unparsed by the last parse() run, looked up through
 * v() with an empty string passed as the fallback.
 *
 * @return string
 */
function getUnparsedCode() {
  $leftover = $this->v('unparsed_code', '');
  return $leftover;
}
79
80 /* */
81
/**
 * Resets the prefix table to rdf/rdfs/owl/xsd and adds the configured ones.
 *
 * Additional prefixes are read from the 'ns' entry of the configuration;
 * each key gets a trailing ':' appended before it is stored.
 */
function setDefaultPrefixes() {
  $this->prefixes = array(
    'rdf:' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
    'rdfs:' => 'http://www.w3.org/2000/01/rdf-schema#',
    'owl:' => 'http://www.w3.org/2002/07/owl#',
    'xsd:' => 'http://www.w3.org/2001/XMLSchema#',
  );
  $configured = $this->v('ns', array(), $this->a);
  if (!$configured) {
    return;
  }
  foreach ($configured as $prefix => $uri) {
    $this->prefixes[$prefix . ':'] = $uri;
  }
}
93
94
/**
 * Reads a Turtle document in chunks and collects its triples.
 *
 * The input is pulled from an ARC2_Reader in 8192-byte chunks. Each pass first
 * consumes prologue declarations (base/prefix) and then hands the buffer to
 * xTriplesBlock(); completed triples are stored through addT(). Whatever could
 * not be parsed ends up in `unparsed_code`, and an error is recorded for it
 * unless another error was already logged.
 *
 * @param string $path         Location handed to the reader.
 * @param string $data         Optional inline data handed to the reader.
 * @param bool   $iso_fallback Not used by this method.
 * @return mixed Result of done().
 */
function parse($path, $data = '', $iso_fallback = false) {
  $this->setDefaultPrefixes();
  /* reader */
  if (!$this->v('reader')) {
    ARC2::inc('Reader');
    $this->reader = new ARC2_Reader($this->a, $this);
  }
  $this->reader->setAcceptHeader('Accept: application/x-turtle; q=0.9, */*; q=0.1');
  $this->reader->activate($path, $data);
  $this->base = $this->v1('base', $this->reader->base, $this->a);
  $this->r = array('vars' => array());
  /* parse */
  $buffer = '';
  $more_triples = array();
  $sub_v = '';
  $sub_v2 = '';
  $loops = 0;
  $prologue_done = 0;
  while ($d = $this->reader->readStream(0, 8192)) {
    $buffer .= $d;
    $sub_v = $buffer;
    do {
      $proceed = 0;
      if (!$prologue_done) {
        $proceed = 1;
        if ((list($sub_r, $sub_v) = $this->xPrologue($sub_v)) && $sub_r) {
          $loops = 0;
          $sub_v .= $this->reader->readStream(0, 128);
          /* we might have missed the final DOT in the previous prologue loop */
          if ($sub_r = $this->x('\.', $sub_v)) $sub_v = $sub_r[1];
          if ($this->x("\@?(base|prefix)", $sub_v)) {/* more prologue to come, use outer loop */
            $proceed = 0;
          }
        }
        else {
          $prologue_done = 1;
        }
      }
      if ($prologue_done && (list($sub_r, $sub_v, $more_triples, $sub_v2) = $this->xTriplesBlock($sub_v)) && is_array($sub_r)) {
        $proceed = 1;
        $loops = 0;
        foreach ($sub_r as $t) {
          $this->addT($t);
        }
      }
    } while ($proceed);
    $loops++;
    /* keep only the part after the last completed statement for the next chunk */
    $buffer = $sub_v;
    if ($loops > $this->max_parsing_loops) {/* most probably a parser or code bug, might also be a huge object value, though */
      $this->addError('too many loops: ' . $loops . '. Could not parse "' . substr($buffer, 0, 200) . '..."');
      break;
    }
  }
  /* triples of a final statement that was not closed by a DOT */
  foreach ($more_triples as $t) {
    $this->addT($t);
  }
  $sub_v = count($more_triples) ? $sub_v2 : $sub_v;
  $buffer = $sub_v;
  $this->unparsed_code = $buffer;
  $this->reader->closeStream();
  unset($this->reader);
  /* remove trailing comments */
  while (preg_match('/^\s*(\#[^\xd\xa]*)(.*)$/si', $this->unparsed_code, $m)) $this->unparsed_code = $m[2];
  if ($this->unparsed_code && !$this->getErrors()) {
    /* only line breaks are flattened; the former class [\x0a|\x0d] also replaced literal pipe characters */
    $rest = preg_replace('/[\x0a\x0d]/', ' ', substr($this->unparsed_code, 0, 30));
    if (trim($rest)) $this->addError('Could not parse "' . $rest . '"');
  }
  return $this->done();
}
164
/**
 * Consumes an optional base declaration followed by any number of prefix
 * declarations, as long as no triple has been counted yet.
 *
 * A base declaration replaces `base`; every prefix declaration is stored in
 * the prefix table.
 *
 * @param string $v Remaining input.
 * @return array array(1 if anything was declared, otherwise 0; remaining input)
 */
function xPrologue($v) {
  $found = 0;
  if ($this->t_count) {
    return array($found, $v);
  }
  list($base, $v) = $this->xBaseDecl($v);
  if ($base) {
    $this->base = $base;
    $found = 1;
  }
  for (;;) {
    list($decl, $v) = $this->xPrefixDecl($v);
    if (!$decl) {
      break;
    }
    $this->prefixes[$decl['prefix']] = $decl['uri'];
    $found = 1;
  }
  return array($found, $v);
}
179
180 /* 3 */
181
/**
 * Parses a base declaration ("@base" or "base", an IRI reference and an
 * optional trailing DOT).
 *
 * @param string $v Remaining input.
 * @return array array(IRI reference result, remaining input) or array(0, $v)
 */
function xBaseDecl($v) {
  $keyword = $this->x("\@?base\s+", $v);
  if (!$keyword) {
    return array(0, $v);
  }
  list($iri, $rest) = $this->xIRI_REF($keyword[1]);
  if (!$iri) {
    return array(0, $v);
  }
  $dot = $this->x('\.', $rest);
  if ($dot) {
    $rest = $dot[1];
  }
  return array($iri, $rest);
}
193
194 /* 4 */
195
/**
 * Parses a prefix declaration ("@prefix" or "prefix", a namespace prefix, an
 * IRI reference and an optional trailing DOT).
 *
 * The IRI reference is resolved against the current base via calcURI().
 *
 * @param string $v Remaining input.
 * @return array array(array('prefix', 'uri_ref', 'uri'), remaining input) or array(0, $v)
 */
function xPrefixDecl($v) {
  $keyword = $this->x("\@?prefix\s+", $v);
  if (!$keyword) {
    return array(0, $v);
  }
  list($prefix, $rest) = $this->xPNAME_NS($keyword[1]);
  if (!$prefix) {
    return array(0, $v);
  }
  list($uri_ref, $rest) = $this->xIRI_REF($rest);
  if (!$uri_ref) {
    return array(0, $v);
  }
  $uri = $this->calcURI($uri_ref, $this->base);
  $dot = $this->x('\.', $rest);
  if ($dot) {
    $rest = $dot[1];
  }
  $decl = array('prefix' => $prefix, 'uri_ref' => $uri_ref, 'uri' => $uri);
  return array($decl, $rest);
}
211
212 /* 21.., 32.. */
213
/**
 * Parses as many complete statements as possible from $v.
 *
 * A small state machine walks through subject (1), predicate (2), object (3)
 * and separator (4). Triples of the statement in progress are collected in
 * $pre_r and only moved to $r once the statement is closed by a DOT, or when
 * a "}" or one of the keywords checked in state 4 follows.
 *
 * Returns array($r or 0, $buffer, $pre_r, $sub_v):
 *  - $r      triples of completed statements (0 when there are none)
 *  - $buffer input as of the last point where triples were moved to $r
 *  - $pre_r  triples of a trailing statement that was not closed
 *  - $sub_v  input at the current parse position
 */
214 function xTriplesBlock($v) {
215 $pre_r = array();
216 $r = array();
217 $state = 1;
218 $sub_v = $v;
219 $buffer = $sub_v;
220 do {
221 $proceed = 0;
222 if ($state == 1) {/* expecting subject */
/* every new statement starts from an empty triple template */
223 $t = array('type' => 'triple', 's' => '', 'p' => '', 'o' => '', 's_type' => '', 'p_type' => '', 'o_type' => '', 'o_datatype' => '', 'o_lang' => '');
224 if ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
225 $t['s'] = $sub_r['value'];
226 $t['s_type'] = $sub_r['type'];
227 $state = 2;
228 $proceed = 1;
/* a "}" or DOT directly after the subject is only accepted for placeholders */
229 if ($sub_r = $this->x('(\}|\.)', $sub_v)) {
230 if ($t['s_type'] == 'placeholder') {
231 $state = 4;
232 }
233 else {
234 $this->addError('"' . $sub_r[1]. '" after subject found.');
235 }
236 }
237 }
238 elseif ((list($sub_r, $sub_v) = $this->xCollection($sub_v)) && $sub_r) {
/* collection as subject: its head bnode is the subject, its list triples are queued */
239 $t['s'] = $sub_r['id'];
240 $t['s_type'] = $sub_r['type'];
241 $pre_r = array_merge($pre_r, $sub_r['triples']);
242 $state = 2;
243 $proceed = 1;
244 if ($sub_r = $this->x('\.', $sub_v)) {
245 $this->addError('DOT after subject found.');
246 }
247 }
248 elseif ((list($sub_r, $sub_v) = $this->xBlankNodePropertyList($sub_v)) && $sub_r) {
/* "[ ... ]" as subject: same handling as a collection */
249 $t['s'] = $sub_r['id'];
250 $t['s_type'] = $sub_r['type'];
251 $pre_r = array_merge($pre_r, $sub_r['triples']);
252 $state = 2;
253 $proceed = 1;
254 }
255 elseif ($sub_r = $this->x('\.', $sub_v)) {
/* note: the whole remaining input is appended to this error message */
256 $this->addError('Subject expected, DOT found.' . $sub_v);
257 }
258 }
259 if ($state == 2) {/* expecting predicate */
/* the keyword "a" is shorthand for rdf:type */
260 if ($sub_r = $this->x('a\s+', $sub_v)) {
261 $sub_v = $sub_r[1];
262 $t['p'] = $this->rdf . 'type';
263 $t['p_type'] = 'uri';
264 $state = 3;
265 $proceed = 1;
266 }
267 elseif ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
/* a bnode predicate is reported, but parsing continues with it */
268 if ($sub_r['type'] == 'bnode') {
269 $this->addError('Blank node used as triple predicate');
270 }
271 $t['p'] = $sub_r['value'];
272 $t['p_type'] = $sub_r['type'];
273 $state = 3;
274 $proceed = 1;
275 }
276 elseif ($sub_r = $this->x('\.', $sub_v)) {
/* the DOT is not consumed here; state 4 below handles it */
277 $state = 4;
278 }
279 elseif ($sub_r = $this->x('\}', $sub_v)) {
/* closing brace: flush the pending triples and stop */
280 $buffer = $sub_v;
281 $r = array_merge($r, $pre_r);
282 $pre_r = array();
283 $proceed = 0;
284 }
285 }
286 if ($state == 3) {/* expecting object */
287 if ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
288 $t['o'] = $sub_r['value'];
289 $t['o_type'] = $sub_r['type'];
290 $t['o_lang'] = $this->v('lang', '', $sub_r);
291 $t['o_datatype'] = $this->v('datatype', '', $sub_r);
292 $pre_r[] = $t;
293 $state = 4;
294 $proceed = 1;
295 }
296 elseif ((list($sub_r, $sub_v) = $this->xCollection($sub_v)) && $sub_r) {
/* the linking triple is queued first, followed by the list triples */
297 $t['o'] = $sub_r['id'];
298 $t['o_type'] = $sub_r['type'];
299 $t['o_datatype'] = '';
300 $pre_r = array_merge($pre_r, array($t), $sub_r['triples']);
301 $state = 4;
302 $proceed = 1;
303 }
304 elseif ((list($sub_r, $sub_v) = $this->xBlankNodePropertyList($sub_v)) && $sub_r) {
305 $t['o'] = $sub_r['id'];
306 $t['o_type'] = $sub_r['type'];
307 $t['o_datatype'] = '';
308 $pre_r = array_merge($pre_r, array($t), $sub_r['triples']);
309 $state = 4;
310 $proceed = 1;
311 }
312 }
313 if ($state == 4) {/* expecting . or ; or , or } */
314 if ($sub_r = $this->x('\.', $sub_v)) {
/* statement complete: commit the pending triples and remember the position */
315 $sub_v = $sub_r[1];
316 $buffer = $sub_v;
317 $r = array_merge($r, $pre_r);
318 $pre_r = array();
319 $state = 1;
320 $proceed = 1;
321 }
322 elseif ($sub_r = $this->x('\;', $sub_v)) {
/* same subject, next predicate */
323 $sub_v = $sub_r[1];
324 $state = 2;
325 $proceed = 1;
326 }
327 elseif ($sub_r = $this->x('\,', $sub_v)) {
/* same subject and predicate, next object */
328 $sub_v = $sub_r[1];
329 $state = 3;
330 $proceed = 1;
331 if ($sub_r = $this->x('\}', $sub_v)) {
332 $this->addError('Object expected, } found.');
333 }
334 }
/* checked in addition to the branches above (plain "if", not "elseif") */
335 if ($sub_r = $this->x('(\}|\{|OPTIONAL|FILTER|GRAPH)', $sub_v)) {
336 $buffer = $sub_v;
337 $r = array_merge($r, $pre_r);
338 $pre_r = array();
339 $proceed = 0;
340 }
341 }
342 } while ($proceed);
343 return count($r) ? array($r, $buffer, $pre_r, $sub_v) : array(0, $buffer, $pre_r, $sub_v);
344 }
345
346 /* 39.. */
347
/**
 * Parses a blank node property list ("[ predicate object ; ... ]").
 *
 * A fresh bnode id becomes the subject of every triple found between the
 * brackets. Nested collections and nested property lists contribute their own
 * triples after the triple that links to them.
 *
 * Returns array(array('id', 'type' => 'bnode', 'triples'), remaining input)
 * when the closing "]" was found, otherwise array(0, $v). A bnode id is
 * allocated as soon as the opening "[" is seen, even if parsing fails later.
 */
348 function xBlankNodePropertyList($v) {
349 if ($sub_r = $this->x('\[', $v)) {
350 $sub_v = $sub_r[1];
351 $s = $this->createBnodeID();
352 $r = array('id' => $s, 'type' => 'bnode', 'triples' => array());
353 $t = array('type' => 'triple', 's' => $s, 'p' => '', 'o' => '', 's_type' => 'bnode', 'p_type' => '', 'o_type' => '', 'o_datatype' => '', 'o_lang' => '');
/* the subject is fixed, so the state machine starts at the predicate */
354 $state = 2;
355 $closed = 0;
356 do {
357 $proceed = 0;
358 if ($state == 2) {/* expecting predicate */
/* the keyword "a" is shorthand for rdf:type */
359 if ($sub_r = $this->x('a\s+', $sub_v)) {
360 $sub_v = $sub_r[1];
361 $t['p'] = $this->rdf . 'type';
362 $t['p_type'] = 'uri';
363 $state = 3;
364 $proceed = 1;
365 }
366 elseif ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
367 $t['p'] = $sub_r['value'];
368 $t['p_type'] = $sub_r['type'];
369 $state = 3;
370 $proceed = 1;
371 }
372 }
373 if ($state == 3) {/* expecting object */
374 if ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
375 $t['o'] = $sub_r['value'];
376 $t['o_type'] = $sub_r['type'];
377 $t['o_lang'] = $this->v('lang', '', $sub_r);
378 $t['o_datatype'] = $this->v('datatype', '', $sub_r);
379 $r['triples'][] = $t;
380 $state = 4;
381 $proceed = 1;
382 }
383 elseif ((list($sub_r, $sub_v) = $this->xCollection($sub_v)) && $sub_r) {
/* linking triple first, then the triples of the nested structure */
384 $t['o'] = $sub_r['id'];
385 $t['o_type'] = $sub_r['type'];
386 $t['o_datatype'] = '';
387 $r['triples'] = array_merge($r['triples'], array($t), $sub_r['triples']);
388 $state = 4;
389 $proceed = 1;
390 }
391 elseif((list($sub_r, $sub_v) = $this->xBlankNodePropertyList($sub_v)) && $sub_r) {
392 $t['o'] = $sub_r['id'];
393 $t['o_type'] = $sub_r['type'];
394 $t['o_datatype'] = '';
395 $r['triples'] = array_merge($r['triples'], array($t), $sub_r['triples']);
396 $state = 4;
397 $proceed = 1;
398 }
399 }
400 if ($state == 4) {/* expecting . or ; or , or ] */
/* the checks below are independent "if"s, so e.g. ";" directly followed by "]" closes the list */
401 if ($sub_r = $this->x('\.', $sub_v)) {
402 $sub_v = $sub_r[1];
/* state 1 has no branch in this loop; unless one of the checks below matches, the loop ends on the next pass without the list being closed */
403 $state = 1;
404 $proceed = 1;
405 }
406 if ($sub_r = $this->x('\;', $sub_v)) {
407 $sub_v = $sub_r[1];
408 $state = 2;
409 $proceed = 1;
410 }
411 if ($sub_r = $this->x('\,', $sub_v)) {
412 $sub_v = $sub_r[1];
413 $state = 3;
414 $proceed = 1;
415 }
416 if ($sub_r = $this->x('\]', $sub_v)) {
417 $sub_v = $sub_r[1];
418 $proceed = 0;
419 $closed = 1;
420 }
421 }
422 } while ($proceed);
423 if ($closed) {
424 return array($r, $sub_v);
425 }
/* unterminated list: report failure and hand back the untouched input */
426 return array(0, $v);
427 }
428 return array(0, $v);
429 }
430
431 /* 40.. */
432
/**
 * Parses a collection ("( item item ... )") into an rdf:first / rdf:rest chain.
 *
 * Each item gets an rdf:first triple on the current list node. After every
 * item either the closing ")" ends the chain with rdf:rest rdf:nil, or a new
 * bnode is allocated and linked via rdf:rest for the next item.
 *
 * Returns array(array('id' => head bnode, 'type' => 'bnode', 'triples'),
 * remaining input) when the closing ")" was found, otherwise array(0, $v).
 * Bnode ids allocated along the way are not released on failure.
 */
433 function xCollection($v) {
434 if ($sub_r = $this->x('\(', $v)) {
435 $sub_v = $sub_r[1];
436 $s = $this->createBnodeID();
437 $r = array('id' => $s, 'type' => 'bnode', 'triples' => array());
438 $closed = 0;
439 do {
440 $proceed = 0;
/* an item may be a plain term, a nested collection or a blank node property list */
441 if ((list($sub_r, $sub_v) = $this->xVarOrTerm($sub_v)) && $sub_r) {
442 $r['triples'][] = array('type' => 'triple', 's' => $s, 'p' => $this->rdf . 'first', 'o' => $sub_r['value'], 's_type' => 'bnode', 'p_type' => 'uri', 'o_type' => $sub_r['type'], 'o_lang' => $this->v('lang', '', $sub_r), 'o_datatype' => $this->v('datatype', '', $sub_r));
443 $proceed = 1;
444 }
445 elseif ((list($sub_r, $sub_v) = $this->xCollection($sub_v)) && $sub_r) {
446 $r['triples'][] = array('type' => 'triple', 's' => $s, 'p' => $this->rdf . 'first', 'o' => $sub_r['id'], 's_type' => 'bnode', 'p_type' => 'uri', 'o_type' => $sub_r['type'], 'o_lang' => '', 'o_datatype' => '');
447 $r['triples'] = array_merge($r['triples'], $sub_r['triples']);
448 $proceed = 1;
449 }
450 elseif((list($sub_r, $sub_v) = $this->xBlankNodePropertyList($sub_v)) && $sub_r) {
451 $r['triples'][] = array('type' => 'triple', 's' => $s, 'p' => $this->rdf . 'first', 'o' => $sub_r['id'], 's_type' => 'bnode', 'p_type' => 'uri', 'o_type' => $sub_r['type'], 'o_lang' => '', 'o_datatype' => '');
452 $r['triples'] = array_merge($r['triples'], $sub_r['triples']);
453 $proceed = 1;
454 }
455 if ($proceed) {
456 if ($sub_r = $this->x('\)', $sub_v)) {
/* end of list: terminate the chain with rdf:nil */
457 $sub_v = $sub_r[1];
458 $r['triples'][] = array('type' => 'triple', 's' => $s, 'p' => $this->rdf . 'rest', 'o' => $this->rdf . 'nil', 's_type' => 'bnode', 'p_type' => 'uri', 'o_type' => 'uri', 'o_lang' => '', 'o_datatype' => '');
459 $closed = 1;
460 $proceed = 0;
461 }
462 else {
/* more items expected: link a fresh list node and continue with it */
463 $next_s = $this->createBnodeID();
464 $r['triples'][] = array('type' => 'triple', 's' => $s, 'p' => $this->rdf . 'rest', 'o' => $next_s, 's_type' => 'bnode', 'p_type' => 'uri', 'o_type' => 'bnode', 'o_lang' => '', 'o_datatype' => '');
465 $s = $next_s;
466 }
467 }
468 } while ($proceed);
469 if ($closed) {
470 return array($r, $sub_v);
471 }
472 }
/* no opening "(" or no closing ")": report failure with the untouched input */
473 return array (0, $v);
474 }
475
476 /* 42 */
477
/**
 * Parses a variable or, failing that, a graph term.
 *
 * @param string $v Remaining input.
 * @return array array(term structure, remaining input) or array(0, $v)
 */
function xVarOrTerm($v) {
  foreach (array('xVar', 'xGraphTerm') as $method) {
    list($term, $rest) = $this->$method($v);
    if ($term) {
      return array($term, $rest);
    }
  }
  return array(0, $v);
}
487
488 /* 44, 74.., 75.. */
489
/**
 * Parses a variable ("?name" or "$name") and registers its name.
 *
 * The name is taken from the first whitespace-delimited token; characters of
 * that token which do not belong to the name are handed back as part of the
 * remaining input. Every distinct name is recorded once in $this->r['vars'].
 *
 * @param string $v Remaining input.
 * @return array array(array('value' => name, 'type' => 'var'), remaining input) or array(0, $v)
 */
function xVar($v) {
  if ($r = $this->x('(\?|\$)([^\s]+)', $v)) {
    list($name, $token_rest) = $this->xVARNAME($r[2]);
    if ($name) {
      /* strict lookup: a loose comparison treats numeric-looking names such as "1" and "01" as equal */
      if (!in_array($name, $this->r['vars'], true)) {
        $this->r['vars'][] = $name;
      }
      return array(array('value' => $name, 'type' => 'var'), $token_rest . $r[3]);
    }
  }
  return array(0, $v);
}
501
502 /* 45 */
503
/**
 * Parses a graph term by trying the term parsers in a fixed order.
 *
 * The first parser that succeeds wins. Scalar results are wrapped into
 * array('value' => ...); the 'type' entry is resolved through v1() with the
 * default listed for the matching rule.
 *
 * @param string $v Remaining input.
 * @return array array(term structure, remaining input) or array(0, $v)
 */
function xGraphTerm($v) {
  $rules = array(
    'IRIref' => 'uri',
    'RDFLiteral' => 'literal',
    'NumericLiteral' => 'literal',
    'BooleanLiteral' => 'literal',
    'BlankNode' => 'bnode',
    'NIL' => 'uri',
    'Placeholder' => 'placeholder'
  );
  foreach ($rules as $rule => $default_type) {
    $method = 'x' . $rule;
    list($term, $rest) = $this->$method($v);
    if (!$term) {
      continue;
    }
    if (!is_array($term)) {
      $term = array('value' => $term);
    }
    $term['type'] = $this->v1('type', $default_type, $term);
    return array($term, $rest);
  }
  return array(0, $v);
}
525
526 /* 60 */
527
/**
 * Parses a string literal with an optional language tag or datatype.
 *
 * The string value is unescaped via unescapeNtripleUTF(). A language tag is
 * tried first; otherwise "^^" directly after the string (no whitespace in
 * between) introduces a datatype IRI.
 *
 * @param string $v Remaining input.
 * @return array array(literal structure, remaining input) or array(0, $v)
 */
function xRDFLiteral($v) {
  list($literal, $rest) = $this->xString($v);
  if (!$literal) {
    return array(0, $v);
  }
  $literal['value'] = $this->unescapeNtripleUTF($literal['value']);
  list($lang, $rest) = $this->xLANGTAG($rest);
  if ($lang) {
    $literal['lang'] = $lang;
  }
  elseif (!$this->x('\s', $rest) && ($marker = $this->x('\^\^', $rest))) {
    list($datatype, $rest) = $this->xIRIref($marker[1]);
    /* test the IRI itself: the former check looked at offset 1 of the result, i.e. the
       second character of the IRI string (or an offset into the integer 0 on failure) */
    if ($datatype) {
      $literal['datatype'] = $datatype;
    }
  }
  return array($literal, $rest);
}
542
543 /* 61.., 62.., 63.., 64.. */
544
/**
 * Parses a numeric literal with an optional sign.
 *
 * DOUBLE, DECIMAL and INTEGER are tried in that order; the first match decides
 * the XSD datatype.
 *
 * @param string $v Remaining input.
 * @return array array(array('value', 'type' => 'literal', 'datatype'), remaining input) or array(0, $v)
 */
function xNumericLiteral($v) {
  $sign_match = $this->x('(\-|\+)?', $v);
  $sign = $sign_match[1];
  $rest = $sign_match[2];
  $rules = array('DOUBLE' => 'double', 'DECIMAL' => 'decimal', 'INTEGER' => 'integer');
  foreach ($rules as $rule => $xsd_type) {
    $method = 'x' . $rule;
    list($digits, $rest) = $this->$method($rest);
    if ($digits === false) {
      continue;
    }
    $literal = array('value' => $sign . $digits, 'type' => 'literal', 'datatype' => $this->xsd . $xsd_type);
    return array($literal, $rest);
  }
  return array(0, $v);
}
558
559 /* 65.. */
560
/**
 * Parses the keywords "true" or "false".
 *
 * @param string $v Remaining input.
 * @return array array(matched keyword, remaining input) or array(0, $v)
 */
function xBooleanLiteral($v) {
  $match = $this->x('(true|false)', $v);
  if (!$match) {
    return array(0, $v);
  }
  return array($match[1], $match[2]);
}
567
568 /* 66.., 87.., 88.., 89.., 90.., 91.. */
569
/**
 * Parses a quoted string (single or triple, ' or " delimited).
 *
 * Scans for the first closing delimiter that is not preceded by an odd number
 * of backslashes. The raw content is passed through toUTF8(); escapes are
 * left untouched here.
 *
 * @param string $v Remaining input.
 * @return array array(array('value', 'type' => 'literal', 'sub_type'), remaining input) or array(0, $v)
 */
function xString($v) {/* largely simplified, may need some tweaks in following revisions */
  if (!preg_match('/^\s*([\']{3}|\'|[\"]{3}|\")(.*)$/s', $v, $open)) {
    return array(0, $v);
  }
  $delim = $open[1];
  $rest = $open[2];
  $sub_types = array("'''" => 'literal_long1', '"""' => 'literal_long2', "'" => 'literal1', '"' => 'literal2');
  $sub_type = $sub_types[$delim];
  $offset = 0;
  while (($delim_pos = strpos($rest, $delim, $offset)) !== false) {
    $tail = substr($rest, $delim_pos + strlen($delim));
    $content = substr($rest, 0, $delim_pos);
    /* an odd number of trailing backslashes means the delimiter itself is escaped */
    $escaped = preg_match('/([\x5c]+)$/s', $content, $slashes) && (strlen($slashes[1]) % 2);
    if (!$escaped) {
      return array(array('value' => $this->toUTF8($content) , 'type' => 'literal', 'sub_type' => $sub_type), $tail);
    }
    $offset = $delim_pos + 1;
  }
  return array(0, $v);
}
599
600 /* 67 */
601
/**
 * Parses an IRI given either as "<...>" reference or as prefixed name.
 *
 * IRI references are resolved against the current base via calcURI();
 * prefixed names are returned as delivered by xPrefixedName().
 *
 * @param string $v Remaining input.
 * @return array array(IRI, remaining input) or array(0, remaining input)
 */
function xIRIref($v) {
  list($ref, $rest) = $this->xIRI_REF($v);
  if ($ref) {
    return array($this->calcURI($ref, $this->base), $rest);
  }
  list($name, $rest) = $this->xPrefixedName($rest);
  if ($name) {
    return array($name, $rest);
  }
  return array(0, $rest);
}
611
612 /* 68 */
613
/**
 * Parses a prefixed name ("prefix:local") or a bare namespace prefix ("prefix:").
 *
 * A bare prefix only succeeds when it is registered in the prefix table, in
 * which case the namespace URI is returned.
 *
 * @param string $v Remaining input.
 * @return array array(expanded IRI, remaining input) or array(0, remaining input)
 */
function xPrefixedName($v) {
  list($full, $v) = $this->xPNAME_LN($v);
  if ($full) {
    return array($full, $v);
  }
  list($prefix, $after_prefix) = $this->xPNAME_NS($v);
  if ($prefix && isset($this->prefixes[$prefix])) {
    return array($this->prefixes[$prefix], $after_prefix);
  }
  return array(0, $v);
}
623
624 /* 69.., 73.., 93, 94.. */
625
/**
 * Parses a labelled blank node ("_:label") or an empty "[]".
 *
 * The empty form gets a freshly generated bnode id.
 *
 * @param string $v Remaining input.
 * @return array array(array('type' => 'bnode', 'value'), remaining input) or array(0, $v)
 */
function xBlankNode($v) {
  $marker = $this->x('\_\:', $v);
  if ($marker) {
    list($label, $rest) = $this->xPN_LOCAL($marker[1]);
    if ($label) {
      return array(array('type' => 'bnode', 'value' => '_:' . $label), $rest);
    }
  }
  $anon = $this->x('\[[\x20\x9\xd\xa]*\]', $v);
  if ($anon) {
    return array(array('type' => 'bnode', 'value' => $this->createBnodeID()), $anon[1]);
  }
  return array(0, $v);
}
635
636 /* 70.. @@sync with SPARQLParser */
637
/**
 * Parses an IRI reference in angle brackets.
 *
 * Three forms are recognised, in this order: a placeholder wrapped in angle
 * brackets ("<${...}>"), the empty reference "<>" (reported as true), and a
 * regular "<...>" reference that does not start with whitespace.
 *
 * @param string $v Remaining input.
 * @return array array(reference text or true, remaining input) or array(0, $v)
 */
function xIRI_REF($v) {
  if ($r = $this->x('\<(\$\{[^\>]*\})\>', $v)) {
    /* xPlaceholder() always returns a two-element array, so its first element has to be
       checked; testing the array itself made this condition always true */
    $placeholder = $this->xPlaceholder($r[1]);
    if ($placeholder[0]) {
      return array($r[1], $r[2]);
    }
  }
  if ($r = $this->x('\<\>', $v)) {
    return array(true, $r[1]);
  }
  if ($r = $this->x('\<([^\s][^\<\>]*)\>', $v)) {
    return array($r[1] ? $r[1] : true, $r[2]);
  }
  return array(0, $v);
}
651
652 /* 71 */
653
/**
 * Parses a namespace prefix including its colon ("prefix:" or just ":").
 *
 * @param string $v Remaining input.
 * @return array array(prefix with trailing ':', remaining input) or array(0, $v)
 */
function xPNAME_NS($v) {
  list($name, $rest) = $this->xPN_PREFIX($v);
  $colon = $this->x("\:", $rest);
  if (!$colon) {
    return array(0, $v);
  }
  $prefix = $name ? $name : '';
  return array($prefix . ':', $colon[1]);
}
659
660 /* 72 */
661
/**
 * Parses a prefixed name with local part and expands it to a full IRI.
 *
 * Fails when whitespace follows the colon, when no local part is found, or
 * when the prefix is not registered.
 *
 * @param string $v Remaining input.
 * @return array array(namespace URI . local part, remaining input) or array(0, $v)
 */
function xPNAME_LN($v) {
  list($prefix, $rest) = $this->xPNAME_NS($v);
  if (!$prefix || $this->x('\s', $rest)) {
    return array(0, $v);
  }
  list($local, $rest) = $this->xPN_LOCAL($rest);
  if (!$local || !isset($this->prefixes[$prefix])) {
    return array(0, $v);
  }
  return array($this->prefixes[$prefix] . $local, $rest);
}
673
674 /* 76 */
675
/**
 * Parses a language tag ("@en", "@en-gb") that follows without whitespace.
 *
 * @param string $v Remaining input.
 * @return array array(tag without '@', remaining input) or array(0, $v)
 */
function xLANGTAG($v) {
  if ($this->x('\s', $v)) {
    return array(0, $v);
  }
  $match = $this->x('\@([a-z]+(\-[a-z0-9]+)*)', $v);
  if (!$match) {
    return array(0, $v);
  }
  /* index 2 holds the last sub-tag group, index 3 the remainder */
  return array($match[1], $match[3]);
}
682
683 /* 77.. */
684
/**
 * Parses an unsigned run of digits.
 *
 * @param string $v Remaining input.
 * @return array array(digits, remaining input) or array(false, $v)
 */
function xINTEGER($v) {
  $match = $this->x('([0-9]+)', $v);
  return $match ? array($match[1], $match[2]) : array(false, $v);
}
691
692 /* 78.. */
693
/**
 * Parses an unsigned decimal: digits followed by a dot and optional digits,
 * or a dot followed by digits.
 *
 * @param string $v Remaining input.
 * @return array array(matched text, remaining input) or array(false, $v)
 */
function xDECIMAL($v) {
  $patterns = array('([0-9]+\.[0-9]*)', '(\.[0-9]+)');
  foreach ($patterns as $pattern) {
    $match = $this->x($pattern, $v);
    if ($match) {
      return array($match[1], $match[2]);
    }
  }
  return array(false, $v);
}
703
704 /* 79.., 86.. */
705
/**
 * Parses an unsigned double, i.e. a number with an exponent part.
 *
 * The three mantissa shapes ("1.5E3", ".5E3", "1E3") are tried in that order.
 *
 * @param string $v Remaining input.
 * @return array array(matched text, remaining input) or array(false, $v)
 */
function xDOUBLE($v) {
  $patterns = array(
    '([0-9]+\.[0-9]*E[\+\-]?[0-9]+)',
    '(\.[0-9]+E[\+\-]?[0-9]+)',
    '([0-9]+E[\+\-]?[0-9]+)',
  );
  foreach ($patterns as $pattern) {
    $match = $this->x($pattern, $v);
    if ($match) {
      return array($match[1], $match[2]);
    }
  }
  return array(false, $v);
}
718
719 /* 92 */
720
/**
 * Parses an empty collection "()" and maps it to rdf:nil.
 *
 * @param string $v Remaining input.
 * @return array array(array('type' => 'uri', 'value' => rdf:nil), remaining input) or array(0, $v)
 */
function xNIL($v) {
  $match = $this->x('\([\x20\x9\xd\xa]*\)', $v);
  if (!$match) {
    return array(0, $v);
  }
  $nil = array('type' => 'uri', 'value' => $this->rdf . 'nil');
  return array($nil, $match[1]);
}
727
728 /* 95.. */
729
/**
 * Parses a run of letters or a single backslash-u escape with one to four hex digits.
 *
 * @param string $v Remaining input.
 * @return array array(matched text, remaining input) or array(0, $v)
 */
function xPN_CHARS_BASE($v) {
  $match = $this->x("([a-z]+|\\\u[0-9a-f]{1,4})", $v);
  return $match ? array($match[1], $match[2]) : array(0, $v);
}
736
737 /* 96 */
738
/**
 * Parses base name characters (see xPN_CHARS_BASE) or a single underscore.
 *
 * @param string $v Remaining input.
 * @return array array(matched text, remaining input) or array(0, $v)
 */
function xPN_CHARS_U($v) {
  list($chars, $rest) = $this->xPN_CHARS_BASE($v);
  if ($chars) {
    return array($chars, $rest);
  }
  $underscore = $this->x("(_)", $v);
  if ($underscore) {
    return array($underscore[1], $underscore[2]);
  }
  return array(0, $v);
}
748
749 /* 97.. */
750
/**
 * Collects the characters of a variable name from the start of $v.
 *
 * Digits and name-start characters are accepted anywhere; the extra character
 * class is only accepted once the name is non-empty.
 *
 * NOTE(review): the class [\xb7\x300-\x36f] looks like it is meant to cover
 * U+00B7 and U+0300-U+036F, but without braces PCRE's \x takes at most two hex
 * digits, so it is read as byte B7, "0", the range "0"-"6" and "f" - confirm.
 *
 * @param string $v Remaining input.
 * @return array array(collected name, possibly '', and remaining input)
 */
function xVARNAME($v) {
  $name = '';
  for (;;) {
    $digits = $this->x('([0-9]+)', $v);
    if ($digits) {
      $name .= $digits[1];
      $v = $digits[2];
      continue;
    }
    list($chars, $rest) = $this->xPN_CHARS_U($v);
    if ($chars) {
      $name .= $chars;
      $v = $rest;
      continue;
    }
    if ($name && ($extra = $this->x('([\xb7\x300-\x36f]+)', $v))) {
      $name .= $extra[1];
      $v = $extra[2];
      continue;
    }
    break;
  }
  return array($name, $v);
}
773
774 /* 98.. */
775
/**
 * Parses name characters: those accepted by xPN_CHARS_U(), or one character
 * from the additional class (hyphen, digits and the extra bytes).
 *
 * NOTE(review): as in xVARNAME(), "\x300-\x36f" is read by PCRE as "\x30",
 * the range "0"-"\x36" and "f" rather than U+0300-U+036F - confirm.
 *
 * @param string $v Remaining input.
 * @return array array(matched text, remaining input) or array(false, $v)
 */
function xPN_CHARS($v) {
  list($chars, $rest) = $this->xPN_CHARS_U($v);
  if ($chars) {
    return array($chars, $rest);
  }
  $extra = $this->x('([\-0-9\xb7\x300-\x36f])', $v);
  if ($extra) {
    return array($extra[1], $extra[2]);
  }
  return array(false, $v);
}
785
786 /* 99 */
787
/**
 * Parses the name part of a namespace prefix (the text before the colon).
 *
 * A fast path takes everything up to the next whitespace, colon, bracket,
 * brace, semicolon or comma. The character-wise path only runs when the fast
 * path finds nothing.
 *
 * @param string $v Remaining input.
 * @return array array(prefix name or a falsy value, remaining input)
 */
function xPN_PREFIX($v) {
  $fast = $this->x("([^\s\:\(\)\{\}\;\,]+)", $v, 's');/* accelerator */
  if ($fast) {
    return array($fast[1], $fast[2]);/* @@testing */
  }
  list($name, $rest) = $this->xPN_CHARS_BASE($v);
  if ($name) {
    for (;;) {
      list($chars, $rest) = $this->xPN_CHARS($rest);
      if ($chars !== false) {
        $name .= $chars;
        continue;
      }
      $dot = $this->x("\.", $rest);
      if ($dot) {
        $name .= '.';
        $rest = $dot[1];
        continue;
      }
      break;
    }
    list($chars, $rest) = $this->xPN_CHARS($rest);
    $name .= $chars ? $chars : '';
  }
  return array($name, $rest);
}
811
812 /* 100 */
813
/**
 * Parses the local part of a prefixed name or blank node label.
 *
 * A fast path takes everything up to the next whitespace, bracket, brace,
 * semicolon, comma or dot, provided no dot follows directly. Otherwise the
 * name is collected piece by piece; a dot is only taken into the name when
 * it is not followed by whitespace or "}".
 *
 * @param string $v Remaining input.
 * @return array array(local name, possibly '', and remaining input)
 */
function xPN_LOCAL($v) {
  $fast = $this->x("([^\s\(\)\{\}\[\]\;\,\.]+)", $v, 's');/* accelerator */
  if ($fast && !preg_match('/^\./', $fast[2])) {
    return array($fast[1], $fast[2]);/* @@testing */
  }
  $name = '';
  $rest = $v;
  for (;;) {
    /* leading whitespace ends the name */
    if ($this->x('\s', $rest)) {
      return array($name, $rest);
    }
    $digit = $this->x('([0-9])', $rest);
    if ($digit) {
      $name .= $digit[1];
      $rest = $digit[2];
      continue;
    }
    list($chars, $rest) = $this->xPN_CHARS_U($rest);
    if ($chars) {
      $name .= $chars;
      continue;
    }
    if (!$name) {
      break;
    }
    $dot = $this->x('(\.)', $rest);
    if ($dot && !preg_match('/^[\s\}]/s', $dot[2])) {
      $name .= $dot[1];
      $rest = $dot[2];
    }
    list($chars, $rest) = $this->xPN_CHARS($rest);
    if (!$chars) {
      break;
    }
    $name .= $chars;
  }
  return array($name, $rest);
}
847
848 /* */
849
/**
 * Decodes backslash escapes in a literal value.
 *
 * Handles \t \n \r \b \f \" \' \\ as well as \uXXXX and \UXXXXXXXX (hex digits
 * in either case), which are emitted as UTF-8. The input is scanned once from
 * left to right, so an escaped backslash can no longer combine with the
 * following character into another escape, and decoded output is never
 * decoded a second time. Unknown escapes are left untouched.
 *
 * @param string $v Raw literal content.
 * @return string Decoded content.
 */
function unescapeNtripleUTF($v) {
  if (strpos($v, '\\') === false) return $v;
  $simple = array(
    't' => "\t", 'n' => "\n", 'r' => "\r", 'b' => "\x08", 'f' => "\x0c",
    '"' => '"', "'" => "'", '\\' => '\\',
  );
  $out = '';
  $len = strlen($v);
  $pos = 0;
  while (($bs = strpos($v, '\\', $pos)) !== false) {
    /* copy everything in front of the backslash verbatim */
    $out .= substr($v, $pos, $bs - $pos);
    $next = ($bs + 1 < $len) ? $v[$bs + 1] : '';
    if ($next !== '' && isset($simple[$next])) {
      $out .= $simple[$next];
      $pos = $bs + 2;
      continue;
    }
    $digits = ($next === 'u') ? 4 : (($next === 'U') ? 8 : 0);
    if ($digits && preg_match('/^[0-9A-Fa-f]{' . $digits . '}/', (string) substr($v, $bs + 2, $digits), $m)) {
      $no = hexdec($m[0]);
      if ($no < 128) $char = chr($no);
      else if ($no < 2048) $char = chr(($no >> 6) + 192) . chr(($no & 63) + 128);
      else if ($no < 65536) $char = chr(($no >> 12) + 224) . chr((($no >> 6) & 63) + 128) . chr(($no & 63) + 128);
      else if ($no < 2097152) $char = chr(($no >> 18) + 240) . chr((($no >> 12) & 63) + 128) . chr((($no >> 6) & 63) + 128) . chr(($no & 63) + 128);
      else $char = '';/* beyond the four-byte range: dropped */
      $out .= $char;
      $pos = $bs + 2 + $digits;
      continue;
    }
    /* not a known escape: keep the backslash and move on */
    $out .= '\\';
    $pos = $bs + 1;
  }
  return $out . substr($v, $pos);
}
868
869 /* */
870
/**
 * Parses a placeholder of the form "?{...}" or "${...}".
 *
 * The braces may be nested; the outermost pair must start right after the
 * marker. Every distinct placeholder body is recorded once in
 * $this->r['placeholders'].
 *
 * @param string $v Remaining input.
 * @return array array(array('value' => body, 'type' => 'placeholder'), remaining input) or array(0, $v)
 */
function xPlaceholder($v) {
  if ($r = $this->x('(\?|\$)', $v)) {
    $after_marker = trim($r[2]);
    /* (?R) recurses into the pattern so that nested braces stay balanced */
    if (preg_match('/(\{(?:[^{}]+|(?R))*\})/', $r[2], $m) && strpos($after_marker, $m[1]) === 0) {
      $ph = substr($m[1], 1, -1);
      $rest = substr($after_marker, strlen($m[1]));
      if (!isset($this->r['placeholders'])) $this->r['placeholders'] = array();
      /* strict lookup: a loose comparison treats numeric-looking bodies such as "1" and "01" as equal */
      if (!in_array($ph, $this->r['placeholders'], true)) $this->r['placeholders'][] = $ph;
      return array(array('value' => $ph, 'type' => 'placeholder'), $rest);
    }
  }
  return array(0, $v);
}
884
885 /* */
886 }