danielebarchiesi@4
|
1 <?php
|
danielebarchiesi@4
|
2 /**
|
danielebarchiesi@4
|
3 * ARC2 RDF/XML Parser
|
danielebarchiesi@4
|
4 *
|
danielebarchiesi@4
|
5 * @author Benjamin Nowack <bnowack@semsol.com>
|
danielebarchiesi@4
|
6 * @license http://arc.semsol.org/license
|
danielebarchiesi@4
|
7 * @homepage <http://arc.semsol.org/>
|
danielebarchiesi@4
|
8 * @package ARC2
|
danielebarchiesi@4
|
9 */
|
danielebarchiesi@4
|
10
|
danielebarchiesi@4
|
11 ARC2::inc('RDFParser');
|
danielebarchiesi@4
|
12
|
danielebarchiesi@4
|
/**
 * Streaming RDF/XML parser.
 *
 * The document is fed chunk-wise to PHP's XML extension; the element,
 * character-data and namespace callbacks drive a small state machine
 * ($this->state) that turns the event stream into triples.
 *
 * States, as used by the handlers below:
 *   0  nothing opened yet (see h0Open)
 *   1  inside the root element, expecting a node element
 *   2  inside a node element, expecting a property element
 *   3  inside a property whose object is already known (resource, nodeID, property attributes)
 *   4  inside a property, expecting a sub-node, literal text or collection item
 *   5  a sub-node has just been closed inside a property
 *   6  inside a property with rdf:parseType="Literal"
 *
 * Open nodes are kept on $this->s_stack; each entry is an array with at least
 * 'value', 'type', 'x_base' and 'x_lang', plus transient keys describing the
 * property currently being processed ('p', 'p_id', 'o_cdata', ...).
 */
class ARC2_RDFXMLParser extends ARC2_RDFParser {

  /**
   * @param array  $a      configuration array, passed on to the parent class
   * @param object $caller calling component, passed on (by reference) to the parent class
   */
  function __construct($a, &$caller) {
    parent::__construct($a, $caller);
  }

  /**
   * Resets all parser state. Also re-run by parse() before the ISO-8859-1 retry.
   */
  function __init() {/* reader */
    parent::__init();
    $this->encoding = $this->v('encoding', false, $this->a);
    $this->state = 0;
    $this->x_lang = '';
    $this->x_base = $this->base;
    $this->xml = 'http://www.w3.org/XML/1998/namespace';
    $this->rdf = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
    /* namespace URI => prefix, extended by nsDecl() */
    $this->nsp = array($this->xml => 'xml', $this->rdf => 'rdf');
    $this->s_stack = array();
    $this->s_count = 0;
    $this->target_encoding = '';
  }

  /* */

  /**
   * Parses an RDF/XML document.
   *
   * If the XML parser reports an "Invalid character" error, the document is
   * re-read once with a forced ISO-8859-1 XML declaration.
   *
   * @param string $path         location handed to the reader
   * @param string $data         optional document content handed to the reader
   * @param bool   $iso_fallback true only for the internal ISO-8859-1 retry
   * @return mixed result of done() on success, result of addError() on an XML error
   */
  function parse($path, $data = '', $iso_fallback = false) {
    /* reader */
    if (!$this->v('reader')) {
      ARC2::inc('Reader');
      $this->reader = new ARC2_Reader($this->a, $this);
    }
    $this->reader->setAcceptHeader('Accept: application/rdf+xml; q=0.9, */*; q=0.1');
    $this->reader->activate($path, $data);
    $this->x_base = isset($this->a['base']) && $this->a['base'] ? $this->a['base'] : $this->reader->base;
    /* xml parser */
    $this->initXMLParser();
    /* parse */
    $first = true;
    while ($d = $this->reader->readStream()) {
      if (!$this->keep_time_limit) {
        @set_time_limit($this->v('time_limit', 60, $this->a));
      }
      if ($iso_fallback && $first) {
        /* replace any existing XML declaration of the first chunk with an ISO-8859-1 one */
        $d = '<?xml version="1.0" encoding="ISO-8859-1"?>' . "\n" . preg_replace('/^\<\?xml [^\>]+\?\>\s*/s', '', $d);
        $first = false;
      }
      if (xml_parse($this->xml_parser, $d, false)) {
        continue;
      }
      $error_str = xml_error_string(xml_get_error_code($this->xml_parser));
      $line = xml_get_current_line_number($this->xml_parser);
      $this->tmp_error = 'XML error: "' . $error_str . '" at line ' . $line . ' (parsing as ' . $this->getEncoding() . ')';
      /* the failed parser is of no further use; free it and drop the handle so that
         initXMLParser() builds a fresh one on the next run */
      xml_parser_free($this->xml_parser);
      unset($this->xml_parser);
      if (!$iso_fallback && preg_match("/Invalid character/i", $error_str)) {
        $this->reader->closeStream();
        $this->__init();
        $this->encoding = 'ISO-8859-1';
        unset($this->xml_parser);/* defensive, in case __init() touched it */
        unset($this->reader);
        return $this->parse($path, $data, true);
      }
      /* the reader is intentionally left as it was, as in earlier versions */
      return $this->addError($this->tmp_error);
    }
    /* NOTE(review): xml_parse() is never called with the "final" flag, so a truncated
       document is not reported as an error - confirm whether that is intended */
    $this->target_encoding = xml_parser_get_option($this->xml_parser, XML_OPTION_TARGET_ENCODING);
    xml_parser_free($this->xml_parser);
    /* drop the handle as well: a freed parser must not be re-used by a later parse() call */
    unset($this->xml_parser);
    $this->reader->closeStream();
    unset($this->reader);
    return $this->done();
  }

  /* */

  /**
   * Creates the namespace-aware XML parser and registers the handlers, unless
   * a parser already exists.
   */
  function initXMLParser() {
    if (isset($this->xml_parser)) {
      return;
    }
    $enc = preg_match('/^(utf\-8|iso\-8859\-1|us\-ascii)$/i', $this->getEncoding(), $m) ? $m[1] : 'UTF-8';
    /* empty separator: element/attribute names arrive as namespace URI directly followed by the local name */
    $parser = xml_parser_create_ns($enc, '');
    xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 0);
    xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
    /* object/method callables replace xml_set_object() + method-name strings */
    xml_set_element_handler($parser, array($this, 'open'), array($this, 'close'));
    xml_set_character_data_handler($parser, array($this, 'cdata'));
    xml_set_start_namespace_decl_handler($parser, array($this, 'nsDecl'));
    $this->xml_parser = $parser;
  }

  /* */

  /**
   * @param string $src 'parser' for the XML parser's target encoding, 'config' (default)
   *                    for the configured encoding with a fallback to the reader's encoding
   * @return string
   */
  function getEncoding($src = 'config') {
    if ($src === 'parser') {
      return $this->target_encoding;
    }
    if (($src === 'config') && $this->encoding) {
      return $this->encoding;
    }
    /* parse() removes the reader when it is done; fall back to the parser's encoding then */
    return isset($this->reader) ? $this->reader->getEncoding() : $this->target_encoding;
  }

  /* */

  /**
   * @return array the triples collected so far
   */
  function getTriples() {
    return $this->v('triples', array());
  }

  /**
   * @return int number of triples collected so far
   */
  function countTriples() {
    return $this->t_count;
  }

  /* */

  /**
   * Pushes a node onto the stack and records its stack position in $s['pos'].
   *
   * @param array $s node entry (modified: 'pos' is set)
   */
  function pushS(&$s) {
    $s['pos'] = $this->s_count;
    $this->s_stack[$this->s_count] = $s;
    $this->s_count++;
  }

  /**
   * Removes the topmost stack entry. Does nothing on an empty stack.
   */
  function popS() {
    if ($this->s_count > 0) {
      $this->s_count--;
    }
    $this->s_stack = array_slice($this->s_stack, 0, $this->s_count);
  }

  /**
   * Writes a (modified) node entry back to its stack position.
   *
   * @param array $s node entry previously obtained from the stack (needs 'pos')
   */
  function updateS($s) {
    $this->s_stack[$s['pos']] = $s;
  }

  /**
   * @return array|false copy of the topmost stack entry, false if the stack is empty
   */
  function getParentS() {
    if (!$this->s_count || !isset($this->s_stack[$this->s_count - 1])) {
      return false;
    }
    return $this->s_stack[$this->s_count - 1];
  }

  /**
   * @return string base URI in scope: the current property's xml:base if set, else the
   *                enclosing node's, else the document base
   */
  function getParentXBase() {
    $p = $this->getParentS();
    if (!$p) {
      return $this->x_base;
    }
    if (isset($p['p_x_base']) && $p['p_x_base']) {
      return $p['p_x_base'];
    }
    return isset($p['x_base']) ? $p['x_base'] : '';
  }

  /**
   * @return string language in scope: the current property's xml:lang if set, else the
   *                enclosing node's, else the document language
   */
  function getParentXLang() {
    $p = $this->getParentS();
    if (!$p) {
      return $this->x_lang;
    }
    if (isset($p['p_x_lang']) && $p['p_x_lang']) {
      return $p['p_x_lang'];
    }
    return isset($p['x_lang']) ? $p['x_lang'] : '';
  }

  /* */

  /**
   * Stores a triple. With skip_dupes enabled, a triple identical to an earlier one is dropped.
   *
   * @param string $s      subject value
   * @param string $p      predicate URI
   * @param string $o      object value
   * @param string $s_type subject type as used in this class: 'uri' or 'bnode'
   * @param string $o_type object type as used in this class: 'uri', 'bnode' or 'literal'
   * @param string $o_dt   datatype URI for literals
   * @param string $o_lang language tag for literals
   */
  function addT($s, $p, $o, $s_type, $o_type, $o_dt = '', $o_lang = '') {
    $t = array('s' => $s, 'p' => $p, 'o' => $o, 's_type' => $s_type, 'o_type' => $o_type, 'o_datatype' => $o_dt, 'o_lang' => $o_lang);
    if ($this->skip_dupes) {
      $h = md5(serialize($t));
      if (isset($this->added_triples[$h])) {
        return;
      }
      $this->added_triples[$h] = true;
    }
    $this->triples[$this->t_count] = $t;
    $this->t_count++;
  }

  /**
   * Adds the four reification triples describing the statement (s, p, o) under the URI $t.
   *
   * @param string $t statement URI
   */
  function reify($t, $s, $p, $o, $s_type, $o_type, $o_dt = '', $o_lang = '') {
    $this->addT($t, $this->rdf.'type', $this->rdf.'Statement', 'uri', 'uri');
    $this->addT($t, $this->rdf.'subject', $s, 'uri', $s_type);
    $this->addT($t, $this->rdf.'predicate', $p, 'uri', 'uri');
    $this->addT($t, $this->rdf.'object', $o, 'uri', $o_type, $o_dt, $o_lang);
  }

  /* */

  /**
   * XML start-element callback; dispatches on the current state.
   *
   * @param mixed  $p XML parser handle (unused)
   * @param string $t expanded element name
   * @param array  $a attributes, keyed by expanded name
   */
  function open($p, $t, $a) {
    switch ($this->state) {
      case 0: return $this->h0Open($t, $a);
      case 1: return $this->h1Open($t, $a);
      case 2: return $this->h2Open($t, $a);
      case 4: return $this->h4Open($t, $a);
      case 5: return $this->h5Open($t, $a);
      case 6: return $this->h6Open($t, $a);
      default: $this->addError('open() called at state ' . $this->state . ' in '.$t);
    }
  }

  /**
   * XML end-element callback; dispatches on the current state.
   *
   * @param mixed  $p XML parser handle (unused)
   * @param string $t expanded element name
   */
  function close($p, $t) {
    switch ($this->state) {
      case 1: return $this->h1Close($t);
      case 2: return $this->h2Close($t);
      case 3: return $this->h3Close($t);
      case 4: return $this->h4Close($t);
      case 5: return $this->h5Close($t);
      case 6: return $this->h6Close($t);
      default: $this->addError('close() called at state ' . $this->state . ' in '.$t);
    }
  }

  /**
   * XML character-data callback; text is only collected in states 4 and 6.
   *
   * @param mixed  $p XML parser handle (unused)
   * @param string $d text chunk
   */
  function cdata($p, $d) {
    switch ($this->state) {
      case 4: return $this->h4Cdata($d);
      case 6: return $this->h6Cdata($d);
      default: return false;
    }
  }

  /**
   * Namespace-declaration callback. The first prefix seen for a namespace URI wins.
   *
   * @param mixed  $p   XML parser handle (unused)
   * @param string $prf declared prefix
   * @param string $uri namespace URI
   */
  function nsDecl($p, $prf, $uri) {
    if (!isset($this->nsp[$uri])) {
      $this->nsp[$uri] = $prf;
    }
  }

  /* */

  /**
   * First element of the document: picks up document-level xml:lang / xml:base.
   * A root other than rdf:RDF is treated as a node element right away.
   */
  function h0Open($t, $a) {
    $this->x_lang = $this->v($this->xml.'lang', $this->x_lang, $a);
    $this->x_base = $this->calcURI($this->v($this->xml.'base', $this->x_base, $a));
    $this->state = 1;
    if ($t !== $this->rdf.'RDF') {
      $this->h1Open($t, $a);
    }
  }

  /* */

  /**
   * Opens a node element: determines the subject, links it to the enclosing
   * property or collection (when called in state 4), emits typing and
   * property-attribute triples, and pushes the node onto the stack.
   */
  function h1Open($t, $a) {
    $s = array(
      'x_base' => isset($a[$this->xml.'base']) ? $this->calcURI($a[$this->xml.'base']) : $this->getParentXBase(),
      'x_lang' => isset($a[$this->xml.'lang']) ? $a[$this->xml.'lang'] : $this->getParentXLang(),
      'li_count' => 0,
    );
    /* subject: rdf:ID, rdf:about, rdf:nodeID or a fresh bnode */
    if (isset($a[$this->rdf.'ID'])) {
      $s['type'] = 'uri';
      $s['value'] = $this->calcURI('#'.$a[$this->rdf.'ID'], $s['x_base']);
    }
    elseif (isset($a[$this->rdf.'about'])) {
      $s['type'] = 'uri';
      $s['value'] = $this->calcURI($a[$this->rdf.'about'], $s['x_base']);
    }
    else {
      $s['type'] = 'bnode';
      $s['value'] = isset($a[$this->rdf.'nodeID']) ? '_:'.$a[$this->rdf.'nodeID'] : $this->createBnodeID();
    }
    /* sub-node */
    if ($this->state === 4) {
      $sup_s = $this->getParentS();
      $starts_coll = isset($sup_s['o_is_coll']) && $sup_s['o_is_coll'];
      $extends_coll = !$starts_coll && isset($sup_s['is_coll']) && $sup_s['is_coll'];
      if ($starts_coll || $extends_coll) {
        /* new list cell: hangs off the property (first item) or off the previous cell via rdf:rest */
        $coll = array('value' => $this->createBnodeID(), 'type' => 'bnode', 'is_coll' => true, 'x_base' => $s['x_base'], 'x_lang' => $s['x_lang']);
        $link = $starts_coll ? $sup_s['p'] : $this->rdf . 'rest';
        $this->addT($sup_s['value'], $link, $coll['value'], $sup_s['type'], $coll['type']);
        $this->addT($coll['value'], $this->rdf . 'first', $s['value'], $coll['type'], $s['type']);
        $this->pushS($coll);
      }
      /* normal sub-node */
      elseif (isset($sup_s['p']) && $sup_s['p']) {
        $this->addT($sup_s['value'], $sup_s['p'], $s['value'], $sup_s['type'], $s['type']);
      }
    }
    /* typed node */
    if ($t !== $this->rdf.'Description') {
      $this->addT($s['value'], $this->rdf.'type', $t, $s['type'], 'uri');
    }
    /* (additional) typing attr */
    if (isset($a[$this->rdf.'type'])) {
      $this->addT($s['value'], $this->rdf.'type', $a[$this->rdf.'type'], $s['type'], 'uri');
    }
    /* Seq|Bag|Alt */
    if (in_array($t, array($this->rdf.'Seq', $this->rdf.'Bag', $this->rdf.'Alt'), true)) {
      $s['is_con'] = true;
    }
    /* any other attrs (skip rdf and xml, except rdf:_, rdf:value, rdf:Seq) */
    foreach ($a as $k => $v) {
      $is_foreign = (strpos($k, $this->xml) === false) && (strpos($k, $this->rdf) === false);
      if (!$is_foreign && !preg_match('/(\_[0-9]+|value|Seq|Bag|Alt|Statement|Property|List)$/', $k)) {
        continue;
      }
      if (strpos($k, ':')) {
        $this->addT($s['value'], $k, $v, $s['type'], 'literal', '', $s['x_lang']);
      }
    }
    $this->pushS($s);
    $this->state = 2;
  }

  /* */

  /**
   * Opens a property element of the node on top of the stack. Depending on the
   * attributes, the object is emitted immediately (rdf:resource, rdf:nodeID,
   * property attributes) or the parser switches to the state that collects it
   * (XML literal, parseType Resource/Collection, sub-node or plain literal).
   */
  function h2Open($t, $a) {
    $s = $this->getParentS();
    /* forget what the previous property left behind */
    foreach (array('p_x_base', 'p_x_lang', 'p_id', 'o_is_coll') as $k) {
      unset($s[$k]);
    }
    /* base */
    if (isset($a[$this->xml.'base'])) {
      $s['p_x_base'] = $this->calcURI($a[$this->xml.'base'], $s['x_base']);
    }
    $b = isset($s['p_x_base']) && $s['p_x_base'] ? $s['p_x_base'] : $s['x_base'];
    /* lang */
    if (isset($a[$this->xml.'lang'])) {
      $s['p_x_lang'] = $a[$this->xml.'lang'];
    }
    $l = isset($s['p_x_lang']) && $s['p_x_lang'] ? $s['p_x_lang'] : $s['x_lang'];
    /* adjust li: rdf:li becomes rdf:_n, numbered per node */
    if ($t === $this->rdf.'li') {
      $s['li_count']++;
      $t = $this->rdf.'_'.$s['li_count'];
    }
    /* set p */
    $s['p'] = $t;
    /* reification */
    if (isset($a[$this->rdf.'ID'])) {
      $s['p_id'] = $a[$this->rdf.'ID'];
    }
    $o = array('value' => '', 'type' => '', 'x_base' => $b, 'x_lang' => $l);
    /* resource/rdf:resource: an unqualified "resource" attribute is accepted as rdf:resource */
    if (isset($a['resource'])) {
      $a[$this->rdf . 'resource'] = $a['resource'];
      unset($a['resource']);
    }
    if (isset($a[$this->rdf.'resource'])) {
      $o['value'] = $this->calcURI($a[$this->rdf.'resource'], $b);
      $o['type'] = 'uri';
      $this->addT($s['value'], $s['p'], $o['value'], $s['type'], $o['type']);
      /* type */
      if (isset($a[$this->rdf.'type'])) {
        $this->addT($o['value'], $this->rdf.'type', $a[$this->rdf.'type'], 'uri', 'uri');
      }
      /* reification */
      if (isset($s['p_id'])) {
        $this->reify($this->calcURI('#'.$s['p_id'], $b), $s['value'], $s['p'], $o['value'], $s['type'], $o['type']);
        unset($s['p_id']);
      }
      $this->state = 3;
    }
    /* named bnode */
    elseif (isset($a[$this->rdf.'nodeID'])) {
      $o['value'] = '_:' . $a[$this->rdf.'nodeID'];
      $o['type'] = 'bnode';
      $this->addT($s['value'], $s['p'], $o['value'], $s['type'], $o['type']);
      $this->state = 3;
      /* reification */
      if (isset($s['p_id'])) {
        $this->reify($this->calcURI('#'.$s['p_id'], $b), $s['value'], $s['p'], $o['value'], $s['type'], $o['type']);
        /* cleared like in the other branches, so the attribute loop below does not reify again */
        unset($s['p_id']);
      }
    }
    /* parseType */
    elseif (isset($a[$this->rdf.'parseType'])) {
      $parse_type = $a[$this->rdf.'parseType'];
      if ($parse_type === 'Literal') {
        $s['o_xml_level'] = 0;
        $s['o_xml_data'] = '';
        $s['p_xml_literal_level'] = 0;
        $s['ns'] = array();
        $this->state = 6;
      }
      elseif ($parse_type === 'Resource') {
        /* the property element itself acts as a bnode node element */
        $o['value'] = $this->createBnodeID();
        $o['type'] = 'bnode';
        $o['has_closing_tag'] = 0;
        $this->addT($s['value'], $s['p'], $o['value'], $s['type'], $o['type']);
        $this->pushS($o);
        /* reification */
        if (isset($s['p_id'])) {
          $this->reify($this->calcURI('#'.$s['p_id'], $b), $s['value'], $s['p'], $o['value'], $s['type'], $o['type']);
          unset($s['p_id']);
        }
        $this->state = 2;
      }
      elseif ($parse_type === 'Collection') {
        $s['o_is_coll'] = true;
        $this->state = 4;
      }
    }
    /* sub-node or literal */
    else {
      $s['o_cdata'] = '';
      if (isset($a[$this->rdf.'datatype'])) {
        $s['o_datatype'] = $a[$this->rdf.'datatype'];
      }
      $this->state = 4;
    }
    /* any other attrs (skip rdf and xml) */
    foreach ($a as $k => $v) {
      $is_foreign = (strpos($k, $this->xml) === false) && (strpos($k, $this->rdf) === false);
      if (!$is_foreign && !preg_match('/(\_[0-9]+|value)$/', $k)) {
        continue;
      }
      if (!strpos($k, ':')) {
        continue;
      }
      /* property attributes describe the object; create a bnode object if none exists yet */
      if (!$o['value']) {
        $o['value'] = $this->createBnodeID();
        $o['type'] = 'bnode';
        $this->addT($s['value'], $s['p'], $o['value'], $s['type'], $o['type']);
      }
      /* reification */
      if (isset($s['p_id'])) {
        $this->reify($this->calcURI('#'.$s['p_id'], $b), $s['value'], $s['p'], $o['value'], $s['type'], $o['type']);
        unset($s['p_id']);
      }
      $this->addT($o['value'], $k, $v, $o['type'], 'literal');
      $this->state = 3;
    }
    $this->updateS($s);
  }

  /* */

  /**
   * Element inside a property: a (sub-)node element.
   */
  function h4Open($t, $a) {
    return $this->h1Open($t, $a);
  }

  /* */

  /**
   * Another element after a closed sub-node: handled like a sub-node in state 4.
   */
  function h5Open($t, $a) {
    $this->state = 4;
    return $this->h4Open($t, $a);
  }

  /* */

  /**
   * Splits an expanded element name for XML literal serialization.
   *
   * @param string $t expanded element name
   * @return array|false array(namespace URI, local name, prefix ('' if unknown)), or false
   *                     when splitURI() finds no local part
   */
  function resolveXMLLiteralName($t) {
    $parts = $this->splitURI($t);
    if ((count($parts) === 1) || empty($parts[1])) {
      return false;
    }
    $ns_uri = $parts[0];
    $name = $parts[1];
    if (!isset($this->nsp[$ns_uri])) {
      /* the split did not hit a declared namespace; try the declared ones as name prefixes */
      foreach ($this->nsp as $known_uri => $known_prefix) {
        if (strpos($t, $known_uri) === 0) {
          $ns_uri = $known_uri;
          $name = substr($t, strlen($known_uri));
          break;
        }
      }
    }
    $nsp = isset($this->nsp[$ns_uri]) ? $this->nsp[$ns_uri] : '';
    return array($ns_uri, $name, $nsp);
  }

  /**
   * Start tag inside an XML literal: re-serialized and appended to the literal text.
   * A namespace declaration is written the first time a prefix/namespace pair is used.
   */
  function h6Open($t, $a) {
    $s = $this->getParentS();
    $data = isset($s['o_xml_data']) ? $s['o_xml_data'] : '';
    $ns = isset($s['ns']) ? $s['ns'] : array();
    $qname = $this->resolveXMLLiteralName($t);
    if ($qname === false) {
      $data .= '<'.$t;
    }
    else {
      list($ns_uri, $name, $nsp) = $qname;
      $data .= $nsp ? '<' . $nsp . ':' . $name : '<' . $name;
      /* ns */
      $ns_key = $nsp.'='.$ns_uri;
      if (!isset($ns[$ns_key]) || !$ns[$ns_key]) {
        $data .= $nsp ? ' xmlns:'.$nsp.'="'.$ns_uri.'"' : ' xmlns="'.$ns_uri.'"';
        $ns[$ns_key] = true;
        $s['ns'] = $ns;
      }
    }
    foreach ($a as $k => $v) {
      /* attribute values arrive decoded; escape them so the literal stays well-formed */
      $v = htmlspecialchars($v, ENT_COMPAT);
      $parts = $this->splitURI($k);
      if (count($parts) === 1) {
        $data .= ' '.$k.'="'.$v.'"';
      }
      else {
        $nsp = $this->v($parts[0], '', $this->nsp);
        $data .= $nsp ? ' '.$nsp.':'.$parts[1].'="'.$v.'"' : ' '.$parts[1].'="'.$v.'"';
      }
    }
    $data .= '>';
    $s['o_xml_data'] = $data;
    $s['o_xml_level'] = isset($s['o_xml_level']) ? $s['o_xml_level'] + 1 : 1;
    if (isset($s['p']) && ($t === $s['p'])) {/* xml container prop */
      $s['p_xml_literal_level'] = isset($s['p_xml_literal_level']) ? $s['p_xml_literal_level'] + 1 : 1;
    }
    $this->updateS($s);
  }

  /* */

  /**
   * Close tag in state 1: end of doc.
   */
  function h1Close($t) {
    $this->state = 0;
  }

  /* */

  /**
   * Close tag while expecting a property: the node on top of the stack ends.
   * Entries pushed for parseType="Resource" ('has_closing_tag' = 0) are closed by their
   * property's end tag, so the parser returns to state 2 for them.
   */
  function h2Close($t) {
    $s = $this->getParentS();
    if (!$s) {
      return;
    }
    $has_closing_tag = (isset($s['has_closing_tag']) && !$s['has_closing_tag']) ? 0 : 1;
    $this->popS();
    $this->state = 5;
    $s = $this->getParentS();/* new s */
    if (!$s) {
      return;
    }
    if (!$has_closing_tag) {
      $this->state = 2;
    }
    elseif (!isset($s['p']) || !$s['p']) {/* p close after collection|parseType=Resource|node close after p close */
      $this->state = $this->s_count ? 4 : 1;
    }
  }

  /* */

  /**
   * Close tag of a property whose object was already emitted.
   */
  function h3Close($t) {
    $this->state = 2;
  }

  /* */

  /**
   * Close tag of a property in state 4: empty p | pClose after cdata | pClose after collection.
   * Terminates a collection with rdf:nil, or emits the collected text as a literal.
   */
  function h4Close($t) {
    $s = $this->getParentS();
    if (!$s) {
      return;
    }
    $b = isset($s['p_x_base']) && $s['p_x_base'] ? $s['p_x_base'] : (isset($s['x_base']) ? $s['x_base'] : '');
    if (isset($s['is_coll']) && $s['is_coll']) {
      $this->addT($s['value'], $this->rdf . 'rest', $this->rdf . 'nil', $s['type'], 'uri');
      /* back to collection start: pop the list cells until the node owning the property is on top.
         The owner is recognised by its property name or, since rdf:li is stored as rdf:_n and
         never equals the closing tag, by its collection flag. */
      $sub_s = $s;
      while ($s && !(isset($s['p']) && (($s['p'] === $t) || (isset($s['o_is_coll']) && $s['o_is_coll'])))) {
        $sub_s = $s;
        $this->popS();
        $s = $this->getParentS();
      }
      if (!$s) {
        /* stack exhausted without finding the owner; report instead of looping forever */
        $this->addError('h4Close() could not find the collection start for '.$t);
        $this->state = 2;
        return;
      }
      /* reification: the object is the first list cell */
      if (isset($s['p_id']) && $s['p_id']) {
        $this->reify($this->calcURI('#'.$s['p_id'], $b), $s['value'], $s['p'], $sub_s['value'], $s['type'], $sub_s['type']);
      }
      unset($s['p']);
      $this->updateS($s);
    }
    elseif (isset($s['o_is_coll']) && $s['o_is_coll']) {
      /* parseType="Collection" without items: the object is the empty list */
      $nil = $this->rdf . 'nil';
      $this->addT($s['value'], $s['p'], $nil, $s['type'], 'uri');
      /* reification */
      if (isset($s['p_id']) && $s['p_id']) {
        $this->reify($this->calcURI('#'.$s['p_id'], $b), $s['value'], $s['p'], $nil, $s['type'], 'uri');
      }
      unset($s['o_cdata']);
      unset($s['o_is_coll']);
      unset($s['p']);
      $this->updateS($s);
    }
    else {
      $dt = isset($s['o_datatype']) ? $s['o_datatype'] : '';
      $l = isset($s['p_x_lang']) && $s['p_x_lang'] ? $s['p_x_lang'] : (isset($s['x_lang']) ? $s['x_lang'] : '');
      $o = array('type' => 'literal', 'value' => isset($s['o_cdata']) ? $s['o_cdata'] : '');
      $this->addT($s['value'], $s['p'], $o['value'], $s['type'], $o['type'], $dt, $l);
      /* reification */
      if (isset($s['p_id']) && $s['p_id']) {
        $this->reify($this->calcURI('#'.$s['p_id'], $b), $s['value'], $s['p'], $o['value'], $s['type'], $o['type'], $dt, $l);
      }
      unset($s['o_cdata']);
      unset($s['o_datatype']);
      unset($s['p']);
      $this->updateS($s);
    }
    $this->state = 2;
  }

  /* */

  /**
   * Close tag of a property after its sub-node was closed.
   */
  function h5Close($t) {
    $s = $this->getParentS();
    if (!$s) {
      return;
    }
    unset($s['p']);
    $this->updateS($s);
    $this->state = 2;
  }

  /* */

  /**
   * Close tag inside an XML literal. At nesting level 0 it is the property's own
   * end tag and the literal is emitted; otherwise the tag is appended to the literal text.
   */
  function h6Close($t) {
    $s = $this->getParentS();
    if (!$s) {
      return;
    }
    $l = isset($s['p_x_lang']) && $s['p_x_lang'] ? $s['p_x_lang'] : (isset($s['x_lang']) ? $s['x_lang'] : '');
    $data = $s['o_xml_data'];
    $level = $s['o_xml_level'];
    if ($level === 0) {/* pClose */
      $this->addT($s['value'], $s['p'], trim($data, ' '), $s['type'], 'literal', $this->rdf.'XMLLiteral', $l);
      unset($s['o_xml_data']);
      $this->state = 2;
    }
    else {
      $qname = $this->resolveXMLLiteralName($t);
      if ($qname === false) {
        $data .= '</'.$t.'>';
      }
      else {
        list(, $name, $nsp) = $qname;
        $data .= $nsp ? '</'.$nsp.':'.$name.'>' : '</'.$name.'>';
      }
      $s['o_xml_data'] = $data;
      $s['o_xml_level'] = $level - 1;
      if ($t === $s['p']) {/* xml container prop */
        $s['p_xml_literal_level']--;
      }
    }
    $this->updateS($s);
  }

  /* */

  /**
   * Text inside a property: appended to the pending plain literal.
   */
  function h4Cdata($d) {
    $s = $this->getParentS();
    if (!$s) {
      return;
    }
    $s['o_cdata'] = isset($s['o_cdata']) ? $s['o_cdata'] . $d : $d;
    $this->updateS($s);
  }

  /* */

  /**
   * Text inside an XML literal: escaped and appended to the literal text.
   */
  function h6Cdata($d) {
    $s = $this->getParentS();
    if (!$s) {
      return;
    }
    if (isset($s['o_xml_data']) || preg_match("/[\n\r]/", $d) || trim($d)) {
      $d = htmlspecialchars($d, ENT_NOQUOTES);
      $s['o_xml_data'] = isset($s['o_xml_data']) ? $s['o_xml_data'] . $d : $d;
    }
    $this->updateS($s);
  }

  /* */

}
|