Chris@0
|
1 <?php
|
Chris@0
|
2
|
Chris@0
|
3 /**
|
Chris@0
|
4 * EasyRdf
|
Chris@0
|
5 *
|
Chris@0
|
6 * LICENSE
|
Chris@0
|
7 *
|
Chris@0
|
8 * Copyright (c) 2012-2013 Nicholas J Humfrey.
|
Chris@0
|
9 * All rights reserved.
|
Chris@0
|
10 *
|
Chris@0
|
11 * Redistribution and use in source and binary forms, with or without
|
Chris@0
|
12 * modification, are permitted provided that the following conditions are met:
|
Chris@0
|
13 * 1. Redistributions of source code must retain the above copyright
|
Chris@0
|
14 * notice, this list of conditions and the following disclaimer.
|
Chris@0
|
15 * 2. Redistributions in binary form must reproduce the above copyright notice,
|
Chris@0
|
16 * this list of conditions and the following disclaimer in the documentation
|
Chris@0
|
17 * and/or other materials provided with the distribution.
|
Chris@0
|
18 * 3. The name of the author 'Nicholas J Humfrey" may be used to endorse or
|
Chris@0
|
19 * promote products derived from this software without specific prior
|
Chris@0
|
20 * written permission.
|
Chris@0
|
21 *
|
Chris@0
|
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
Chris@0
|
23 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
Chris@0
|
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
Chris@0
|
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
Chris@0
|
26 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
Chris@0
|
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
Chris@0
|
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
Chris@0
|
29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
Chris@0
|
30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
Chris@0
|
31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
Chris@0
|
32 * POSSIBILITY OF SUCH DAMAGE.
|
Chris@0
|
33 *
|
Chris@0
|
34 * @package EasyRdf
|
Chris@0
|
35 * @copyright Copyright (c) 2009-2013 Nicholas J Humfrey
|
Chris@0
|
36 * Copyright (c) 1997-2006 Aduna (http://www.aduna-software.com/)
|
Chris@0
|
37 * @license http://www.opensource.org/licenses/bsd-license.php
|
Chris@0
|
38 */
|
Chris@0
|
39
|
Chris@0
|
40 /**
|
Chris@0
|
41 * Class to parse RDFa 1.1 with no external dependancies.
|
Chris@0
|
42 *
|
Chris@0
|
43 * http://www.w3.org/TR/rdfa-core/
|
Chris@0
|
44 *
|
Chris@0
|
45 * @package EasyRdf
|
Chris@0
|
46 * @copyright Copyright (c) 2012-2013 Nicholas J Humfrey
|
Chris@0
|
47 * @license http://www.opensource.org/licenses/bsd-license.php
|
Chris@0
|
48 */
|
Chris@0
|
49 class EasyRdf_Parser_Rdfa extends EasyRdf_Parser
|
Chris@0
|
50 {
|
Chris@0
|
51 const XML_NS = 'http://www.w3.org/XML/1998/namespace';
|
Chris@0
|
52 const RDF_XML_LITERAL = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral';
|
Chris@0
|
53 const TERM_REGEXP = '/^([a-zA-Z_])([0-9a-zA-Z_\.-]*)$/';
|
Chris@0
|
54
|
Chris@0
|
55 public $debug = false;
|
Chris@0
|
56
|
Chris@0
|
/**
 * Create a new RDFa parser.
 *
 * The parser needs no set-up of its own, so the body is intentionally empty.
 */
public function __construct()
{
}
|
Chris@0
|
65
|
Chris@0
|
/**
 * Add one triple to the graph being populated and update the running
 * triple counter.
 *
 * @param  mixed   $resource  Subject of the triple
 * @param  string  $property  Predicate of the triple
 * @param  array   $value     Object, as an array carrying 'type' and 'value' keys
 * @return integer The count reported by the graph's add() call
 */
protected function addTriple($resource, $property, $value)
{
    if ($this->debug) {
        print sprintf(
            "Adding triple: %s -> %s -> %s:%s\n",
            $resource,
            $property,
            $value['type'],
            $value['value']
        );
    }

    $added = $this->graph->add($resource, $property, $value);
    $this->tripleCount += $added;

    return $added;
}
|
Chris@0
|
75
|
Chris@0
|
/**
 * Emit the triples for an RDF collection.
 *
 * A fresh blank node is created for every item. The first one is attached
 * to $subject through $property, each later one to its predecessor through
 * rdf:rest, and the chain is closed with rdf:nil. An empty list therefore
 * links $subject straight to rdf:nil.
 *
 * @param mixed  $subject   Resource the list hangs off
 * @param string $property  Property linking $subject to the head of the list
 * @param array  $list      Items, each in the array form accepted by addTriple()
 */
protected function generateList($subject, $property, $list)
{
    $tail = $subject;
    $link = $property;

    foreach ($list as $member) {
        $cell = $this->graph->newBNodeId();
        $this->addTriple($tail, $link, array('type' => 'bnode', 'value' => $cell));
        $this->addTriple($cell, 'rdf:first', $member);

        // Every following cell is chained to this one via rdf:rest
        $tail = $cell;
        $link = 'rdf:rest';
    }

    // Close the chain
    $nil = array('type' => 'uri', 'value' => EasyRdf_Namespace::expand('rdf:nil'));
    $this->addTriple($tail, $link, $nil);
}
|
Chris@0
|
98
|
Chris@0
|
/**
 * Append a value to the pending list kept for a property.
 *
 * @param object $listMapping  Object whose dynamic properties hold one array of
 *                             pending items per property
 * @param string $property     Property the list belongs to
 * @param array  $value        Item to append, carrying 'type' and 'value' keys
 */
protected function addToList($listMapping, $property, $value)
{
    if ($this->debug) {
        print "Adding to list: $property -> ".$value['type'].':'.$value['value']."\n";
    }

    // Start from the items collected so far (none if this property is new)
    $items = isset($listMapping->$property) ? $listMapping->$property : array();
    $items[] = $value;
    $listMapping->$property = $items;
}
|
Chris@0
|
111
|
Chris@0
|
/**
 * Debugging aid: print a one-line description of a DOM node followed by one
 * line per attribute.
 *
 * @param  object  $node   DOM node to describe
 * @param  integer $depth  Nesting depth, used as the indentation width
 * @throws EasyRdf_Exception if the node type has no known label
 */
protected function printNode($node, $depth)
{
    $labels = array(
        XML_ELEMENT_NODE       => 'node',
        XML_ATTRIBUTE_NODE     => 'attr',
        XML_TEXT_NODE          => 'text',
        XML_CDATA_SECTION_NODE => 'cdata',
        XML_ENTITY_REF_NODE    => 'entref',
        XML_ENTITY_NODE        => 'entity',
        XML_PI_NODE            => 'pi',
        XML_COMMENT_NODE       => 'comment',
        XML_DOCUMENT_NODE      => 'doc',
        XML_DOCUMENT_TYPE_NODE => 'doctype',
        XML_HTML_DOCUMENT_NODE => 'html',
    );

    $indent = str_repeat(' ', $depth);
    print $indent;

    // The indent is already written when an unknown type is rejected
    if (!isset($labels[$node->nodeType])) {
        throw new EasyRdf_Exception("unknown node type: ".$node->nodeType);
    }

    print $labels[$node->nodeType];
    print ' '.$node->nodeName."\n";

    if (!$node->hasAttributes()) {
        return;
    }

    foreach ($node->attributes as $attribute) {
        print $indent.' '.$attribute->nodeName." => ".$attribute->nodeValue."\n";
    }
}
|
Chris@0
|
162
|
Chris@0
|
/**
 * Guess the XML Schema datatype of a date/time-like lexical value.
 *
 * The patterns are tried in order and the first match wins.
 *
 * The duration pattern follows the xsd:duration lexical form: an optional
 * leading minus sign, at least one component after "P", and a "T" that is
 * only allowed when a time component follows it. Fractional seconds are
 * accepted. Strings such as "P", "PT" or "P1YT" are therefore not reported
 * as durations.
 *
 * @param  string $value  Lexical value to inspect
 * @return string|null    Datatype URI in the XML Schema namespace, or null
 *                        when no pattern matches
 */
protected function guessTimeDatatype($value)
{
    // Optional timezone suffix shared by the date, time and dateTime forms
    $zone = '(Z|[\-\+]\d{2}:\d{2})?';

    $patterns = array(
        'date'       => '/^-?\d{4}-\d{2}-\d{2}' . $zone . '$/',
        'time'       => '/^\d{2}:\d{2}:\d{2}' . $zone . '$/',
        'dateTime'   => '/^-?\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}' . $zone . '$/',
        // The lookaheads reject a bare "P" and a dangling "T"
        'duration'   => '/^-?P(?=\d|T\d)(\d+Y)?(\d+M)?(\d+D)?(T(?=\d)(\d+H)?(\d+M)?(\d+(\.\d+)?S)?)?$/',
        'gYear'      => '/^\d{4}$/',
        'gYearMonth' => '/^\d{4}-\d{2}$/',
    );

    foreach ($patterns as $name => $pattern) {
        if (preg_match($pattern, $value)) {
            return 'http://www.w3.org/2001/XMLSchema#' . $name;
        }
    }

    return null;
}
|
Chris@0
|
181
|
Chris@0
|
/**
 * Build the evaluation context used for the root of the document.
 *
 * The context starts with the parser's base URI as subject, the XHTML
 * vocabulary as the default (empty) prefix and the RDFa 1.1 default term
 * mappings.
 *
 * @return array The initial evaluation context
 */
protected function initialContext()
{
    return array(
        // The empty key is the default prefix
        'prefixes' => array(
            '' => 'http://www.w3.org/1999/xhtml/vocab#',
        ),
        'vocab' => null,
        'subject' => $this->baseUri,
        'property' => null,
        'object' => null,
        // RDFa 1.1 default term mapping
        'terms' => array(
            'describedby' => 'http://www.w3.org/2007/05/powder-s#describedby',
            'license' => 'http://www.w3.org/1999/xhtml/vocab#license',
            'role' => 'http://www.w3.org/1999/xhtml/vocab#role',
        ),
        'incompleteRels' => array(),
        'incompleteRevs' => array(),
        'listMapping' => null,
        'lang' => null,
        'path' => '',
        'xmlns' => array(),
    );
}
|
Chris@0
|
209
|
Chris@0
|
/**
 * Expand a CURIE ("prefix:reference") into a full URI.
 *
 * The prefix is lower-cased and then resolved by the first rule that applies:
 *  1. "_" denotes a blank node, which is remapped
 *  2. an empty prefix uses the current vocabulary, when one is set
 *  3. a prefix declared in the evaluation context
 *  4. a namespace the DOM node itself can resolve
 *  5. a non-empty prefix known to EasyRdf_Namespace
 *
 * @param  object $node      DOM node the value was read from
 * @param  array  $context   Current evaluation context
 * @param  string $value     Candidate CURIE
 * @return mixed  The expanded value, or null if $value is not a CURIE or
 *                its prefix cannot be resolved
 */
protected function expandCurie($node, &$context, $value)
{
    $parts = array();
    if (!preg_match('/^(\w*?):(.*)$/', $value, $parts)) {
        return null;
    }

    $prefix = strtolower($parts[1]);
    $local = $parts[2];

    if ($prefix === '_') {
        // It is a bnode
        return $this->remapBnode(substr($value, 2));
    }

    if (empty($prefix) and $context['vocab']) {
        // Empty prefix
        return $context['vocab'] . $local;
    }

    if (isset($context['prefixes'][$prefix])) {
        return $context['prefixes'][$prefix] . $local;
    }

    $uri = $node->lookupNamespaceURI($prefix);
    if ($uri) {
        return $uri . $local;
    }

    if (!empty($prefix)) {
        // Expand using well-known prefixes
        $uri = EasyRdf_Namespace::get($prefix);
        if ($uri) {
            return $uri . $local;
        }
    }

    return null;
}
|
Chris@0
|
231
|
Chris@0
|
/**
 * Turn an attribute value (safe CURIE, term, CURIE or URI) into a URI.
 *
 * @param  object  $node     DOM node the value was read from
 * @param  array   $context  Current evaluation context
 * @param  string  $value    Raw attribute value
 * @param  boolean $isProp   True when the value names a property: terms are
 *                           then honoured, while blank nodes and relative
 *                           URIs are rejected
 * @return mixed   The resolved value, or null when nothing usable results
 */
protected function processUri($node, &$context, $value, $isProp = false)
{
    // Safe CURIE: "[prefix:reference]"
    $inner = array();
    if (preg_match('/^\[(.*)\]$/', $value, $inner)) {
        return $this->expandCurie($node, $context, $inner[1]);
    }

    // A term is only meaningful where a property is expected
    if ($isProp and preg_match(self::TERM_REGEXP, $value)) {
        if ($context['vocab']) {
            return $context['vocab'] . $value;
        }

        $term = strtolower($value);
        if (isset($context['terms'][$term])) {
            return $context['terms'][$term];
        }

        return null;
    }

    // Blank nodes cannot be used as properties
    if ($isProp and substr($value, 0, 2) === '_:') {
        return null;
    }

    $uri = $this->expandCurie($node, $context, $value);
    if ($uri) {
        return $uri;
    }

    $parsed = new EasyRdf_ParsedUri($value);
    if ($parsed->isAbsolute()) {
        return $value;
    }

    if ($isProp) {
        // Properties can't be relative URIs
        return null;
    }

    if ($this->baseUri) {
        return $this->baseUri->resolve($parsed);
    }

    return null;
}
|
Chris@0
|
263
|
Chris@0
|
/**
 * Resolve a whitespace-separated list of property values into URIs.
 *
 * Every token is resolved as a property; tokens that do not resolve are
 * silently left out.
 *
 * @param  object $node     DOM node the values were read from
 * @param  array  $context  Current evaluation context
 * @param  string $values   Whitespace-separated tokens (may be empty or null)
 * @return array  The resolved URIs, in document order
 */
protected function processUriList($node, $context, $values)
{
    $resolved = array();

    if ($values) {
        foreach (preg_split('/\s+/', $values) as $token) {
            $uri = $this->processUri($node, $context, $token, true);
            if ($uri) {
                $resolved[] = $uri;
            }
        }
    }

    return $resolved;
}
|
Chris@0
|
279
|
Chris@0
|
/**
 * Return the URI held by the first of the given attributes that is present
 * on the node and resolves to a usable value.
 *
 * @param  object       $node        DOM element to read from
 * @param  array        $context     Current evaluation context
 * @param  string|array $attributes  One attribute name, or several in order
 *                                   of preference
 * @return mixed        The resolved value, or null if no attribute qualifies
 */
protected function getUriAttribute($node, &$context, $attributes)
{
    $candidates = is_array($attributes) ? $attributes : array($attributes);

    foreach ($candidates as $name) {
        if (!$node->hasAttribute($name)) {
            continue;
        }

        $uri = $this->processUri($node, $context, $node->getAttribute($name));
        if ($uri) {
            return $uri;
        }
    }

    return null;
}
|
Chris@0
|
297
|
Chris@0
|
/**
 * Process one DOM node and, recursively, its element children, following
 * the numbered steps of the RDFa 1.1 Core processing sequence.
 *
 * Triples are written through addTriple(); values marked with @inlist are
 * collected in a list mapping and turned into RDF collections by
 * generateList() once the element that created the mapping has been fully
 * processed.
 *
 * @param object  $node     DOM node to process (document or element)
 * @param array   $context  Evaluation context inherited from the parent. It
 *                          is taken by reference and 'path', 'vocab' and
 *                          'prefixes' are updated in place for this node.
 * @param integer $depth    Nesting depth of $node; 1 is the node handed in
 *                          by parse()
 */
protected function processNode($node, &$context, $depth = 1)
{
    if ($this->debug) {
        $this->printNode($node, $depth);
    }

    // Step 1: establish local variables
    $skip = false;
    $subject = null;
    $typedResource = null;
    $object = null;
    $rels = array();
    $revs = array();
    $lang = $context['lang'];
    $incompleteRels = array();
    $incompleteRevs = array();

    if ($node->nodeType === XML_ELEMENT_NODE) {
        $context['path'] .= '/' . $node->nodeName;

        $content = $node->hasAttribute('content') ? $node->getAttribute('content') : null;
        $datatype = $node->hasAttribute('datatype') ? $node->getAttribute('datatype') : null;
        $property = $node->getAttribute('property') ? $node->getAttribute('property') : null;
        $typeof = $node->getAttribute('typeof') ? $node->getAttribute('typeof') : null;

        // Step 2: Default vocabulary
        if ($node->hasAttribute('vocab')) {
            $context['vocab'] = $node->getAttribute('vocab');
            if ($context['vocab']) {
                $this->addTriple(
                    $this->baseUri,
                    'rdfa:usesVocabulary',
                    array('type' => 'uri', 'value' => $context['vocab'])
                );
            }
        }

        // Step 3: Set prefix mappings
        // Support for deprecated xmlns if present in document
        foreach ($context['xmlns'] as $prefix => $uri) {
            if ($node->hasAttribute('xmlns:' . $prefix)) {
                $context['prefixes'][$prefix] = $node->getAttribute('xmlns:' . $prefix);
                if ($this->debug) {
                    // Report the value declared on this node, not the
                    // document-level one the loop variable carries
                    print "Prefix (xmlns): $prefix => ".$context['prefixes'][$prefix]."\n";
                }
            }
        }
        if ($node->hasAttribute('prefix')) {
            // Drop empty tokens so that leading or trailing whitespace
            // cannot shift the prefix/IRI pairing
            $mappings = preg_split('/\s+/', $node->getAttribute('prefix'), -1, PREG_SPLIT_NO_EMPTY);
            while (count($mappings)) {
                $prefix = strtolower(array_shift($mappings));
                $uri = array_shift($mappings);

                if (substr($prefix, -1) === ':') {
                    $prefix = substr($prefix, 0, -1);
                } else {
                    continue;
                }

                if ($prefix === '_' or $uri === null) {
                    // "_" is reserved for blank nodes; a prefix without an
                    // IRI is an incomplete pair and defines nothing
                    continue;
                } elseif (!empty($prefix)) {
                    $context['prefixes'][$prefix] = $uri;
                    if ($this->debug) {
                        print "Prefix: $prefix => $uri\n";
                    }
                }
            }
        }

        // Step 4
        if ($node->hasAttributeNS(self::XML_NS, 'lang')) {
            $lang = $node->getAttributeNS(self::XML_NS, 'lang');
        } elseif ($node->hasAttribute('lang')) {
            $lang = $node->getAttribute('lang');
        }

        // HTML+RDFa 1.1: ignore rel and rev unless they contain CURIEs.
        foreach (array('rel', 'rev') as $attr) {
            if ($node->hasAttribute('property') and $node->hasAttribute($attr)) {
                // Quick check in case there are no CURIEs to deal with.
                if (strpos($node->getAttribute($attr), ':') === false) {
                    $node->removeAttribute($attr);
                } else {
                    // Only keep CURIEs.
                    $curies = array();
                    foreach (preg_split('/\s+/', $node->getAttribute($attr)) as $token) {
                        // Compare against false: a colon at offset 0
                        // (empty prefix) still makes the token a CURIE
                        if (strpos($token, ':') !== false) {
                            $curies[] = $token;
                        }
                    }
                    $node->setAttribute($attr, implode(' ', $curies));
                }
            }
        }

        $rels = $this->processUriList($node, $context, $node->getAttribute('rel'));
        $revs = $this->processUriList($node, $context, $node->getAttribute('rev'));

        if (!$node->hasAttribute('rel') and !$node->hasAttribute('rev')) {
            // Step 5: Establish a new subject if no rel/rev
            if ($property and is_null($content) and is_null($datatype)) {
                $subject = $this->getUriAttribute($node, $context, 'about');
                if ($typeof and !$subject) {
                    $typedResource = $this->getUriAttribute(
                        $node,
                        $context,
                        array('resource', 'href', 'src')
                    );
                    if (!$typedResource) {
                        $typedResource = $this->graph->newBNodeId();
                    }
                    $object = $typedResource;
                }
            } else {
                $subject = $this->getUriAttribute(
                    $node,
                    $context,
                    array('about', 'resource', 'href', 'src')
                );
            }

            // Establish a subject if there isn't one
            # FIXME: refactor this
            if (is_null($subject)) {
                if ($context['path'] === '/html/head') {
                    $subject = $context['object'];
                } elseif ($depth <= 2) {
                    $subject = $this->baseUri;
                } elseif ($typeof and !$property) {
                    $subject = $this->graph->newBNodeId();
                } else {
                    if (!$property) {
                        $skip = true;
                    }
                    $subject = $context['object'];
                }
            }
        } else {
            // Step 6
            // If the current element does contain a @rel or @rev attribute, then the next step is to
            // establish both a value for new subject and a value for current object resource:

            $subject = $this->getUriAttribute($node, $context, 'about');

            $object = $this->getUriAttribute(
                $node,
                $context,
                array('resource', 'href', 'src')
            );

            if ($typeof) {
                if (!$object and !$subject) {
                    $object = $this->graph->newBNodeId();
                }
                $typedResource = $subject ? $subject : $object;
            }

            # FIXME: if the element is the root element of the document
            # then act as if there is an empty @about present
            if (!$subject) {
                $subject = $context['object'];
            }
        }

        # FIXME: better place for this?
        if ($typeof and $subject and !$typedResource) {
            $typedResource = $subject;
        }

        // Step 7: Process @typeof if there is a subject
        if ($typedResource) {
            foreach ($this->processUriList($node, $context, $typeof) as $type) {
                $this->addTriple(
                    $typedResource,
                    'rdf:type',
                    array('type' => 'uri', 'value' => $type)
                );
            }
        }

        // Step 8: Create new List mapping if the subject has changed.
        // A mapping is also created when none has been inherited yet, so
        // that @inlist values always have somewhere to be collected.
        if (($subject and $subject !== $context['subject']) or is_null($context['listMapping'])) {
            $listMapping = new StdClass();
        } else {
            $listMapping = $context['listMapping'];
        }

        // Step 9: Generate triples with given object
        if ($subject and $object) {
            foreach ($rels as $prop) {
                $obj = array('type' => 'uri', 'value' => $object);
                if ($node->hasAttribute('inlist')) {
                    $this->addToList($listMapping, $prop, $obj);
                } else {
                    $this->addTriple($subject, $prop, $obj);
                }
            }

            foreach ($revs as $prop) {
                $this->addTriple(
                    $object,
                    $prop,
                    array('type' => 'uri', 'value' => $subject)
                );
            }
        } elseif ($rels or $revs) {
            // Step 10: Incomplete triples and bnode creation
            $object = $this->graph->newBNodeId();
            if ($rels) {
                if ($node->hasAttribute('inlist')) {
                    foreach ($rels as $prop) {
                        # FIXME: add support for incomplete lists
                        if (!isset($listMapping->$prop)) {
                            $listMapping->$prop = array();
                        }
                    }
                } else {
                    $incompleteRels = $rels;
                    if ($this->debug) {
                        print "Incomplete rels: ".implode(',', $rels)."\n";
                    }
                }
            }

            if ($revs) {
                $incompleteRevs = $revs;
                if ($this->debug) {
                    print "Incomplete revs: ".implode(',', $revs)."\n";
                }
            }
        }

        // Step 11: establish current property value
        if ($subject and $property) {
            // $current stays null until one of the rules below has decided
            // the value. Testing for null rather than emptiness keeps
            // legitimate values such as "0" and "" from being overwritten.
            $current = null;
            $isResource = false;

            if ($datatype) {
                $datatype = $this->processUri($node, $context, $datatype, true);
            }

            if ($content !== null) {
                $current = $content;
            } elseif ($node->hasAttribute('datetime')) {
                $current = $node->getAttribute('datetime');
                $datetime = true;
            } elseif ($datatype === '') {
                $current = $node->textContent;
            } elseif ($datatype === self::RDF_XML_LITERAL) {
                $current = '';
                foreach ($node->childNodes as $child) {
                    $current .= $child->C14N();
                }
            } elseif (is_null($datatype) and empty($rels) and empty($revs)) {
                $resource = $this->getUriAttribute(
                    $node,
                    $context,
                    array('resource', 'href', 'src')
                );

                if ($resource) {
                    $current = $resource;
                    $isResource = true;
                }
            }

            if ($current === null and $typedResource and !$node->hasAttribute('about')) {
                $current = $typedResource;
                $isResource = true;
            }

            if ($current === null) {
                $current = $node->textContent;
            }

            $value = array('value' => $current);
            if ($isResource) {
                $value['type'] = 'uri';
            } else {
                $value['type'] = 'literal';
                if ($datatype) {
                    $value['datatype'] = $datatype;
                } elseif (isset($datetime) or $node->nodeName === 'time') {
                    $value['datatype'] = $this->guessTimeDatatype($value['value']);
                }

                if (empty($value['datatype']) and $lang) {
                    $value['lang'] = $lang;
                }
            }

            // Add each of the properties
            foreach ($this->processUriList($node, $context, $property) as $prop) {
                if ($node->hasAttribute('inlist')) {
                    $this->addToList($listMapping, $prop, $value);
                } elseif ($subject) {
                    $this->addTriple($subject, $prop, $value);
                }
            }
        }

        // Step 12: Complete the incomplete triples from the evaluation context
        if (!$skip and $subject and ($context['incompleteRels'] or $context['incompleteRevs'])) {
            foreach ($context['incompleteRels'] as $prop) {
                $this->addTriple(
                    $context['subject'],
                    $prop,
                    array('type' => 'uri', 'value' => $subject)
                );
            }

            foreach ($context['incompleteRevs'] as $prop) {
                $this->addTriple(
                    $subject,
                    $prop,
                    array('type' => 'uri', 'value' => $context['subject'])
                );
            }
        }
    }

    // Step 13: create a new evaluation context and proceed recursively
    if ($node->hasChildNodes()) {
        if ($skip) {
            $newContext = $context;
        } else {
            // Prepare a new evaluation context
            $newContext = $context;
            if ($object) {
                $newContext['object'] = $object;
            } elseif ($subject) {
                $newContext['object'] = $subject;
            } else {
                $newContext['object'] = $context['subject'];
            }
            if ($subject) {
                $newContext['subject'] = $subject;
            }
            $newContext['incompleteRels'] = $incompleteRels;
            $newContext['incompleteRevs'] = $incompleteRevs;
            if (isset($listMapping)) {
                $newContext['listMapping'] = $listMapping;
            }
        }

        // The language is always updated, even if skip is set
        $newContext['lang'] = $lang;

        foreach ($node->childNodes as $child) {
            if ($child->nodeType === XML_ELEMENT_NODE) {
                // The callee updates its context in place (path, vocab,
                // prefixes). Hand each child its own copy so that those
                // changes do not carry over to the following siblings. The
                // list mapping is an object and so remains shared.
                $childContext = $newContext;
                $this->processNode($child, $childContext, $depth+1);
            }
        }
    }

    // Step 14: create triples for lists
    // Only the element that created the mapping turns it into triples.
    if (!empty($listMapping) and $context['listMapping'] !== $listMapping) {
        // Fall back to the inherited subject when this element has none
        $listSubject = $subject ? $subject : $context['subject'];
        foreach ($listMapping as $prop => $list) {
            if ($this->debug) {
                print "Need to create triples for $prop => ".count($list)." items\n";
            }
            $this->generateList($listSubject, $prop, $list);
        }
    }
}
|
Chris@0
|
663
|
Chris@0
|
/**
 * Parse RDFa 1.1 into an EasyRdf_Graph
 *
 * The data is first parsed as strict XML; if that fails it is parsed again
 * as HTML. A <base href="..."> element in the document head replaces the
 * base URI that was passed in.
 *
 * libxml's internal error handling is switched on only while the document
 * is being loaded. The previous setting is restored afterwards, and the
 * buffered parse errors are discarded unless the caller had already enabled
 * internal error handling itself.
 *
 * @param object EasyRdf_Graph $graph   the graph to load the data into
 * @param string               $data    the RDF document data
 * @param string               $format  the format of the input data
 * @param string               $baseUri the base URI of the data being parsed
 * @return integer             The number of triples added to the graph
 * @throws EasyRdf_Exception   if $format is not 'rdfa'
 */
public function parse($graph, $data, $format, $baseUri)
{
    parent::checkParseParams($graph, $data, $format, $baseUri);

    // NOTE(review): the loose comparison is kept on purpose; $format may be
    // an object that compares via its string form - confirm before tightening
    if ($format != 'rdfa') {
        throw new EasyRdf_Exception(
            "EasyRdf_Parser_Rdfa does not support: $format"
        );
    }

    // Initialise evaluation context.
    $context = $this->initialContext();

    // This setting is process-wide, so remember what it was
    $previousErrorSetting = libxml_use_internal_errors(true);

    // Parse the document into DOM
    $doc = new DOMDocument();
    // Attempt to parse the document as strict XML, and fall back to HTML
    // if XML parsing fails.
    if ($doc->loadXML($data, LIBXML_NONET)) {
        if ($this->debug) {
            print "Document was parsed as XML.";
        }
        // Collect all xmlns namespaces defined throughout the document.
        $sxe = simplexml_import_dom($doc);
        // instanceof rather than a truthiness test: an element without
        // children or attributes converts to boolean false
        if ($sxe instanceof SimpleXMLElement) {
            $context['xmlns'] = $sxe->getDocNamespaces(true);
            unset($context['xmlns']['']);
        }
    } else {
        $doc->loadHTML($data);
        if ($this->debug) {
            print "Document was parsed as HTML.";
        }
    }

    // Loading is done: drop the errors buffered on our behalf and put the
    // libxml error mode back the way the caller had it
    if (!$previousErrorSetting) {
        libxml_clear_errors();
    }
    libxml_use_internal_errors($previousErrorSetting);

    // Establish the base for both XHTML and HTML documents.
    $xpath = new DOMXPath($doc);
    $xpath->registerNamespace('xh', "http://www.w3.org/1999/xhtml");
    $nodeList = $xpath->query('/xh:html/xh:head/xh:base');
    if ($node = $nodeList->item(0) and $href = $node->getAttribute('href')) {
        $this->baseUri = new EasyRdf_ParsedUri($href);
    }
    $nodeList = $xpath->query('/html/head/base');
    if ($node = $nodeList->item(0) and $href = $node->getAttribute('href')) {
        $this->baseUri = new EasyRdf_ParsedUri($href);
    }

    // Remove the fragment from the base URI
    $this->baseUri->setFragment(null);

    // Recursively process XML nodes
    $this->processNode($doc, $context);

    return $this->tripleCount;
}
|
Chris@0
|
727 }
|