annotate vendor/easyrdf/easyrdf/lib/EasyRdf/Parser/Rdfa.php @ 9:1fc0ff908d1f

Add another data file
author Chris Cannam
date Mon, 05 Feb 2018 12:34:32 +0000
parents 4c8ae668cc8c
children
rev   line source
Chris@0 1 <?php
Chris@0 2
Chris@0 3 /**
Chris@0 4 * EasyRdf
Chris@0 5 *
Chris@0 6 * LICENSE
Chris@0 7 *
Chris@0 8 * Copyright (c) 2012-2013 Nicholas J Humfrey.
Chris@0 9 * All rights reserved.
Chris@0 10 *
Chris@0 11 * Redistribution and use in source and binary forms, with or without
Chris@0 12 * modification, are permitted provided that the following conditions are met:
Chris@0 13 * 1. Redistributions of source code must retain the above copyright
Chris@0 14 * notice, this list of conditions and the following disclaimer.
Chris@0 15 * 2. Redistributions in binary form must reproduce the above copyright notice,
Chris@0 16 * this list of conditions and the following disclaimer in the documentation
Chris@0 17 * and/or other materials provided with the distribution.
Chris@0 18 * 3. The name of the author 'Nicholas J Humfrey" may be used to endorse or
Chris@0 19 * promote products derived from this software without specific prior
Chris@0 20 * written permission.
Chris@0 21 *
Chris@0 22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
Chris@0 23 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
Chris@0 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
Chris@0 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
Chris@0 26 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
Chris@0 27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
Chris@0 28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
Chris@0 29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
Chris@0 30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
Chris@0 31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
Chris@0 32 * POSSIBILITY OF SUCH DAMAGE.
Chris@0 33 *
Chris@0 34 * @package EasyRdf
Chris@0 35 * @copyright Copyright (c) 2009-2013 Nicholas J Humfrey
Chris@0 36 * Copyright (c) 1997-2006 Aduna (http://www.aduna-software.com/)
Chris@0 37 * @license http://www.opensource.org/licenses/bsd-license.php
Chris@0 38 */
Chris@0 39
Chris@0 40 /**
Chris@0 41 * Class to parse RDFa 1.1 with no external dependencies.
Chris@0 42 *
Chris@0 43 * http://www.w3.org/TR/rdfa-core/
Chris@0 44 *
Chris@0 45 * @package EasyRdf
Chris@0 46 * @copyright Copyright (c) 2012-2013 Nicholas J Humfrey
Chris@0 47 * @license http://www.opensource.org/licenses/bsd-license.php
Chris@0 48 */
Chris@0 49 class EasyRdf_Parser_Rdfa extends EasyRdf_Parser
Chris@0 50 {
Chris@0 51 const XML_NS = 'http://www.w3.org/XML/1998/namespace';
Chris@0 52 const RDF_XML_LITERAL = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral';
Chris@0 53 const TERM_REGEXP = '/^([a-zA-Z_])([0-9a-zA-Z_\.-]*)$/';
Chris@0 54
Chris@0 55 public $debug = false;
Chris@0 56
/**
 * Create a new RDFa parser.
 *
 * The constructor takes no arguments and its body is intentionally empty.
 */
public function __construct()
{
}
Chris@0 65
/**
 * Add a single triple to the graph being populated and update the running
 * triple counter.
 *
 * @param mixed  $resource  subject of the triple, forwarded to the graph unchanged
 * @param string $property  predicate of the triple
 * @param array  $value     object of the triple; its 'type' and 'value' keys
 *                          are read when debug output is enabled
 * @return mixed whatever the graph's add() call returned (it is also added
 *               to $this->tripleCount)
 */
protected function addTriple($resource, $property, $value)
{
    if ($this->debug) {
        echo "Adding triple: $resource -> $property -> ".$value['type'].':'.$value['value']."\n";
    }

    $added = $this->graph->add($resource, $property, $value);
    $this->tripleCount += $added;

    return $added;
}
Chris@0 75
/**
 * Emit the triples for a list: one new blank node per item, chained with
 * rdf:first / rdf:rest and terminated with rdf:nil.
 *
 * The first cell hangs off $subject via $property; every following cell
 * hangs off the previous cell via rdf:rest. For an empty $list the only
 * triple produced is "$subject $property rdf:nil".
 *
 * @param mixed  $subject   resource the list is attached to
 * @param string $property  property linking $subject to the head of the list
 * @param array  $list      list items, each in the value format accepted by addTriple()
 * @return void
 */
protected function generateList($subject, $property, $list)
{
    // $tail is the node the next link is attached to, $link the property used for it.
    $tail = $subject;
    $link = $property;

    foreach ($list as $member) {
        $cell = $this->graph->newBNodeId();
        $this->addTriple($tail, $link, array('type' => 'bnode', 'value' => $cell));
        $this->addTriple($cell, 'rdf:first', $member);

        $tail = $cell;
        $link = 'rdf:rest';
    }

    // Close the chain.
    $nil = array('type' => 'uri', 'value' => EasyRdf_Namespace::expand('rdf:nil'));
    $this->addTriple($tail, $link, $nil);
}
Chris@0 98
/**
 * Append a value to the pending list kept for $property in a list mapping.
 *
 * The list mapping is an object whose dynamic properties are named after
 * the RDF property and hold arrays of values; the entry is created on
 * first use.
 *
 * @param object $listMapping  list mapping object, modified in place
 * @param string $property     property the list belongs to
 * @param array  $value        value to append; its 'type' and 'value' keys
 *                             are read when debug output is enabled
 * @return void
 */
protected function addToList($listMapping, $property, $value)
{
    if ($this->debug) {
        echo "Adding to list: $property -> ".$value['type'].':'.$value['value']."\n";
    }

    // Start from the existing entries (or from scratch) and store the extended list back.
    $items = isset($listMapping->$property) ? $listMapping->$property : array();
    $items[] = $value;
    $listMapping->$property = $items;
}
Chris@0 111
/**
 * Debug helper: print one line describing a DOM node, followed by one line
 * per attribute.
 *
 * @param object  $node   DOM node to describe
 * @param integer $depth  nesting depth, used as the indentation width
 * @return void
 * @throws EasyRdf_Exception if the node type is not one of the known types
 */
protected function printNode($node, $depth)
{
    // Short label printed for each supported DOM node type.
    $labels = array(
        XML_ELEMENT_NODE       => 'node',
        XML_ATTRIBUTE_NODE     => 'attr',
        XML_TEXT_NODE          => 'text',
        XML_CDATA_SECTION_NODE => 'cdata',
        XML_ENTITY_REF_NODE    => 'entref',
        XML_ENTITY_NODE        => 'entity',
        XML_PI_NODE            => 'pi',
        XML_COMMENT_NODE       => 'comment',
        XML_DOCUMENT_NODE      => 'doc',
        XML_DOCUMENT_TYPE_NODE => 'doctype',
        XML_HTML_DOCUMENT_NODE => 'html',
    );

    $indent = str_repeat(' ', $depth);

    // The indentation is written before the type is validated.
    echo $indent;
    if (!isset($labels[$node->nodeType])) {
        throw new EasyRdf_Exception("unknown node type: ".$node->nodeType);
    }
    echo $labels[$node->nodeType];
    echo ' '.$node->nodeName."\n";

    if (!$node->hasAttributes()) {
        return;
    }

    foreach ($node->attributes as $attr) {
        echo $indent.' '.$attr->nodeName." => ".$attr->nodeValue."\n";
    }
}
Chris@0 162
/**
 * Guess the XML Schema datatype of a date/time-like string from its shape.
 *
 * The patterns are tried in a fixed order (date, time, dateTime, duration,
 * gYear, gYearMonth) and the first match wins.
 *
 * @param string $value  lexical value to inspect
 * @return string|null   datatype URI, or null when no pattern matches
 */
protected function guessTimeDatatype($value)
{
    // Ordered map of pattern => datatype URI.
    $candidates = array(
        '/^-?\d{4}-\d{2}-\d{2}(Z|[\-\+]\d{2}:\d{2})?$/'
            => 'http://www.w3.org/2001/XMLSchema#date',
        '/^\d{2}:\d{2}:\d{2}(Z|[\-\+]\d{2}:\d{2})?$/'
            => 'http://www.w3.org/2001/XMLSchema#time',
        '/^-?\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(Z|[\-\+]\d{2}:\d{2})?$/'
            => 'http://www.w3.org/2001/XMLSchema#dateTime',
        '/^P(\d+Y)?(\d+M)?(\d+D)?T?(\d+H)?(\d+M)?(\d+S)?$/'
            => 'http://www.w3.org/2001/XMLSchema#duration',
        '/^\d{4}$/'
            => 'http://www.w3.org/2001/XMLSchema#gYear',
        '/^\d{4}-\d{2}$/'
            => 'http://www.w3.org/2001/XMLSchema#gYearMonth',
    );

    foreach ($candidates as $pattern => $datatype) {
        if (preg_match($pattern, $value)) {
            return $datatype;
        }
    }

    return null;
}
Chris@0 181
/**
 * Build the evaluation context used for the top of the document.
 *
 * The context starts with $this->baseUri as subject, the XHTML vocabulary
 * as the default (empty) prefix, and the three default terms
 * "describedby", "license" and "role".
 *
 * @return array the initial evaluation context
 */
protected function initialContext()
{
    return array(
        // Default (empty) prefix
        'prefixes' => array(
            '' => 'http://www.w3.org/1999/xhtml/vocab#',
        ),
        'vocab' => null,
        'subject' => $this->baseUri,
        'property' => null,
        'object' => null,
        // RDFa 1.1 default term mapping
        'terms' => array(
            'describedby' => 'http://www.w3.org/2007/05/powder-s#describedby',
            'license' => 'http://www.w3.org/1999/xhtml/vocab#license',
            'role' => 'http://www.w3.org/1999/xhtml/vocab#role',
        ),
        'incompleteRels' => array(),
        'incompleteRevs' => array(),
        'listMapping' => null,
        'lang' => null,
        'path' => '',
        'xmlns' => array(),
    );
}
Chris@0 209
/**
 * Expand a CURIE ("prefix:local") into a full URI.
 *
 * The value is only treated as a CURIE when everything before its first
 * colon consists of word characters (possibly none). The lower-cased
 * prefix is then resolved by the first rule that applies:
 *  1. prefix "_": the part after "_:" is passed to remapBnode()
 *  2. empty prefix while the context has a vocabulary: vocabulary + local part
 *  3. prefix present in the context's prefix mappings
 *  4. prefix resolved by the node's lookupNamespaceURI()
 *  5. non-empty prefix known to EasyRdf_Namespace
 *
 * @param object $node     DOM node used for the namespace lookup
 * @param array  $context  evaluation context ('vocab' and 'prefixes' are read)
 * @param string $value    candidate CURIE
 * @return mixed the expanded URI (or the result of remapBnode() for "_:"
 *               values), or null when the value cannot be expanded
 */
protected function expandCurie($node, &$context, $value)
{
    if (!preg_match('/^(\w*?):(.*)$/', $value, $parts)) {
        return null;
    }

    $prefix = strtolower($parts[1]);
    $local = $parts[2];

    if ($prefix === '_') {
        // It is a bnode
        return $this->remapBnode(substr($value, 2));
    }

    if (empty($prefix) and $context['vocab']) {
        // Empty prefix
        return $context['vocab'] . $local;
    }

    if (isset($context['prefixes'][$prefix])) {
        return $context['prefixes'][$prefix] . $local;
    }

    $uri = $node->lookupNamespaceURI($prefix);
    if ($uri) {
        return $uri . $local;
    }

    if (!empty($prefix)) {
        // Expand using well-known prefixes
        $uri = EasyRdf_Namespace::get($prefix);
        if ($uri) {
            return $uri . $local;
        }
    }

    return null;
}
Chris@0 231
/**
 * Turn an attribute token into a URI.
 *
 * Handled forms, in order:
 *  - "[curie]" (safe CURIE): expanded with expandCurie()
 *  - a bare term, only when $isProp is true: resolved against the
 *    context's vocabulary, else against its term mappings, else null
 *  - a "_:" value when $isProp is true: rejected (null)
 *  - anything else: tried as a CURIE, then as an absolute URI, and finally
 *    (when $isProp is false and a base URI is set) resolved against
 *    $this->baseUri
 *
 * @param object  $node     DOM node, forwarded to expandCurie()
 * @param array   $context  evaluation context ('vocab' and 'terms' are read)
 * @param string  $value    token to process
 * @param boolean $isProp   true when the token names a property/type
 * @return mixed the resulting URI, or null when none could be produced
 */
protected function processUri($node, &$context, $value, $isProp = false)
{
    if (preg_match('/^\[(.*)\]$/', $value, $safe)) {
        // Safe CURIE
        return $this->expandCurie($node, $context, $safe[1]);
    }

    if (preg_match(self::TERM_REGEXP, $value) and $isProp) {
        if ($context['vocab']) {
            return $context['vocab'] . $value;
        }

        $term = strtolower($value);
        if (isset($context['terms'][$term])) {
            return $context['terms'][$term];
        }

        // Unknown term: no further fallbacks are attempted.
        return null;
    }

    if (substr($value, 0, 2) === '_:' and $isProp) {
        return null;
    }

    $expanded = $this->expandCurie($node, $context, $value);
    if ($expanded) {
        return $expanded;
    }

    $parsed = new EasyRdf_ParsedUri($value);
    if ($parsed->isAbsolute()) {
        return $value;
    }

    if ($isProp) {
        // Properties can't be relative URIs
        return null;
    }

    if ($this->baseUri) {
        return $this->baseUri->resolve($parsed);
    }

    return null;
}
Chris@0 263
/**
 * Convert a whitespace-separated list of property tokens into URIs.
 *
 * Every token is processed with processUri() in property mode; tokens that
 * do not yield a URI are silently dropped.
 *
 * @param object $node     DOM node, forwarded to processUri()
 * @param array  $context  evaluation context
 * @param string $values   whitespace-separated tokens (may be empty or null)
 * @return array list of URIs, empty when $values is empty
 */
protected function processUriList($node, $context, $values)
{
    $resolved = array();

    if ($values) {
        foreach (preg_split('/\s+/', $values) as $token) {
            $uri = $this->processUri($node, $context, $token, true);
            if ($uri) {
                $resolved[] = $uri;
            }
        }
    }

    return $resolved;
}
Chris@0 279
/**
 * Return the URI held by the first of the given attributes that is present
 * on the node and yields a URI.
 *
 * @param object       $node        DOM element to read attributes from
 * @param array        $context     evaluation context, forwarded to processUri()
 * @param string|array $attributes  attribute name, or names in priority order
 * @return mixed the URI, or null when no attribute produced one
 */
protected function getUriAttribute($node, &$context, $attributes)
{
    $names = is_array($attributes) ? $attributes : array($attributes);

    foreach ($names as $name) {
        if (!$node->hasAttribute($name)) {
            continue;
        }

        $uri = $this->processUri($node, $context, $node->getAttribute($name));
        if ($uri) {
            return $uri;
        }
    }

    return null;
}
Chris@0 297
/**
 * Process one DOM node (and, recursively, its element children) following
 * the numbered steps of the RDFa processing sequence, adding the resulting
 * triples to the graph.
 *
 * Only element nodes go through steps 2-12; for any other node type the
 * method just recurses into the element children (step 13).
 *
 * Changes compared with the previous revision of this method:
 *  - when filtering @rel/@rev next to @property, CURIEs that start with a
 *    colon (empty prefix, e.g. ":next") are now kept; they used to be
 *    dropped because strpos() returning 0 was treated as "not found"
 *  - the @prefix attribute is split ignoring empty tokens, so leading
 *    whitespace no longer misaligns the prefix/IRI pairs, and a trailing
 *    prefix without an IRI is ignored instead of storing null
 *  - the xmlns debug message prints the mapping that was actually stored
 *
 * @param object  $node     DOM node to process
 * @param array   $context  evaluation context; passed by reference, and its
 *                          'path', 'vocab' and 'prefixes' entries are
 *                          written directly by this method
 * @param integer $depth    nesting depth of $node (1 for the first call)
 * @return void
 */
protected function processNode($node, &$context, $depth = 1)
{
    if ($this->debug) {
        $this->printNode($node, $depth);
    }

    // Step 1: establish local variables
    $skip = false;
    $subject = null;
    $typedResource = null;
    $object = null;
    $rels = array();
    $revs = array();
    $lang = $context['lang'];
    $incompleteRels = array();
    $incompleteRevs = array();

    if ($node->nodeType === XML_ELEMENT_NODE) {
        // NOTE(review): $context is the same array for all siblings (the
        // parent passes its $newContext by reference to every child), so
        // the writes to 'path', 'vocab' and 'prefixes' below stay visible
        // to later siblings. Confirm whether that is intended before
        // relying on 'path'.
        $context['path'] .= '/' . $node->nodeName;

        $content = $node->hasAttribute('content') ? $node->getAttribute('content') : null;
        $datatype = $node->hasAttribute('datatype') ? $node->getAttribute('datatype') : null;
        // An empty @property / @typeof is treated the same as a missing one.
        $property = $node->getAttribute('property') ? $node->getAttribute('property') : null;
        $typeof = $node->getAttribute('typeof') ? $node->getAttribute('typeof') : null;

        // Step 2: Default vocabulary
        if ($node->hasAttribute('vocab')) {
            $context['vocab'] = $node->getAttribute('vocab');
            if ($context['vocab']) {
                $this->addTriple(
                    $this->baseUri,
                    'rdfa:usesVocabulary',
                    array('type' => 'uri', 'value' => $context['vocab'])
                );
            }
        }

        // Step 3: Set prefix mappings
        // Support for deprecated xmlns if present in document
        foreach ($context['xmlns'] as $prefix => $uri) {
            if ($node->hasAttribute('xmlns:' . $prefix)) {
                $context['prefixes'][$prefix] = $node->getAttribute('xmlns:' . $prefix);
                if ($this->debug) {
                    // Report the value read from this element, not the
                    // document-level declaration held in $uri.
                    print "Prefix (xmlns): $prefix => ".$context['prefixes'][$prefix]."\n";
                }
            }
        }
        if ($node->hasAttribute('prefix')) {
            // Drop empty tokens so that leading/trailing whitespace cannot
            // shift the "prefix: IRI" pairing.
            $mappings = preg_split('/\s+/', $node->getAttribute('prefix'), -1, PREG_SPLIT_NO_EMPTY);
            while (count($mappings)) {
                $prefix = strtolower(array_shift($mappings));
                $uri = array_shift($mappings);

                if (substr($prefix, -1) === ':') {
                    $prefix = substr($prefix, 0, -1);
                } else {
                    continue;
                }

                if ($uri === null) {
                    // Trailing prefix without an IRI: nothing to map, and an
                    // inherited mapping must not be overwritten with null.
                    continue;
                }

                if ($prefix === '_') {
                    continue;
                } elseif (!empty($prefix)) {
                    $context['prefixes'][$prefix] = $uri;
                    if ($this->debug) {
                        print "Prefix: $prefix => $uri\n";
                    }
                }
            }
        }

        // Step 4
        if ($node->hasAttributeNS(self::XML_NS, 'lang')) {
            $lang = $node->getAttributeNS(self::XML_NS, 'lang');
        } elseif ($node->hasAttribute('lang')) {
            $lang = $node->getAttribute('lang');
        }

        // HTML+RDFa 1.1: ignore rel and rev unless they contain CURIEs.
        foreach (array('rel', 'rev') as $attr) {
            if ($node->hasAttribute('property') and $node->hasAttribute($attr)) {
                // Quick check in case there are no CURIEs to deal with.
                if (strpos($node->getAttribute($attr), ':') === false) {
                    $node->removeAttribute($attr);
                } else {
                    // Only keep CURIEs.
                    $curies = array();
                    foreach (preg_split('/\s+/', $node->getAttribute($attr)) as $token) {
                        // Compare against false: a colon at offset 0
                        // (empty prefix) is still a CURIE.
                        if (strpos($token, ':') !== false) {
                            $curies[] = $token;
                        }
                    }
                    $node->setAttribute($attr, implode(' ', $curies));
                }
            }
        }

        $rels = $this->processUriList($node, $context, $node->getAttribute('rel'));
        $revs = $this->processUriList($node, $context, $node->getAttribute('rev'));

        if (!$node->hasAttribute('rel') and !$node->hasAttribute('rev')) {
            // Step 5: Establish a new subject if no rel/rev
            if ($property and is_null($content) and is_null($datatype)) {
                $subject = $this->getUriAttribute($node, $context, 'about');
                if ($typeof and !$subject) {
                    $typedResource = $this->getUriAttribute(
                        $node,
                        $context,
                        array('resource', 'href', 'src')
                    );
                    if (!$typedResource) {
                        $typedResource = $this->graph->newBNodeId();
                    }
                    $object = $typedResource;
                }
            } else {
                $subject = $this->getUriAttribute(
                    $node,
                    $context,
                    array('about', 'resource', 'href', 'src')
                );
            }

            // Establish a subject if there isn't one
            # FIXME: refactor this
            if (is_null($subject)) {
                if ($context['path'] === '/html/head') {
                    $subject = $context['object'];
                } elseif ($depth <= 2) {
                    $subject = $this->baseUri;
                } elseif ($typeof and !$property) {
                    $subject = $this->graph->newBNodeId();
                } else {
                    if (!$property) {
                        $skip = true;
                    }
                    $subject = $context['object'];
                }
            }
        } else {
            // Step 6
            // If the current element does contain a @rel or @rev attribute, then the next step is to
            // establish both a value for new subject and a value for current object resource:

            $subject = $this->getUriAttribute($node, $context, 'about');

            $object = $this->getUriAttribute(
                $node,
                $context,
                array('resource', 'href', 'src')
            );

            if ($typeof) {
                if (!$object and !$subject) {
                    $object = $this->graph->newBNodeId();
                }
                $typedResource = $subject ? $subject : $object;
            }

            # FIXME: if the element is the root element of the document
            # then act as if there is an empty @about present
            if (!$subject) {
                $subject = $context['object'];
            }
        }

        # FIXME: better place for this?
        if ($typeof and $subject and !$typedResource) {
            $typedResource = $subject;
        }

        // Step 7: Process @typeof if there is a subject
        if ($typedResource) {
            foreach ($this->processUriList($node, $context, $typeof) as $type) {
                $this->addTriple(
                    $typedResource,
                    'rdf:type',
                    array('type' => 'uri', 'value' => $type)
                );
            }
        }

        // Step 8: Create new List mapping if the subject has changed
        if ($subject and $subject !== $context['subject']) {
            $listMapping = new StdClass();
        } else {
            $listMapping = $context['listMapping'];
        }

        // Step 9: Generate triples with given object
        if ($subject and $object) {
            foreach ($rels as $prop) {
                $obj = array('type' => 'uri', 'value' => $object);
                if ($node->hasAttribute('inlist')) {
                    $this->addToList($listMapping, $prop, $obj);
                } else {
                    $this->addTriple($subject, $prop, $obj);
                }
            }

            foreach ($revs as $prop) {
                $this->addTriple(
                    $object,
                    $prop,
                    array('type' => 'uri', 'value' => $subject)
                );
            }
        } elseif ($rels or $revs) {
            // Step 10: Incomplete triples and bnode creation
            $object = $this->graph->newBNodeId();
            if ($rels) {
                if ($node->hasAttribute('inlist')) {
                    foreach ($rels as $prop) {
                        # FIXME: add support for incomplete lists
                        if (!isset($listMapping->$prop)) {
                            $listMapping->$prop = array();
                        }
                    }
                } else {
                    $incompleteRels = $rels;
                    if ($this->debug) {
                        print "Incomplete rels: ".implode(',', $rels)."\n";
                    }
                }
            }

            if ($revs) {
                $incompleteRevs = $revs;
                if ($this->debug) {
                    print "Incomplete revs: ".implode(',', $revs)."\n";
                }
            }
        }

        // Step 11: establish current property value
        if ($subject and $property) {
            $value = array();
            // Set when the value was taken from a @datetime attribute.
            $fromDatetimeAttr = false;

            if ($datatype) {
                $datatype = $this->processUri($node, $context, $datatype, true);
            }

            if ($content !== null) {
                $value['value'] = $content;
            } elseif ($node->hasAttribute('datetime')) {
                $value['value'] = $node->getAttribute('datetime');
                $fromDatetimeAttr = true;
            } elseif ($datatype === '') {
                $value['value'] = $node->textContent;
            } elseif ($datatype === self::RDF_XML_LITERAL) {
                // Serialise the children as canonical XML.
                $value['value'] = '';
                foreach ($node->childNodes as $child) {
                    $value['value'] .= $child->C14N();
                }
            } elseif (is_null($datatype) and empty($rels) and empty($revs)) {
                $value['value'] = $this->getUriAttribute(
                    $node,
                    $context,
                    array('resource', 'href', 'src')
                );

                if ($value['value']) {
                    $value['type'] = 'uri';
                }
            }

            if (empty($value['value']) and $typedResource and !$node->hasAttribute('about')) {
                $value['type'] = 'uri';
                $value['value'] = $typedResource;
            }

            // Last resort: the element's text content.
            if (empty($value['value'])) {
                $value['value'] = $node->textContent;
            }

            if (empty($value['type'])) {
                $value['type'] = 'literal';
                if ($datatype) {
                    $value['datatype'] = $datatype;
                } elseif ($fromDatetimeAttr or $node->nodeName === 'time') {
                    $value['datatype'] = $this->guessTimeDatatype($value['value']);
                }

                // A language tag is only attached to literals without a datatype.
                if (empty($value['datatype']) and $lang) {
                    $value['lang'] = $lang;
                }
            }

            // Add each of the properties
            foreach ($this->processUriList($node, $context, $property) as $prop) {
                if ($node->hasAttribute('inlist')) {
                    $this->addToList($listMapping, $prop, $value);
                } elseif ($subject) {
                    $this->addTriple($subject, $prop, $value);
                }
            }
        }

        // Step 12: Complete the incomplete triples from the evaluation context
        if (!$skip and $subject and ($context['incompleteRels'] or $context['incompleteRevs'])) {
            foreach ($context['incompleteRels'] as $prop) {
                $this->addTriple(
                    $context['subject'],
                    $prop,
                    array('type' => 'uri', 'value' => $subject)
                );
            }

            foreach ($context['incompleteRevs'] as $prop) {
                $this->addTriple(
                    $subject,
                    $prop,
                    array('type' => 'uri', 'value' => $context['subject'])
                );
            }
        }
    }

    // Step 13: create a new evaluation context and proceed recursively
    if ($node->hasChildNodes()) {
        if ($skip) {
            $newContext = $context;
        } else {
            // Prepare a new evaluation context
            $newContext = $context;
            if ($object) {
                $newContext['object'] = $object;
            } elseif ($subject) {
                $newContext['object'] = $subject;
            } else {
                $newContext['object'] = $context['subject'];
            }
            if ($subject) {
                $newContext['subject'] = $subject;
            }
            $newContext['incompleteRels'] = $incompleteRels;
            $newContext['incompleteRevs'] = $incompleteRevs;
            // $listMapping is only defined when $node was an element.
            if (isset($listMapping)) {
                $newContext['listMapping'] = $listMapping;
            }
        }

        // The language is always updated, even if skip is set
        $newContext['lang'] = $lang;

        foreach ($node->childNodes as $child) {
            if ($child->nodeType === XML_ELEMENT_NODE) {
                $this->processNode($child, $newContext, $depth+1);
            }
        }
    }

    // Step 14: create triples for lists
    if (!empty($listMapping)) {
        foreach ($listMapping as $prop => $list) {
            // Only the element that created the mapping emits its lists.
            if ($context['listMapping'] !== $listMapping) {
                if ($this->debug) {
                    print "Need to create triples for $prop => ".count($list)." items\n";
                }
                $this->generateList($subject, $prop, $list);
            }
        }
    }
}
Chris@0 663
/**
 * Parse RDFa 1.1 into an EasyRdf_Graph
 *
 * The document is first parsed as XML; if that fails it is parsed again as
 * HTML. A <base href="..."> element in the document head, when present,
 * replaces the base URI passed in.
 *
 * libxml's internal error handling is switched on only while the document
 * is being loaded; the collected errors are discarded and the previous
 * setting is restored afterwards, so the caller's libxml configuration is
 * left as it was found.
 *
 * @param object EasyRdf_Graph $graph   the graph to load the data into
 * @param string               $data    the RDF document data
 * @param string               $format  the format of the input data
 * @param string               $baseUri the base URI of the data being parsed
 * @return integer             The number of triples added to the graph
 * @throws EasyRdf_Exception   if $format is not 'rdfa'
 */
public function parse($graph, $data, $format, $baseUri)
{
    parent::checkParseParams($graph, $data, $format, $baseUri);

    if ($format != 'rdfa') {
        throw new EasyRdf_Exception(
            "EasyRdf_Parser_Rdfa does not support: $format"
        );
    }

    // Initialise evaluation context.
    $context = $this->initialContext();

    // Collect libxml errors internally while loading, remembering the
    // caller's setting so it can be restored.
    $previousErrorMode = libxml_use_internal_errors(true);

    // Parse the document into DOM
    $doc = new DOMDocument();
    // Attempt to parse the document as strict XML, and fall back to HTML
    // if XML parsing fails.
    if ($doc->loadXML($data, LIBXML_NONET)) {
        if ($this->debug) {
            print "Document was parsed as XML.";
        }
        // Collect all xmlns namespaces defined throughout the document.
        $sxe = simplexml_import_dom($doc);
        $context['xmlns'] = $sxe->getDocNamespaces(true);
        unset($context['xmlns']['']);
    } else {
        $doc->loadHTML($data);
        if ($this->debug) {
            print "Document was parsed as HTML.";
        }
    }

    // Drop the buffered parse errors and hand libxml back as we found it.
    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);

    // Establish the base for both XHTML and HTML documents.
    // The un-namespaced query runs last, so it wins when both match.
    $xpath = new DOMXPath($doc);
    $xpath->registerNamespace('xh', "http://www.w3.org/1999/xhtml");
    foreach (array('/xh:html/xh:head/xh:base', '/html/head/base') as $query) {
        $nodeList = $xpath->query($query);
        $base = $nodeList->item(0);
        if ($base) {
            $href = $base->getAttribute('href');
            if ($href) {
                $this->baseUri = new EasyRdf_ParsedUri($href);
            }
        }
    }

    // Remove the fragment from the base URI (there may be no base URI at
    // all; processUri() already allows for that).
    if ($this->baseUri) {
        $this->baseUri->setFragment(null);
    }

    // Recursively process XML nodes
    $this->processNode($doc, $context);

    return $this->tripleCount;
}
Chris@0 727 }