annotate sites/all/libraries/ARC2/arc/parsers/ARC2_SPOGParser.php @ 4:ce11bbd8f642

added modules
author danieleb <danielebarchiesi@me.com>
date Thu, 19 Sep 2013 10:38:44 +0100
parents
children
rev   line source
danielebarchiesi@4 1 <?php
danielebarchiesi@4 2 /**
danielebarchiesi@4 3 * ARC2 streaming SPOG parser
danielebarchiesi@4 4 *
danielebarchiesi@4 5 * @author Benjamin Nowack
danielebarchiesi@4 6 * @license <http://arc.semsol.org/license>
danielebarchiesi@4 7 * @homepage <http://arc.semsol.org/>
danielebarchiesi@4 8 * @package ARC2
danielebarchiesi@4 9 * @version 2010-11-16
danielebarchiesi@4 10 */
danielebarchiesi@4 11
danielebarchiesi@4 12 ARC2::inc('RDFParser');
danielebarchiesi@4 13
/**
 * Streaming parser that turns SPARQL XML result rows into triples/quads.
 *
 * Each <result> element is expected to carry <binding> children; the bindings
 * named "s", "p", "o" and (optionally) "g" are collected and stored as one
 * entry of $this->triples when the <result> element closes.
 */
class ARC2_SPOGParser extends ARC2_RDFParser {

  /**
   * @param array  $a      Configuration array, handed to the parent constructor.
   * @param object $caller Calling component, handed to the parent constructor.
   */
  function __construct($a, &$caller) {
    parent::__construct($a, $caller);
  }

  /**
   * Sets the parser defaults: configured encoding, the XML/RDF namespace
   * URIs with their prefixes, and an empty target encoding.
   */
  function __init() {
    parent::__init();
    $this->encoding = $this->v('encoding', false, $this->a);
    $this->xml = 'http://www.w3.org/XML/1998/namespace';
    $this->rdf = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
    $this->nsp = array($this->xml => 'xml', $this->rdf => 'rdf');
    $this->target_encoding = '';
  }

  /* */

  /**
   * Reads the document chunk by chunk and feeds it to the XML parser.
   *
   * @param string $path         Location passed to the reader's activate().
   * @param string $data         Optional inline data passed to the reader's activate().
   * @param bool   $iso_fallback When true, the XML declaration of the first chunk is
   *                             replaced by one that declares ISO-8859-1.
   * @return mixed Result of done() on success, result of addError() on an XML error.
   */
  function parse($path, $data = '', $iso_fallback = false) {
    $this->state = 0;
    /* reader: reuse an existing one, otherwise create it */
    if (!$this->v('reader')) {
      ARC2::inc('Reader');
      $this->reader = new ARC2_Reader($this->a, $this);
    }
    $this->reader->setAcceptHeader('Accept: sparql-results+xml; q=0.9, */*; q=0.1');
    $this->reader->activate($path, $data);
    $this->x_base = isset($this->a['base']) && $this->a['base'] ? $this->a['base'] : $this->reader->base;
    /* xml parser */
    $this->initXMLParser();
    /* parse */
    $first = true;
    while ($d = $this->reader->readStream()) {
      if ($iso_fallback && $first) {
        /* force an ISO-8859-1 declaration on the first chunk only */
        $d = '<?xml version="1.0" encoding="ISO-8859-1"?>' . "\n" . preg_replace('/^\<\?xml [^\>]+\?\>\s*/s', '', $d);
        $first = false;
      }
      if (!xml_parse($this->xml_parser, $d, false)) {
        $error_str = xml_error_string(xml_get_error_code($this->xml_parser));
        $line = xml_get_current_line_number($this->xml_parser);
        /* build the message before the reader is released: getEncoding() may consult it */
        $this->tmp_error = 'XML error: "' . $error_str . '" at line ' . $line . ' (parsing as ' . $this->getEncoding() . ')';
        /* NOTE(review): appending the raw chunk looks like debugging residue; kept so the error text is unchanged - confirm */
        $this->tmp_error .= $d . urlencode($d);
        /* The automatic ISO-8859-1 retry that used to sit here was disabled ("if (0 && ...)") and therefore never ran. */
        /* release parser and stream on failure too, so nothing leaks and a later parse() starts clean */
        $this->freeResources();
        return $this->addError($this->tmp_error);
      }
    }
    $this->target_encoding = xml_parser_get_option($this->xml_parser, XML_OPTION_TARGET_ENCODING);
    $this->freeResources();
    return $this->done();
  }

  /**
   * Frees the XML parser and closes/forgets the reader.
   *
   * Both properties are unset so that initXMLParser() and parse() create
   * fresh instances when the object is used for another document.
   */
  function freeResources() {
    if (isset($this->xml_parser)) {
      xml_parser_free($this->xml_parser);
      unset($this->xml_parser);
    }
    if (isset($this->reader)) {
      $this->reader->closeStream();
      unset($this->reader);
    }
  }

  /* */

  /**
   * Creates and configures the XML parser unless one is already present.
   *
   * The input encoding is taken from getEncoding() when it is one of UTF-8,
   * ISO-8859-1 or US-ASCII (case-insensitive) and defaults to UTF-8 otherwise.
   */
  function initXMLParser() {
    if (!isset($this->xml_parser)) {
      $enc = preg_match('/^(utf\-8|iso\-8859\-1|us\-ascii)$/i', $this->getEncoding(), $m) ? $m[1] : 'UTF-8';
      $parser = xml_parser_create($enc);
      xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 0);
      xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
      /* array callables bind the handlers to this object without the separate xml_set_object() call */
      xml_set_element_handler($parser, array($this, 'open'), array($this, 'close'));
      xml_set_character_data_handler($parser, array($this, 'cData'));
      /* NOTE(review): the parser is created with xml_parser_create(), not xml_parser_create_ns() - confirm this handler is ever invoked */
      xml_set_start_namespace_decl_handler($parser, array($this, 'nsDecl'));
      $this->xml_parser = $parser;
    }
  }

  /* */

  /**
   * Returns an encoding name.
   *
   * @param string $src 'parser' returns the target encoding recorded after the last
   *                    successful parse; 'config' returns the configured encoding when
   *                    one is set; anything else (or an empty configuration) asks the reader.
   * @return string Encoding name; 'UTF-8' when no reader is available to ask.
   */
  function getEncoding($src = 'config') {
    if ($src == 'parser') {
      return $this->target_encoding;
    }
    if (($src == 'config') && $this->encoding) {
      return $this->encoding;
    }
    /* the reader is unset once parse() has finished, so it must not be dereferenced blindly */
    if (isset($this->reader) && is_object($this->reader)) {
      return $this->reader->getEncoding();
    }
    return 'UTF-8';
  }

  /* */

  /**
   * @return array The collected triples, or an empty array when none are set.
   */
  function getTriples() {
    return $this->v('triples', array());
  }

  /**
   * @return int Value of the triple counter.
   */
  function countTriples() {
    return $this->t_count;
  }

  /**
   * Stores one triple/quad, optionally skipping exact duplicates.
   *
   * @param string $s       Subject.
   * @param string $p       Predicate.
   * @param string $o       Object value.
   * @param string $s_type  Subject type.
   * @param string $o_type  Object type.
   * @param string $o_dt    Object datatype.
   * @param string $o_lang  Object language tag.
   * @param string $g       Graph.
   * @return int|null 0 when subject, predicate or object is missing; nothing otherwise.
   */
  function addT($s = '', $p = '', $o = '', $s_type = '', $o_type = '', $o_dt = '', $o_lang = '', $g = '') {
    /* a plain truthiness test would reject the legitimate object value "0" */
    $has_o = $o || $o === '0' || $o === 0;
    if (!($s && $p && $has_o)) return 0;
    $t = array('s' => $s, 'p' => $p, 'o' => $o, 's_type' => $s_type, 'o_type' => $o_type, 'o_datatype' => $o_dt, 'o_lang' => $o_lang, 'g' => $g);
    if ($this->skip_dupes) {
      /* hash of the complete record identifies duplicates */
      $h = md5(serialize($t));
      if (isset($this->added_triples[$h])) {
        return;
      }
      $this->added_triples[$h] = true;
    }
    $this->triples[$this->t_count] = $t;
    $this->t_count++;
  }

  /* */

  /**
   * Start-element handler: tracks the current element and prepares the row buffer.
   *
   * @param mixed  $p XML parser handle.
   * @param string $t Element name.
   * @param array  $a Element attributes.
   */
  function open($p, $t, $a) {
    $this->state = $t;
    if ($t == 'result') {
      /* new row: start with an empty buffer */
      $this->t = array();
    }
    elseif ($t == 'binding') {
      /* tolerate a <binding> without a name attribute instead of raising a notice */
      $this->binding = $this->v('name', '', $a);
      $this->t[$this->binding] = '';
    }
    elseif ($t == 'literal') {
      $this->t[$this->binding . '_dt'] = $this->v('datatype', '', $a);
      $this->t[$this->binding . '_lang'] = $this->v('xml:lang', '', $a);
      $this->t[$this->binding . '_type'] = 'literal';
    }
    elseif ($t == 'uri') {
      $this->t[$this->binding . '_type'] = 'uri';
    }
    elseif ($t == 'bnode') {
      $this->t[$this->binding . '_type'] = 'bnode';
      /* the label read by cData() is appended to this prefix */
      $this->t[$this->binding] = '_:';
    }
  }

  /**
   * End-element handler: emits the buffered row when a <result> closes.
   *
   * @param mixed  $p XML parser handle.
   * @param string $t Element name.
   */
  function close($p, $t) {
    $this->prev_state = $this->state;
    $this->state = '';
    if ($t == 'result') {
      $this->addT(
        $this->v('s', '', $this->t),
        $this->v('p', '', $this->t),
        $this->v('o', '', $this->t),
        $this->v('s_type', '', $this->t),
        $this->v('o_type', '', $this->t),
        $this->v('o_dt', '', $this->t),
        $this->v('o_lang', '', $this->t),
        $this->v('g', '', $this->t)
      );
    }
  }

  /**
   * Character-data handler: appends text to the current binding while inside
   * a <uri>, <bnode> or <literal> element.
   *
   * @param mixed  $p XML parser handle.
   * @param string $d Text chunk.
   */
  function cData($p, $d) {
    /* strict comparison: the initial integer state 0 must not loosely match 'uri' on PHP < 8 */
    if (in_array($this->state, array('uri', 'bnode', 'literal'), true)) {
      $this->t[$this->binding] .= $d;
    }
  }

  /**
   * Namespace-declaration handler: records the prefix for a namespace URI,
   * keeping an already registered prefix.
   *
   * @param mixed  $p   XML parser handle.
   * @param string $prf Declared prefix.
   * @param string $uri Namespace URI.
   */
  function nsDecl($p, $prf, $uri) {
    $this->nsp[$uri] = isset($this->nsp[$uri]) ? $this->nsp[$uri] : $prf;
  }

  /* */

}