/**
 * @homepage
 * @package ARC2
 * @version 2010-11-16
 */

ARC2::inc('RDFParser');

/**
 * Streaming XML parser that turns SPARQL XML result rows into triples.
 *
 * Each <result> element is expected to carry <binding> children named
 * "s", "p", "o" and optionally "g"; when the <result> closes, those bindings
 * are handed to addT() as one triple (plus graph).
 */
class ARC2_SPOGParser extends ARC2_RDFParser {

  function __construct($a, &$caller) {
    parent::__construct($a, $caller);
  }

  /**
   * Resets per-instance parser configuration (encoding and namespace map).
   */
  function __init() {/* reader */
    parent::__init();
    $this->encoding = $this->v('encoding', false, $this->a);
    $this->xml = 'http://www.w3.org/XML/1998/namespace';
    $this->rdf = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
    $this->nsp = array($this->xml => 'xml', $this->rdf => 'rdf');
    $this->target_encoding = '';
  }

  /*  */

  /**
   * Reads the document at $path (or the given $data) chunk by chunk and
   * feeds it to the XML parser.
   *
   * @param string $path          Location handed to the reader's activate().
   * @param string $data          Optional inline data handed to the reader.
   * @param bool   $iso_fallback  When true, the XML declaration of the first
   *                              chunk is replaced by an ISO-8859-1 one.
   * @return mixed Result of done() on success, result of addError() when a
   *               chunk fails to parse.
   */
  function parse($path, $data = '', $iso_fallback = false) {
    $this->state = 0;
    /* reader */
    if (!$this->v('reader')) {
      ARC2::inc('Reader');
      $this->reader = new ARC2_Reader($this->a, $this);
    }
    $this->reader->setAcceptHeader('Accept: sparql-results+xml; q=0.9, */*; q=0.1');
    $this->reader->activate($path, $data);
    $this->x_base = isset($this->a['base']) && $this->a['base'] ? $this->a['base'] : $this->reader->base;
    /* xml parser */
    $this->initXMLParser();
    /* parse */
    $first = true;
    while ($d = $this->reader->readStream()) {
      if ($iso_fallback && $first) {
        /* Swap the document's own XML declaration for an ISO-8859-1 one.
           NOTE(review): this copy of the file had an empty string literal
           here (markup stripped); the declaration is reconstructed from the
           $iso_fallback intent - confirm against the upstream source. */
        $d = '<?xml version="1.0" encoding="ISO-8859-1"?>' . "\n" . preg_replace('/^\<\?xml [^\>]+\?\>\s*/s', '', $d);
        $first = false;
      }
      if (!xml_parse($this->xml_parser, $d, false)) {
        $error_str = xml_error_string(xml_get_error_code($this->xml_parser));
        $line = xml_get_current_line_number($this->xml_parser);
        $this->tmp_error = 'XML error: "' . $error_str . '" at line ' . $line . ' (parsing as ' . $this->getEncoding() . ')';
        /* NOTE(review): the offending chunk is appended raw and url-encoded;
           kept so existing error output stays unchanged. */
        $this->tmp_error .= $d . urlencode($d);
        /* Release the parser and the stream before bailing out; otherwise a
           later parse() on this instance would reuse the failed parser. */
        $this->releaseParseResources();
        return $this->addError($this->tmp_error);
      }
    }
    /* Must be read before the parser handle is released. */
    $this->target_encoding = xml_parser_get_option($this->xml_parser, XML_OPTION_TARGET_ENCODING);
    $this->releaseParseResources();
    unset($this->reader);
    return $this->done();
  }

  /**
   * Frees and forgets the XML parser handle and closes the reader stream.
   *
   * The handle is unset so that initXMLParser() builds a fresh parser on the
   * next parse() call instead of finding a freed one.
   */
  function releaseParseResources() {
    if (isset($this->xml_parser)) {
      xml_parser_free($this->xml_parser);
      unset($this->xml_parser);
    }
    if (isset($this->reader)) {
      $this->reader->closeStream();
    }
  }

  /*  */

  /**
   * Creates the XML parser and wires this object's handlers to it, unless a
   * parser is already set on the instance.
   */
  function initXMLParser() {
    if (!isset($this->xml_parser)) {
      /* Only these three encodings are passed through; anything else
         falls back to UTF-8. */
      $enc = preg_match('/^(utf\-8|iso\-8859\-1|us\-ascii)$/i', $this->getEncoding(), $m) ? $m[1] : 'UTF-8';
      $parser = xml_parser_create($enc);
      xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 0);
      xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
      xml_set_element_handler($parser, 'open', 'close');
      xml_set_character_data_handler($parser, 'cdata');
      xml_set_start_namespace_decl_handler($parser, 'nsDecl');
      xml_set_object($parser, $this);
      $this->xml_parser = $parser;
    }
  }

  /*  */

  /**
   * Returns the encoding for the requested source.
   *
   * @param string $src 'parser' returns the target encoding recorded by the
   *                    last successful parse(); 'config' returns the
   *                    configured encoding when one is set.
   * @return string In all other cases the reader's encoding, or 'UTF-8' when
   *                no reader is attached or it reports none.
   */
  function getEncoding($src = 'config') {
    if ($src == 'parser') {
      return $this->target_encoding;
    }
    elseif (($src == 'config') && $this->encoding) {
      return $this->encoding;
    }
    /* parse() unsets the reader when it finishes, so it may be gone here. */
    if (isset($this->reader)) {
      $enc = $this->reader->getEncoding();
      if ($enc) {
        return $enc;
      }
    }
    return 'UTF-8';
  }

  /*  */

  /**
   * Returns the 'triples' value of this instance, or an empty array.
   */
  function getTriples() {
    return $this->v('triples', array());
  }

  /**
   * Returns the running triple counter.
   */
  function countTriples() {
    return $this->t_count;
  }

  /**
   * Stores one triple (with optional graph) in $this->triples.
   *
   * Subject and predicate must be non-empty. The object only has to be
   * present, so a literal "0" is kept rather than treated as missing.
   * When skip_dupes is on, a triple whose serialized form was already added
   * is ignored.
   *
   * @return int|null 0 when the triple is rejected as incomplete; nothing
   *                  otherwise.
   */
  function addT($s = '', $p = '', $o = '', $s_type = '', $o_type = '', $o_dt = '', $o_lang = '', $g = '') {
    $o_missing = ($o === '' || $o === null || $o === false);
    if (!$s || !$p || $o_missing) return 0;
    //echo "-----\nadding $s / $p / $o\n-----\n";
    $t = array('s' => $s, 'p' => $p, 'o' => $o, 's_type' => $s_type, 'o_type' => $o_type, 'o_datatype' => $o_dt, 'o_lang' => $o_lang, 'g' => $g);
    if ($this->skip_dupes) {
      $h = md5(serialize($t));
      if (isset($this->added_triples[$h])) {
        return;
      }
      $this->added_triples[$h] = true;
    }
    $this->triples[$this->t_count] = $t;
    $this->t_count++;
  }

  /*  */

  /**
   * Start-element handler: tracks the current element and prepares the
   * slots of the row being collected in $this->t.
   *
   * @param mixed  $p XML parser handle (unused).
   * @param string $t Element name.
   * @param array  $a Element attributes.
   */
  function open($p, $t, $a) {
    $this->state = $t;
    if ($t == 'result') {
      $this->t = array();
    }
    elseif ($t == 'binding') {
      /* Read via v() like the other attributes so a <binding> without a
         name attribute does not raise an undefined-index notice. */
      $this->binding = $this->v('name', '', $a);
      $this->t[$this->binding] = '';
    }
    elseif ($t == 'literal') {
      $this->t[$this->binding . '_dt'] = $this->v('datatype', '', $a);
      $this->t[$this->binding . '_lang'] = $this->v('xml:lang', '', $a);
      $this->t[$this->binding . '_type'] = 'literal';
    }
    elseif ($t == 'uri') {
      $this->t[$this->binding . '_type'] = 'uri';
    }
    elseif ($t == 'bnode') {
      $this->t[$this->binding . '_type'] = 'bnode';
      /* Blank node labels get the "_:" prefix; cData() appends the id. */
      $this->t[$this->binding] = '_:';
    }
  }

  /**
   * End-element handler: clears the element state and, when a <result>
   * closes, passes its s/p/o/g bindings to addT().
   *
   * @param mixed  $p XML parser handle (unused).
   * @param string $t Element name.
   */
  function close($p, $t) {
    $this->prev_state = $this->state;
    $this->state = '';
    if ($t == 'result') {
      $this->addT(
        $this->v('s', '', $this->t),
        $this->v('p', '', $this->t),
        $this->v('o', '', $this->t),
        $this->v('s_type', '', $this->t),
        $this->v('o_type', '', $this->t),
        $this->v('o_dt', '', $this->t),
        $this->v('o_lang', '', $this->t),
        $this->v('g', '', $this->t)
      );
    }
  }

  /**
   * Character-data handler: appends text to the current binding while
   * inside a <uri>, <bnode> or <literal> element.
   *
   * @param mixed  $p XML parser handle (unused).
   * @param string $d Character data chunk.
   */
  function cData($p, $d) {
    /* Strict comparison: parse() initialises state to integer 0, which
       compares loosely equal to any non-numeric string on PHP < 8. */
    if (in_array($this->state, array('uri', 'bnode', 'literal'), true)) {
      $this->t[$this->binding] .= $d;
    }
  }

  /**
   * Namespace-declaration handler: records the prefix for a namespace URI
   * unless that URI already has one.
   *
   * @param mixed  $p   XML parser handle (unused).
   * @param string $prf Declared prefix.
   * @param string $uri Namespace URI.
   */
  function nsDecl($p, $prf, $uri) {
    if (!isset($this->nsp[$uri])) {
      $this->nsp[$uri] = $prf;
    }
  }

  /*  */

}