Mercurial > hg > rr-repo
diff sites/all/libraries/ARC2/arc/parsers/ARC2_JSONParser.php @ 4:ce11bbd8f642
added modules
author | danieleb <danielebarchiesi@me.com> |
---|---|
date | Thu, 19 Sep 2013 10:38:44 +0100 |
parents | |
children |
line wrap: on
line diff
<?php
/**
 * ARC2 JSON Parser
 * Does not extract triples, needs sub-class for RDF extraction
 *
 * @author Benjamin Nowack <bnowack@semsol.com>
 * @license http://arc.semsol.org/license
 * @homepage <http://arc.semsol.org/>
 * @package ARC2
 * @version 2010-11-16
*/

ARC2::inc('RDFParser');

/**
 * Reads a JSON document through an ARC2_Reader and stores the decoded
 * structure in $this->struct.
 *
 * Decoding uses json_decode() when that function exists; otherwise a
 * regex-driven fallback (extractObject/extractEntry/extractValue) is used.
 * Triple bookkeeping (addT, getTriples, countTriples) is provided for
 * sub-classes; this class itself never calls addT().
 */
class ARC2_JSONParser extends ARC2_RDFParser {

  /**
   * Forwards construction to the parent parser.
   *
   * @param mixed $a       configuration, passed through unchanged
   * @param mixed $caller  calling component, passed through by reference
   */
  function __construct($a, &$caller) {
    parent::__construct($a, $caller);
  }

  /**
   * Forwards initialisation to the parent parser.
   */
  function __init() {
    parent::__init();
  }

  /*  */

  /**
   * Matches $re against the start of $v via ARC2::x(), after skipping any
   * leading C-style block comments.
   *
   * Side effect: $this->unparsed_code is replaced by $v whenever $v is
   * shorter than the currently stored value, so it tracks the shortest
   * remaining input seen so far.
   *
   * @param string $re       regex fragment handed to ARC2::x()
   * @param string $v        remaining input
   * @param string $options  regex modifiers handed to ARC2::x()
   * @return mixed whatever ARC2::x() returns; callers in this class treat it
   *               as falsy on no match and otherwise as an array whose last
   *               element is the remaining input (presumably - ARC2::x() is
   *               defined elsewhere)
   */
  function x($re, $v, $options = 'si') {
    while (preg_match('/^\s*(\/\*.*\*\/)(.*)$/Usi', $v, $m)) {/* comment removal */
      $v = $m[2];
    }
    $this->unparsed_code = (strlen($this->unparsed_code) > strlen($v)) ? $v : $this->unparsed_code;
    return ARC2::x($re, $v, $options);
  }

  /**
   * Fetches the document at $path (or uses $data), decodes it and stores the
   * result in $this->struct.
   *
   * @param string $path  location handed to the reader
   * @param string $data  optional inline data handed to the reader
   * @return mixed the result of $this->done()
   */
  function parse($path, $data = '') {
    $this->state = 0;
    /* reader */
    if (!$this->v('reader')) {
      ARC2::inc('Reader');
      $this->reader = new ARC2_Reader($this->a, $this);
    }
    $this->reader->setAcceptHeader('Accept: application/json; q=0.9, */*; q=0.1');
    $this->reader->activate($path, $data);
    $this->x_base = isset($this->a['base']) && $this->a['base'] ? $this->a['base'] : $this->reader->base;
    /* parse */
    $doc = '';
    /* NOTE(review): a chunk that is exactly "0" is falsy and would end the loop early - confirm readStream() chunking */
    while ($d = $this->reader->readStream()) {
      $doc .= $d;
    }
    $this->reader->closeStream();
    unset($this->reader);
    /* keep only the span from the first "{" to the last "}" (drops e.g. a callback wrapper) */
    $stripped = preg_replace('/^[^\{]*(.*\})[^\}]*$/is', '\\1', $doc);
    if ($stripped !== null) {/* preg_replace yields null on a PCRE error; keep the raw document then */
      $doc = $stripped;
    }
    $this->unparsed_code = $doc;
    $parsed = $this->extractObject($doc);
    $this->struct = $parsed[0];
    return $this->done();
  }

  /*  */

  /**
   * Decodes the JSON value at the start of $v.
   *
   * Uses json_decode() (associative arrays) when available and then reports
   * an empty remainder. Otherwise falls back to manual extraction of an
   * object, a list or a scalar.
   *
   * NOTE(review): in the fallback, the list loop stops at the first element
   * that evaluates to false (null, 0, "", "0", empty object/list), and an
   * unmatched input is indistinguishable from an empty object (both yield
   * array()).
   *
   * @param string $v  remaining input
   * @return array array(decoded value, remaining input)
   */
  function extractObject($v) {
    if (function_exists('json_decode')) return array(json_decode($v, true), '');
    $r = array();
    /* sub-object */
    if ($sub_r = $this->x('\{', $v)) {
      $v = $sub_r[1];
      while ((list($sub_r, $v) = $this->extractEntry($v)) && $sub_r) {
        $r[$sub_r['key']] = $sub_r['value'];
      }
      if ($sub_r = $this->x('\}', $v)) $v = $sub_r[1];
    }
    /* sub-list */
    elseif ($sub_r = $this->x('\[', $v)) {
      $v = $sub_r[1];
      while ((list($sub_r, $v) = $this->extractObject($v)) && $sub_r) {
        $r[] = $sub_r;
        $v = ltrim($v, ',');
      }
      if ($sub_r = $this->x('\]', $v)) $v = $sub_r[1];
    }
    /* sub-value */
    elseif ((list($sub_r, $v) = $this->extractValue($v)) && ($sub_r !== false)) {
      $r = $sub_r;
    }
    return array($r, $v);
  }

  /**
   * Extracts one `"key": value` entry of an object (fallback decoder).
   *
   * The key pattern accepts any non-empty run of characters other than a
   * double quote, so empty keys and keys containing escaped quotes are not
   * recognised.
   *
   * @param string $v  remaining input
   * @return array array(array('key' => ..., 'value' => ...), remaining input),
   *               or array(0, remaining input) when no entry starts here
   */
  function extractEntry($v) {
    if ($r = $this->x('\,', $v)) $v = $r[1];
    /* k */
    if ($r = $this->x('\"([^\"]+)\"\s*\:', $v)) {
      /* extractObject always returns a two-element array */
      list($sub_r, $sub_v) = $this->extractObject($r[2]);
      return array(
        array('key' => $r[1], 'value' => $sub_r),
        $sub_v
      );
    }
    return array(0, $v);
  }

  /**
   * Extracts one scalar (null, true/false, number or string) from the start
   * of $v (fallback decoder).
   *
   * NOTE(review): booleans and numbers are returned as the matched text
   * ('true', '12.5'), unlike the json_decode() path which returns native
   * types.
   *
   * @param string $v  remaining input
   * @return array array(value, remaining input); value is false when no
   *               scalar starts here
   */
  function extractValue($v) {
    if ($r = $this->x('\,', $v)) $v = $r[1];
    if ($sub_r = $this->x('null', $v)) {
      return array(null, $sub_r[1]);
    }
    if ($sub_r = $this->x('(true|false)', $v)) {
      return array($sub_r[1], $sub_r[2]);
    }
    if ($sub_r = $this->x('([\-\+]?[0-9\.]+)', $v)) {
      return array($sub_r[1], $sub_r[2]);
    }
    if ($sub_r = $this->x('\"', $v)) {
      $rest = $sub_r[1];
      /* string body = runs of plain chars and backslash-escaped chars, up to the first unescaped quote */
      if (preg_match('/^([^"\x5c]*(?:\x5c.[^"\x5c]*)*)"(.*)$/s', $rest, $m)) {
        /* decode every escape in one pass so that an escaped backslash is never re-read as the start of another escape */
        $val = preg_replace_callback('/\x5c(?:u([0-9a-fA-F]{4})|(["\x5c\/bfnrt]))/', array($this, 'jsonUnescapeMatch'), $m[1]);
        return array($val, $m[2]);
      }
    }
    return array(false, $v);
  }

  /**
   * preg_replace_callback handler for extractValue(): turns one JSON escape
   * sequence into the character it stands for.
   *
   * NOTE(review): \uXXXX escapes are reduced to a single byte (the low 8 bits
   * of the code point), which is only meaningful for code points up to 0xFF.
   *
   * @param array $m  match; $m[1] holds the four hex digits of a \u escape,
   *                  $m[2] (when present) the character after the backslash
   * @return string the unescaped character
   */
  function jsonUnescapeMatch($m) {
    if (isset($m[2])) {
      /* PHP has no "\b" string escape, so backspace and form feed are spelled as hex bytes */
      $map = array('"' => '"', '\\' => '\\', '/' => '/', 'b' => "\x08", 'f' => "\x0c", 'n' => "\n", 'r' => "\r", 't' => "\t");
      return $map[$m[2]];
    }
    return chr(hexdec($m[1]) & 0xFF);
  }

  /*  */

  /**
   * @return mixed the decoded structure ($this->struct), array() by default
   */
  function getObject() {
    return $this->v('struct', array());
  }

  /**
   * @return mixed the collected triples ($this->triples), array() by default
   */
  function getTriples() {
    return $this->v('triples', array());
  }

  /**
   * @return mixed the value of $this->t_count
   */
  function countTriples() {
    return $this->t_count;
  }

  /**
   * Appends a triple to $this->triples and increments $this->t_count.
   *
   * When $this->skip_dupes is set, a triple whose serialised form was
   * already added is silently ignored.
   *
   * @param string $s       subject
   * @param string $p       predicate
   * @param string $o       object (passed through $this->toUTF8())
   * @param string $s_type  subject type
   * @param string $o_type  object type
   * @param string $o_dt    object datatype
   * @param string $o_lang  object language
   */
  function addT($s = '', $p = '', $o = '', $s_type = '', $o_type = '', $o_dt = '', $o_lang = '') {
    $o = $this->toUTF8($o);
    $t = array('s' => $s, 'p' => $p, 'o' => $o, 's_type' => $s_type, 'o_type' => $o_type, 'o_datatype' => $o_dt, 'o_lang' => $o_lang);
    if ($this->skip_dupes) {
      /* md5 is used only as a lookup key for duplicate detection */
      $h = md5(serialize($t));
      if (isset($this->added_triples[$h])) return;
      $this->added_triples[$h] = true;
    }
    $this->triples[$this->t_count] = $t;
    $this->t_count++;
  }

  /*  */

}