* @license http://arc.semsol.org/license
* @homepage
* @package ARC2
* @version 2010-11-16
*/

ARC2::inc('RDFParser');

/**
 * JSON parser that decodes a JSON document into a PHP structure ($this->struct).
 *
 * Uses the native json_decode() when it exists and falls back to a small
 * hand-written recursive-descent parser otherwise. This class itself never
 * calls addT(); it only stores the decoded structure and offers addT() as a
 * helper for collecting triples.
 */
class ARC2_JSONParser extends ARC2_RDFParser {

  /**
   * @param array  $a      Configuration array, forwarded to the parent constructor.
   * @param object $caller Calling component (by reference), forwarded to the parent constructor.
   */
  function __construct($a, &$caller) {
    parent::__construct($a, $caller);
  }

  /**
   * Initialisation hook; delegates to the parent implementation.
   */
  function __init() {
    parent::__init();
  }

  /* */

  /**
   * Strips leading C-style comments from $v, records the shortest input seen
   * so far in $this->unparsed_code, then delegates the match to ARC2::x().
   *
   * NOTE(review): judging from the call sites in this class, ARC2::x() returns
   * the match groups with the remaining input as the last element, or a falsy
   * value when there is no match - confirm against ARC2::x().
   *
   * @param string $re      Regular expression fragment (no delimiters).
   * @param string $v       Input to match against.
   * @param string $options Regex modifiers passed through to ARC2::x().
   * @return mixed Whatever ARC2::x() returns.
   */
  function x($re, $v, $options = 'si') {
    while (preg_match('/^\s*(\/\*.*\*\/)(.*)$/Usi', $v, $m)) {/* comment removal */
      $v = $m[2];
    }
    /* keep the shortest remainder, i.e. the furthest position reached */
    $this->unparsed_code = (strlen($this->unparsed_code) > strlen($v)) ? $v : $this->unparsed_code;
    return ARC2::x($re, $v, $options);
  }

  /**
   * Reads the document at $path (or the passed $data), decodes it and stores
   * the result in $this->struct.
   *
   * @param string $path Location handed to the reader.
   * @param string $data Optional inline data handed to the reader.
   * @return mixed Result of $this->done().
   */
  function parse($path, $data = '') {
    $this->state = 0;
    /* reader */
    if (!$this->v('reader')) {
      ARC2::inc('Reader');
      $this->reader = new ARC2_Reader($this->a, $this);
    }
    $this->reader->setAcceptHeader('Accept: application/json; q=0.9, */*; q=0.1');
    $this->reader->activate($path, $data);
    $this->x_base = isset($this->a['base']) && $this->a['base'] ? $this->a['base'] : $this->reader->base;
    /* parse: slurp the whole stream first */
    $doc = '';
    while ($d = $this->reader->readStream()) {
      $doc .= $d;
    }
    $this->reader->closeStream();
    unset($this->reader);
    /* drop everything before the first "{" and after the last "}" */
    $doc = preg_replace('/^[^\{]*(.*\})[^\}]*$/is', '\\1', $doc);
    $this->unparsed_code = $doc;
    list($this->struct, $rest) = $this->extractObject($doc);
    return $this->done();
  }

  /* */

  /**
   * Decodes the JSON value at the start of $v.
   *
   * When json_decode() exists the whole input is decoded natively (objects as
   * associative arrays) and the returned remainder is always ''. Otherwise the
   * value is parsed by hand as an object, a list or a scalar.
   *
   * NOTE(review): in the hand-written list branch the loop stops at the first
   * falsy element (e.g. "0", null, an empty object), so such lists are
   * truncated. Left unchanged here because the termination protocol is shared
   * with extractEntry()/extractValue().
   *
   * @param string $v JSON text.
   * @return array array(decoded value, remaining input).
   */
  function extractObject($v) {
    if (function_exists('json_decode')) return array(json_decode($v, true), '');
    $r = array();
    /* sub-object */
    if ($sub_r = $this->x('\{', $v)) {
      $v = $sub_r[1];
      while ((list($sub_r, $v) = $this->extractEntry($v)) && $sub_r) {
        $r[$sub_r['key']] = $sub_r['value'];
      }
      if ($sub_r = $this->x('\}', $v)) $v = $sub_r[1];
    }
    /* sub-list */
    elseif ($sub_r = $this->x('\[', $v)) {
      $v = $sub_r[1];
      while ((list($sub_r, $v) = $this->extractObject($v)) && $sub_r) {
        $r[] = $sub_r;
        $v = ltrim($v, ',');
      }
      if ($sub_r = $this->x('\]', $v)) $v = $sub_r[1];
    }
    /* sub-value */
    elseif ((list($sub_r, $v) = $this->extractValue($v)) && ($sub_r !== false)) {
      $r = $sub_r;
    }
    return array($r, $v);
  }

  /**
   * Parses one `"key": value` entry of a JSON object, skipping a leading comma.
   *
   * @param string $v JSON text positioned at (or just before) an entry.
   * @return array array(array('key' => ..., 'value' => ...), remaining input),
   *               or array(0, $v) when no entry starts here.
   */
  function extractEntry($v) {
    if ($r = $this->x('\,', $v)) $v = $r[1];
    /* k */
    if ($r = $this->x('\"([^\"]+)\"\s*\:', $v)) {
      $k = $r[1];
      /* extractObject() always returns a two-element array */
      list($sub_r, $sub_v) = $this->extractObject($r[2]);
      return array(
        array('key' => $k, 'value' => $sub_r),
        $sub_v
      );
    }
    return array(0, $v);
  }

  /**
   * Parses a scalar JSON value (null, true/false, number or string), skipping
   * a leading comma.
   *
   * Booleans and numbers are returned as the matched text, not converted to
   * PHP bool/int/float. Strings are unescaped in a single pass and \uXXXX
   * escapes are emitted as UTF-8.
   *
   * @param string $v JSON text positioned at (or just before) a scalar.
   * @return array array(value, remaining input), or array(false, $v) when no
   *               scalar starts here.
   */
  function extractValue($v) {
    if ($r = $this->x('\,', $v)) $v = $r[1];
    if ($sub_r = $this->x('null', $v)) {
      return array(null, $sub_r[1]);
    }
    if ($sub_r = $this->x('(true|false)', $v)) {
      return array($sub_r[1], $sub_r[2]);
    }
    if ($sub_r = $this->x('([\-\+]?[0-9\.]+)', $v)) {
      return array($sub_r[1], $sub_r[2]);
    }
    if ($sub_r = $this->x('\"', $v)) {
      $rest = $sub_r[1];
      /* ungreedy: capture up to the first quote that is not backslash-escaped */
      if (preg_match('/^([^\x5c]*|.*[^\x5c]|.*\x5c{2})\"(.*)$/sU', $rest, $m)) {
        /*
         * Unescape in ONE left-to-right pass. The previous implementation used
         * the removed /e modifier and several sequential replacements, which
         * decoded an escaped backslash followed by "u0041" or "r" twice.
         */
        $val = preg_replace_callback(
          '/\\\\(u[dD][89abAB][0-9a-fA-F]{2}\\\\u[dD][c-fC-F][0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|.)/s',
          array($this, 'unescapeSequence'),
          $m[1]
        );
        return array($val, $m[2]);
      }
    }
    return array(false, $v);
  }

  /**
   * preg_replace_callback() handler for extractValue(): decodes one JSON
   * escape sequence.
   *
   * @param array $m Match array; $m[0] is the full escape including the
   *                 backslash, $m[1] the part after the backslash.
   * @return string Decoded character(s); unknown escapes are returned unchanged.
   */
  function unescapeSequence($m) {
    $seq = $m[1];
    $len = strlen($seq);
    if ($len === 11) {/* surrogate pair: uD8xx\uDCxx */
      $hi = hexdec(substr($seq, 1, 4));
      $lo = hexdec(substr($seq, 7, 4));
      return $this->codePointToUTF8(0x10000 + (($hi - 0xD800) << 10) + ($lo - 0xDC00));
    }
    if ($len === 5) {/* uXXXX */
      $cp = hexdec(substr($seq, 1));
      /* a lone surrogate is not a valid character: use U+FFFD instead */
      if ($cp >= 0xD800 && $cp <= 0xDFFF) $cp = 0xFFFD;
      return $this->codePointToUTF8($cp);
    }
    switch ($seq) {
      case '\\': return '\\';
      case '"': return '"';
      case '/': return '/';
      case 'r': return "\r";
      case 't': return "\t";
      case 'n': return "\n";
      case 'f': return "\f";
      case 'b': return chr(8);/* "\b" is not a PHP string escape */
    }
    return $m[0];
  }

  /**
   * Encodes a Unicode code point as a UTF-8 byte sequence.
   *
   * @param int $cp Code point (0 .. 0x10FFFF).
   * @return string UTF-8 encoded character.
   */
  function codePointToUTF8($cp) {
    if ($cp < 0x80) {
      return chr($cp);
    }
    if ($cp < 0x800) {
      return chr(0xC0 | ($cp >> 6)) . chr(0x80 | ($cp & 0x3F));
    }
    if ($cp < 0x10000) {
      return chr(0xE0 | ($cp >> 12)) . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F));
    }
    return chr(0xF0 | ($cp >> 18)) . chr(0x80 | (($cp >> 12) & 0x3F)) . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F));
  }

  /* */

  /**
   * @return mixed The decoded structure stored by parse(), or an empty array.
   */
  function getObject() {
    return $this->v('struct', array());
  }

  /**
   * @return array The collected triples, or an empty array.
   */
  function getTriples() {
    return $this->v('triples', array());
  }

  /**
   * @return int Number of triples added via addT().
   */
  function countTriples() {
    return $this->t_count;
  }

  /**
   * Appends a triple to $this->triples and increments $this->t_count.
   *
   * The object value is passed through $this->toUTF8() first. When
   * $this->skip_dupes is set, a triple whose serialized form was already
   * added is silently ignored.
   *
   * @param string $s      Subject.
   * @param string $p      Predicate.
   * @param string $o      Object.
   * @param string $s_type Subject type.
   * @param string $o_type Object type.
   * @param string $o_dt   Object datatype.
   * @param string $o_lang Object language.
   */
  function addT($s = '', $p = '', $o = '', $s_type = '', $o_type = '', $o_dt = '', $o_lang = '') {
    $o = $this->toUTF8($o);
    $t = array('s' => $s, 'p' => $p, 'o' => $o, 's_type' => $s_type, 'o_type' => $o_type, 'o_datatype' => $o_dt, 'o_lang' => $o_lang);
    if ($this->skip_dupes) {
      $h = md5(serialize($t));
      if (isset($this->added_triples[$h])) {
        return;
      }
      $this->added_triples[$h] = true;
    }
    $this->triples[$this->t_count] = $t;
    $this->t_count++;
  }

  /* */

}