annotate sites/all/libraries/ARC2/arc/parsers/ARC2_JSONParser.php @ 4:ce11bbd8f642

added modules
author danieleb <danielebarchiesi@me.com>
date Thu, 19 Sep 2013 10:38:44 +0100
parents
children
rev   line source
danielebarchiesi@4 1 <?php
danielebarchiesi@4 2 /**
danielebarchiesi@4 3 * ARC2 JSON Parser
danielebarchiesi@4 4 * Does not extract triples, needs sub-class for RDF extraction
danielebarchiesi@4 5 *
danielebarchiesi@4 6 * @author Benjamin Nowack <bnowack@semsol.com>
danielebarchiesi@4 7 * @license http://arc.semsol.org/license
danielebarchiesi@4 8 * @homepage <http://arc.semsol.org/>
danielebarchiesi@4 9 * @package ARC2
danielebarchiesi@4 10 * @version 2010-11-16
danielebarchiesi@4 11 */
danielebarchiesi@4 12
danielebarchiesi@4 13 ARC2::inc('RDFParser');
danielebarchiesi@4 14
class ARC2_JSONParser extends ARC2_RDFParser {

  /**
   * Forwards the configuration array and the calling component to the parent parser.
   *
   * @param mixed  $a      configuration, passed through unchanged
   * @param object $caller calling component (by reference), passed through unchanged
   */
  function __construct($a, &$caller) {
    parent::__construct($a, $caller);
  }

  /**
   * Initialisation hook; this class adds nothing beyond the parent's setup.
   */
  function __init() {
    parent::__init();
  }

  /* */

  /**
   * Matches $re against the start of $v after skipping leading block comments.
   *
   * Any run of leading "/* ... *" "/" comments is removed from $v first. The
   * shortest remaining input seen so far is recorded in $this->unparsed_code.
   * The actual match is delegated to ARC2::x(); the callers in this class
   * treat a falsy result as "no match" and the last element of the returned
   * array as the input remaining after the match.
   *
   * @param string $re      regular expression fragment (without delimiters)
   * @param string $v       input to match against
   * @param string $options PCRE modifiers handed to ARC2::x()
   * @return mixed whatever ARC2::x() returns
   */
  function x($re, $v, $options = 'si') {
    while (preg_match('/^\s*(\/\*.*\*\/)(.*)$/Usi', $v, $m)) {/* comment removal */
      $v = $m[2];
    }
    if (strlen($this->unparsed_code) > strlen($v)) {
      $this->unparsed_code = $v;
    }
    return ARC2::x($re, $v, $options);
  }

  /**
   * Reads a JSON document via ARC2_Reader and stores the decoded structure in $this->struct.
   *
   * @param string $path location handed to the reader
   * @param string $data optional inline data handed to the reader
   * @return mixed result of $this->done()
   */
  function parse($path, $data = '') {
    $this->state = 0;
    /* reader */
    if (!$this->v('reader')) {
      ARC2::inc('Reader');
      $this->reader = new ARC2_Reader($this->a, $this);
    }
    $this->reader->setAcceptHeader('Accept: application/json; q=0.9, */*; q=0.1');
    $this->reader->activate($path, $data);
    $this->x_base = (isset($this->a['base']) && $this->a['base']) ? $this->a['base'] : $this->reader->base;
    /* collect the whole document */
    $doc = '';
    while ($d = $this->reader->readStream()) {
      $doc .= $d;
    }
    $this->reader->closeStream();
    unset($this->reader);
    /* keep only the span from the first "{" to the last "}" (drops e.g. JSONP padding) */
    $stripped = preg_replace('/^[^\{]*(.*\})[^\}]*$/is', '\\1', $doc);
    /* preg_replace yields NULL on a PCRE error; keep the raw document in that case */
    if ($stripped !== null) {
      $doc = $stripped;
    }
    $this->unparsed_code = $doc;
    $parsed = $this->extractObject($doc);
    $this->struct = $parsed[0];
    return $this->done();
  }

  /* */

  /**
   * Decodes the JSON value at the start of $v.
   *
   * Uses json_decode() (associative arrays) when available; the remaining
   * input is then reported as ''. Otherwise falls back to the regex-based
   * extractor below, which handles objects, lists and scalar values.
   *
   * @param string $v JSON text
   * @return array array(decoded value, remaining input)
   */
  function extractObject($v) {
    if (function_exists('json_decode')) return array(json_decode($v, 1), '');
    $r = array();
    /* sub-object */
    if ($sub_r = $this->x('\{', $v)) {
      $v = $sub_r[1];
      while ((list($sub_r, $v) = $this->extractEntry($v)) && $sub_r) {
        $r[$sub_r['key']] = $sub_r['value'];
      }
      if ($sub_r = $this->x('\}', $v)) $v = $sub_r[1];
    }
    /* sub-list */
    elseif ($sub_r = $this->x('\[', $v)) {
      $v = $sub_r[1];
      /* note: a falsy item ends the list, as in the original implementation */
      while ((list($sub_r, $v) = $this->extractObject($v)) && $sub_r) {
        $r[] = $sub_r;
        $v = ltrim($v, ',');
      }
      if ($sub_r = $this->x('\]', $v)) $v = $sub_r[1];
    }
    /* sub-value */
    elseif ((list($sub_r, $v) = $this->extractValue($v)) && ($sub_r !== false)) {
      $r = $sub_r;
    }
    return array($r, $v);
  }

  /**
   * Extracts one "key": value entry of an object (fallback parser).
   *
   * @param string $v input positioned at an entry, optionally preceded by a comma
   * @return array array(array('key' => ..., 'value' => ...), remaining input),
   *               or array(0, remaining input) when no key is found
   */
  function extractEntry($v) {
    if ($r = $this->x('\,', $v)) $v = $r[1];
    /* k */
    if ($r = $this->x('\"([^\"]+)\"\s*\:', $v)) {
      /* extractObject always returns a two-element array */
      list($sub_r, $sub_v) = $this->extractObject($r[2]);
      return array(
        array('key' => $r[1], 'value' => $sub_r),
        $sub_v
      );
    }
    return array(0, $v);
  }

  /**
   * Extracts a scalar value (null, true/false, number or string) (fallback parser).
   *
   * Booleans and numbers are returned as their literal text, strings with
   * their escape sequences resolved.
   *
   * @param string $v input positioned at a value, optionally preceded by a comma
   * @return array array(value, remaining input); the value is false when nothing matched
   */
  function extractValue($v) {
    if ($r = $this->x('\,', $v)) $v = $r[1];
    if ($sub_r = $this->x('null', $v)) {
      return array(null, $sub_r[1]);
    }
    if ($sub_r = $this->x('(true|false)', $v)) {
      return array($sub_r[1], $sub_r[2]);
    }
    if ($sub_r = $this->x('([\-\+]?[0-9\.]+)', $v)) {
      return array($sub_r[1], $sub_r[2]);
    }
    if ($sub_r = $this->x('\"', $v)) {
      $rest = $sub_r[1];
      /* shortest prefix that ends before an unescaped double quote */
      if (preg_match('/^([^\x5c]*|.*[^\x5c]|.*\x5c{2})\"(.*)$/sU', $rest, $m)) {
        return array($this->unescapeString($m[1]), $m[2]);
      }
    }
    return array(false, $v);
  }

  /**
   * Resolves JSON escape sequences in the raw content of a string literal.
   *
   * All sequences are handled in a single left-to-right pass, so the result
   * of one replacement (e.g. the backslash produced by "\\") can never be
   * mistaken for the start of another escape. This replaces the former
   * preg_replace() call with the "e" modifier, which is no longer supported
   * by current PHP versions and evaluated input-derived text as code.
   *
   * @param string $val raw string content (without the surrounding quotes)
   * @return string unescaped string; \uXXXX sequences are emitted as UTF-8
   */
  function unescapeString($val) {
    $re = '/\\\\(?:u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][c-fC-F][0-9a-fA-F]{2})|u([0-9a-fA-F]{4})|(.))/s';
    $r = preg_replace_callback($re, array($this, 'unescapeSequence'), $val);
    /* on a PCRE error keep the raw text rather than losing the value */
    return ($r === null) ? $val : $r;
  }

  /**
   * preg_replace_callback() handler for unescapeString().
   *
   * @param array $m match: [1]/[2] surrogate pair, [3] single \uXXXX, [4] one-character escape
   * @return string replacement text
   */
  function unescapeSequence($m) {
    /* one-character escape */
    if (isset($m[4])) {
      $map = array(
        '\\' => '\\', '"' => '"', '/' => '/',
        'r' => "\r", 't' => "\t", 'n' => "\n",
        'b' => "\x08", 'f' => "\x0C"
      );
      /* unknown escapes are left untouched */
      return isset($map[$m[4]]) ? $map[$m[4]] : $m[0];
    }
    /* single \uXXXX (a lone surrogate is encoded as-is) */
    if (isset($m[3])) {
      return $this->codePointToUTF8(hexdec($m[3]));
    }
    /* surrogate pair => code point beyond the BMP */
    $hi = hexdec($m[1]);
    $lo = hexdec($m[2]);
    return $this->codePointToUTF8(0x10000 + (($hi - 0xD800) << 10) + ($lo - 0xDC00));
  }

  /**
   * Encodes a Unicode code point as a UTF-8 byte sequence.
   *
   * @param int $cp code point (0 .. 0x10FFFF)
   * @return string UTF-8 bytes
   */
  function codePointToUTF8($cp) {
    if ($cp < 0x80) {
      return chr($cp);
    }
    if ($cp < 0x800) {
      return chr(0xC0 | ($cp >> 6)) . chr(0x80 | ($cp & 0x3F));
    }
    if ($cp < 0x10000) {
      return chr(0xE0 | ($cp >> 12)) . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F));
    }
    return chr(0xF0 | ($cp >> 18)) . chr(0x80 | (($cp >> 12) & 0x3F)) . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F));
  }

  /* */

  /**
   * @return mixed the decoded structure stored by parse(), or an empty array if none is available
   */
  function getObject() {
    return $this->v('struct', array());
  }

  /**
   * @return array the triples collected via addT(), or an empty array if none are available
   */
  function getTriples() {
    return $this->v('triples', array());
  }

  /**
   * @return int number of triples stored so far
   */
  function countTriples() {
    return $this->t_count;
  }

  /**
   * Stores a triple, optionally skipping exact duplicates.
   *
   * The object value is passed through $this->toUTF8() first. When
   * $this->skip_dupes is set, a triple whose serialized form was already
   * added is ignored.
   *
   * @param string $s       subject
   * @param string $p       predicate
   * @param string $o       object
   * @param string $s_type  subject type
   * @param string $o_type  object type
   * @param string $o_dt    object datatype
   * @param string $o_lang  object language
   */
  function addT($s = '', $p = '', $o = '', $s_type = '', $o_type = '', $o_dt = '', $o_lang = '') {
    $o = $this->toUTF8($o);
    $t = array('s' => $s, 'p' => $p, 'o' => $o, 's_type' => $s_type, 'o_type' => $o_type, 'o_datatype' => $o_dt, 'o_lang' => $o_lang);
    if ($this->skip_dupes) {
      $h = md5(serialize($t));
      if (isset($this->added_triples[$h])) {
        return;
      }
      $this->added_triples[$h] = true;
    }
    $this->triples[$this->t_count] = $t;
    $this->t_count++;
  }

  /* */

}