Mercurial > hg > rr-repo
comparison sites/all/libraries/ARC2/arc/parsers/ARC2_JSONParser.php @ 4:ce11bbd8f642
added modules
author | danieleb <danielebarchiesi@me.com> |
---|---|
date | Thu, 19 Sep 2013 10:38:44 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
3:b28be78d8160 | 4:ce11bbd8f642 |
---|---|
1 <?php | |
2 /** | |
3 * ARC2 JSON Parser | |
4 * Does not extract triples, needs sub-class for RDF extraction | |
5 * | |
6 * @author Benjamin Nowack <bnowack@semsol.com> | |
7 * @license http://arc.semsol.org/license | |
8 * @homepage <http://arc.semsol.org/> | |
9 * @package ARC2 | |
10 * @version 2010-11-16 | |
11 */ | |
12 | |
13 ARC2::inc('RDFParser'); | |
14 | |
/**
 * JSON parser that reads a document through an ARC2_Reader and stores the
 * decoded structure in $this->struct. It does not create triples itself;
 * sub-classes are expected to call addT() while walking the structure.
 *
 * Decoding uses json_decode() when that function exists. The extract*()
 * methods below only act as a fallback decoder for builds without it.
 */
class ARC2_JSONParser extends ARC2_RDFParser {

  /**
   * Passes the configuration array and the caller reference to the parent.
   *
   * @param array $a configuration array
   * @param object $caller calling component (by reference)
   */
  function __construct($a, &$caller) {
    parent::__construct($a, $caller);
  }

  /**
   * Delegates initialisation to the parent class.
   */
  function __init() {
    parent::__init();
  }

  /* */

  /**
   * Strips leading block comments from $v, records the shortest remainder
   * seen so far in $this->unparsed_code and then delegates to ARC2::x().
   *
   * NOTE(review): ARC2::x() is defined outside this file. The callers here
   * treat its result as falsy on failure and otherwise as an array whose last
   * element is the unparsed remainder - confirm against ARC2::x().
   *
   * @param string $re regular expression fragment to match
   * @param string $v text to match against
   * @param string $options regex modifiers handed to ARC2::x()
   * @return mixed whatever ARC2::x() returns
   */
  function x($re, $v, $options = 'si') {
    while (preg_match('/^\s*(\/\*.*\*\/)(.*)$/Usi', $v, $m)) {/* comment removal */
      $v = $m[2];
    }
    $this->unparsed_code = (strlen($this->unparsed_code) > strlen($v)) ? $v : $this->unparsed_code;
    return ARC2::x($re, $v, $options);
  }

  /**
   * Reads the whole document from the reader, trims everything outside the
   * outermost pair of curly brackets and stores the decoded structure in
   * $this->struct.
   *
   * @param string $path location handed to the reader
   * @param string $data optional inline data handed to the reader
   * @return mixed result of $this->done()
   */
  function parse($path, $data = '') {
    $this->state = 0;
    /* reader */
    if (!$this->v('reader')) {
      ARC2::inc('Reader');
      $this->reader = new ARC2_Reader($this->a, $this);
    }
    $this->reader->setAcceptHeader('Accept: application/json; q=0.9, */*; q=0.1');
    $this->reader->activate($path, $data);
    $this->x_base = isset($this->a['base']) && $this->a['base'] ? $this->a['base'] : $this->reader->base;
    /* parse */
    $doc = '';
    while ($d = $this->reader->readStream()) {
      $doc .= $d;
    }
    $this->reader->closeStream();
    unset($this->reader);
    /* drop anything before the first "{" and after the last "}" */
    $trimmed = preg_replace('/^[^\{]*(.*\})[^\}]*$/is', '\\1', $doc);
    /* preg_replace() yields NULL on a PCRE error; keep the raw document then */
    if ($trimmed !== null) $doc = $trimmed;
    $this->unparsed_code = $doc;
    $parsed = $this->extractObject($doc);
    $this->struct = $parsed[0];
    return $this->done();
  }

  /* */

  /**
   * Decodes the JSON value at the start of $v.
   *
   * With json_decode() available the complete input is decoded in one go and
   * the remainder is reported as ''. Otherwise objects, lists and scalar
   * values are extracted recursively by the fallback code.
   *
   * @param string $v JSON text
   * @return array array(decoded value, unparsed remainder)
   */
  function extractObject($v) {
    if (function_exists('json_decode')) return array(json_decode($v, 1), '');
    $r = array();
    /* sub-object */
    if ($sub_r = $this->x('\{', $v)) {
      $v = $sub_r[1];
      for (;;) {
        $entry = $this->extractEntry($v);
        $v = $entry[1];
        if (!$entry[0]) break;
        $r[$entry[0]['key']] = $entry[0]['value'];
      }
      if ($sub_r = $this->x('\}', $v)) $v = $sub_r[1];
    }
    /* sub-list */
    elseif ($sub_r = $this->x('\[', $v)) {
      $v = $sub_r[1];
      for (;;) {
        if ($sep = $this->x('\,', $v)) $v = $sep[1];
        if ($end = $this->x('\]', $v)) {
          $v = $end[1];
          break;
        }
        $before = strlen($v);
        $item = $this->extractObject($v);
        $v = $item[1];
        /* stop on lack of progress, not on the item's truthiness, so that */
        /* elements such as 0, "", null, [] or {} do not truncate the list */
        if (strlen($v) >= $before) break;
        $r[] = $item[0];
      }
    }
    /* sub-value */
    else {
      $value = $this->extractValue($v);
      $v = $value[1];
      if ($value[0] !== false) $r = $value[0];
    }
    return array($r, $v);
  }

  /**
   * Extracts one '"key": value' pair, skipping a leading comma.
   *
   * @param string $v JSON text positioned inside an object
   * @return array array(array('key' => ..., 'value' => ...), remainder), or
   *               array(0, remainder) when no quoted key follows
   */
  function extractEntry($v) {
    if ($r = $this->x('\,', $v)) $v = $r[1];
    /* k */
    if ($r = $this->x('\"([^\"]+)\"\s*\:', $v)) {
      $k = $r[1];
      $sub = $this->extractObject($r[2]);
      return array(
        array('key' => $k, 'value' => $sub[0]),
        $sub[1]
      );
    }
    return array(0, $v);
  }

  /**
   * Extracts a scalar value (null, true/false, number or string), skipping a
   * leading comma.
   *
   * Booleans and numbers are returned as the matched text, not as PHP
   * booleans/numbers; boolean false is reserved as the "nothing matched"
   * marker that extractObject() checks for.
   *
   * @param string $v JSON text
   * @return array array(value, remainder), value being false if none matched
   */
  function extractValue($v) {
    if ($r = $this->x('\,', $v)) $v = $r[1];
    if ($sub_r = $this->x('null', $v)) {
      return array(null, $sub_r[1]);
    }
    if ($sub_r = $this->x('(true|false)', $v)) {
      return array($sub_r[1], $sub_r[2]);
    }
    if ($sub_r = $this->x('([\-\+]?[0-9\.]+)', $v)) {
      return array($sub_r[1], $sub_r[2]);
    }
    if ($sub_r = $this->x('\"', $v)) {
      $rest = $sub_r[1];
      /* the body runs up to the first quote that is not part of an escape */
      /* sequence; every backslash consumes the character following it */
      if (preg_match('/^([^"\x5c]*(?:\x5c.[^"\x5c]*)*)"(.*)$/s', $rest, $m)) {
        return array($this->unescapeString($m[1]), $m[2]);
      }
    }
    return array(false, $v);
  }

  /**
   * Decodes the backslash escapes of a JSON string body in a single pass, so
   * that the output of one escape can never be re-read as the start of
   * another (e.g. an escaped backslash followed by "n").
   *
   * @param string $val raw string body without the surrounding quotes
   * @return string decoded string, or $val unchanged on a PCRE error
   */
  function unescapeString($val) {
    $decoded = preg_replace_callback('/\x5c(u[0-9a-fA-F]{4}|.)/s', array($this, 'unescapeMatch'), $val);
    return ($decoded === null) ? $val : $decoded;
  }

  /**
   * preg_replace_callback() handler for unescapeString().
   *
   * NOTE(review): as in the previous implementation, a \uXXXX escape is
   * reduced to one byte via chr(), so code points above 0xFF are truncated.
   * The json_decode() path yields UTF-8 instead - confirm which form
   * sub-classes rely on before changing this.
   *
   * @param array $m match array; $m[1] holds the text following the backslash
   * @return string replacement text; unknown escapes are returned unchanged
   */
  function unescapeMatch($m) {
    $esc = $m[1];
    if (strlen($esc) === 5) {/* "u" + 4 hex digits */
      return chr(hexdec(substr($esc, 1)));
    }
    $map = array(
      '\\' => '\\',
      '/' => '/',
      '"' => '"',
      'b' => "\x08",
      'f' => "\x0C",
      'n' => "\n",
      'r' => "\r",
      't' => "\t"
    );
    return isset($map[$esc]) ? $map[$esc] : $m[0];
  }

  /* */

  /**
   * @return mixed the 'struct' value as resolved by $this->v(), with an empty
   *               array passed as the default
   */
  function getObject() {
    return $this->v('struct', array());
  }

  /**
   * @return mixed the 'triples' value as resolved by $this->v(), with an
   *               empty array passed as the default
   */
  function getTriples() {
    return $this->v('triples', array());
  }

  /**
   * @return int current value of the triple counter
   */
  function countTriples() {
    return $this->t_count;
  }

  /**
   * Appends a triple to $this->triples. When $this->skip_dupes is set, a
   * triple whose serialised form was already added is ignored.
   *
   * @param string $s subject
   * @param string $p predicate
   * @param string $o object (passed through $this->toUTF8() first)
   * @param string $s_type subject type
   * @param string $o_type object type
   * @param string $o_dt object datatype
   * @param string $o_lang object language
   */
  function addT($s = '', $p = '', $o = '', $s_type = '', $o_type = '', $o_dt = '', $o_lang = '') {
    $o = $this->toUTF8($o);
    $t = array('s' => $s, 'p' => $p, 'o' => $o, 's_type' => $s_type, 'o_type' => $o_type, 'o_datatype' => $o_dt, 'o_lang' => $o_lang);
    if ($this->skip_dupes) {
      /* the hash is only a lookup key for duplicate detection */
      $h = md5(serialize($t));
      if (isset($this->added_triples[$h])) return;
      $this->added_triples[$h] = true;
    }
    $this->triples[$this->t_count] = $t;
    $this->t_count++;
  }

  /* */

}