cannam@150
|
1 /* Copyright (c) 2013 Dropbox, Inc.
|
cannam@150
|
2 *
|
cannam@150
|
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
|
cannam@150
|
4 * of this software and associated documentation files (the "Software"), to deal
|
cannam@150
|
5 * in the Software without restriction, including without limitation the rights
|
cannam@150
|
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
cannam@150
|
7 * copies of the Software, and to permit persons to whom the Software is
|
cannam@150
|
8 * furnished to do so, subject to the following conditions:
|
cannam@150
|
9 *
|
cannam@150
|
10 * The above copyright notice and this permission notice shall be included in
|
cannam@150
|
11 * all copies or substantial portions of the Software.
|
cannam@150
|
12 *
|
cannam@150
|
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
cannam@150
|
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
cannam@150
|
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
cannam@150
|
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
cannam@150
|
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
cannam@150
|
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
cannam@150
|
19 * THE SOFTWARE.
|
cannam@150
|
20 */
|
cannam@150
|
21
|
cannam@150
|
22 #include "json11.hpp"
|
cannam@150
|
23 #include <cassert>
|
cannam@150
|
24 #include <cmath>
|
cannam@150
|
25 #include <cstdlib>
|
cannam@150
|
26 #include <cstdio>
|
cannam@150
|
27 #include <limits>
|
cannam@150
|
28
|
cannam@150
|
29 namespace json11 {
|
cannam@150
|
30
|
cannam@150
|
// Deepest nesting level of arrays/objects the parser will descend into
// before reporting "exceeded maximum nesting depth".
static constexpr int max_depth = 200;
|
cannam@150
|
32
|
cannam@150
|
33 using std::string;
|
cannam@150
|
34 using std::vector;
|
cannam@150
|
35 using std::map;
|
cannam@150
|
36 using std::make_shared;
|
cannam@150
|
37 using std::initializer_list;
|
cannam@150
|
38 using std::move;
|
cannam@150
|
39
|
cannam@150
|
40 /* * * * * * * * * * * * * * * * * * * *
|
cannam@150
|
41 * Serialization
|
cannam@150
|
42 */
|
cannam@150
|
43
|
cannam@150
|
// Serialize a JSON null by appending the literal text `null` to `out`.
static void dump(std::nullptr_t, std::string &out) {
    out.append("null");
}
|
cannam@150
|
47
|
cannam@150
|
// Serialize a double by appending it to `out`.
//
// Finite values are printed with "%.17g". Non-finite values (NaN,
// +/-infinity) are written as `null`.
static void dump(double value, std::string &out) {
    if (!std::isfinite(value)) {
        out += "null";
        return;
    }

    char text[32];
    std::snprintf(text, sizeof text, "%.17g", value);
    out += text;
}
|
cannam@150
|
57
|
cannam@150
|
// Serialize an int by appending its decimal representation to `out`.
static void dump(int value, std::string &out) {
    out += std::to_string(value);
}
|
cannam@150
|
63
|
cannam@150
|
// Serialize a boolean by appending `true` or `false` to `out`.
static void dump(bool value, std::string &out) {
    if (value) {
        out += "true";
    } else {
        out += "false";
    }
}
|
cannam@150
|
67
|
cannam@150
|
// Serialize a string as a double-quoted JSON string literal appended to `out`.
//
// Escaping rules:
//   * backslash and double quote get a backslash prefix;
//   * \b \f \n \r \t use their short escapes;
//   * any other byte <= 0x1f is written as \u00XX;
//   * the UTF-8 sequences E2 80 A8 / E2 80 A9 (U+2028 / U+2029) are written
//     as \u2028 / \u2029;
//   * every other byte is copied through unchanged.
static void dump(const std::string &value, std::string &out) {
    const size_t len = value.length();

    out.push_back('"');
    for (size_t pos = 0; pos < len; ++pos) {
        const char ch = value[pos];

        // Characters that have a dedicated two-character escape.
        const char *short_escape = nullptr;
        switch (ch) {
        case '\\': short_escape = "\\\\"; break;
        case '"':  short_escape = "\\\""; break;
        case '\b': short_escape = "\\b";  break;
        case '\f': short_escape = "\\f";  break;
        case '\n': short_escape = "\\n";  break;
        case '\r': short_escape = "\\r";  break;
        case '\t': short_escape = "\\t";  break;
        default: break;
        }
        if (short_escape != nullptr) {
            out += short_escape;
            continue;
        }

        const unsigned char byte = static_cast<unsigned char>(ch);
        if (byte <= 0x1f) {
            // Remaining control characters: generic \uXXXX form.
            char hex[8];
            std::snprintf(hex, sizeof hex, "\\u%04x", ch);
            out += hex;
            continue;
        }

        // Look for the three-byte UTF-8 encodings of U+2028 / U+2029.
        unsigned char tail = 0;
        if (byte == 0xe2 && pos + 2 < len
                && static_cast<unsigned char>(value[pos + 1]) == 0x80) {
            tail = static_cast<unsigned char>(value[pos + 2]);
        }

        if (tail == 0xa8) {
            out += "\\u2028";
            pos += 2;   // skip the two continuation bytes just consumed
        } else if (tail == 0xa9) {
            out += "\\u2029";
            pos += 2;
        } else {
            out += ch;
        }
    }
    out.push_back('"');
}
|
cannam@150
|
104
|
cannam@150
|
105 static void dump(const Json::array &values, string &out) {
|
cannam@150
|
106 bool first = true;
|
cannam@150
|
107 out += "[";
|
cannam@150
|
108 for (const auto &value : values) {
|
cannam@150
|
109 if (!first)
|
cannam@150
|
110 out += ", ";
|
cannam@150
|
111 value.dump(out);
|
cannam@150
|
112 first = false;
|
cannam@150
|
113 }
|
cannam@150
|
114 out += "]";
|
cannam@150
|
115 }
|
cannam@150
|
116
|
cannam@150
|
117 static void dump(const Json::object &values, string &out) {
|
cannam@150
|
118 bool first = true;
|
cannam@150
|
119 out += "{";
|
cannam@150
|
120 for (const auto &kv : values) {
|
cannam@150
|
121 if (!first)
|
cannam@150
|
122 out += ", ";
|
cannam@150
|
123 dump(kv.first, out);
|
cannam@150
|
124 out += ": ";
|
cannam@150
|
125 kv.second.dump(out);
|
cannam@150
|
126 first = false;
|
cannam@150
|
127 }
|
cannam@150
|
128 out += "}";
|
cannam@150
|
129 }
|
cannam@150
|
130
|
cannam@150
|
131 void Json::dump(string &out) const {
|
cannam@150
|
132 m_ptr->dump(out);
|
cannam@150
|
133 }
|
cannam@150
|
134
|
cannam@150
|
135 /* * * * * * * * * * * * * * * * * * * *
|
cannam@150
|
136 * Value wrappers
|
cannam@150
|
137 */
|
cannam@150
|
138
|
cannam@150
|
139 template <Json::Type tag, typename T>
|
cannam@150
|
140 class Value : public JsonValue {
|
cannam@150
|
141 protected:
|
cannam@150
|
142
|
cannam@150
|
143 // Constructors
|
cannam@150
|
144 explicit Value(const T &value) : m_value(value) {}
|
cannam@150
|
145 explicit Value(T &&value) : m_value(move(value)) {}
|
cannam@150
|
146
|
cannam@150
|
147 // Get type tag
|
cannam@150
|
148 Json::Type type() const override {
|
cannam@150
|
149 return tag;
|
cannam@150
|
150 }
|
cannam@150
|
151
|
cannam@150
|
152 // Comparisons
|
cannam@150
|
153 bool equals(const JsonValue * other) const override {
|
cannam@150
|
154 return m_value == static_cast<const Value<tag, T> *>(other)->m_value;
|
cannam@150
|
155 }
|
cannam@150
|
156 bool less(const JsonValue * other) const override {
|
cannam@150
|
157 return m_value < static_cast<const Value<tag, T> *>(other)->m_value;
|
cannam@150
|
158 }
|
cannam@150
|
159
|
cannam@150
|
160 const T m_value;
|
cannam@150
|
161 void dump(string &out) const override { json11::dump(m_value, out); }
|
cannam@150
|
162 };
|
cannam@150
|
163
|
cannam@150
|
164 class JsonDouble final : public Value<Json::NUMBER, double> {
|
cannam@150
|
165 double number_value() const override { return m_value; }
|
cannam@150
|
166 int int_value() const override { return static_cast<int>(m_value); }
|
cannam@150
|
167 bool equals(const JsonValue * other) const override { return m_value == other->number_value(); }
|
cannam@150
|
168 bool less(const JsonValue * other) const override { return m_value < other->number_value(); }
|
cannam@150
|
169 public:
|
cannam@150
|
170 explicit JsonDouble(double value) : Value(value) {}
|
cannam@150
|
171 };
|
cannam@150
|
172
|
cannam@150
|
173 class JsonInt final : public Value<Json::NUMBER, int> {
|
cannam@150
|
174 double number_value() const override { return m_value; }
|
cannam@150
|
175 int int_value() const override { return m_value; }
|
cannam@150
|
176 bool equals(const JsonValue * other) const override { return m_value == other->number_value(); }
|
cannam@150
|
177 bool less(const JsonValue * other) const override { return m_value < other->number_value(); }
|
cannam@150
|
178 public:
|
cannam@150
|
179 explicit JsonInt(int value) : Value(value) {}
|
cannam@150
|
180 };
|
cannam@150
|
181
|
cannam@150
|
182 class JsonBoolean final : public Value<Json::BOOL, bool> {
|
cannam@150
|
183 bool bool_value() const override { return m_value; }
|
cannam@150
|
184 public:
|
cannam@150
|
185 explicit JsonBoolean(bool value) : Value(value) {}
|
cannam@150
|
186 };
|
cannam@150
|
187
|
cannam@150
|
188 class JsonString final : public Value<Json::STRING, string> {
|
cannam@150
|
189 const string &string_value() const override { return m_value; }
|
cannam@150
|
190 public:
|
cannam@150
|
191 explicit JsonString(const string &value) : Value(value) {}
|
cannam@150
|
192 explicit JsonString(string &&value) : Value(move(value)) {}
|
cannam@150
|
193 };
|
cannam@150
|
194
|
cannam@150
|
195 class JsonArray final : public Value<Json::ARRAY, Json::array> {
|
cannam@150
|
196 const Json::array &array_items() const override { return m_value; }
|
cannam@150
|
197 const Json & operator[](size_t i) const override;
|
cannam@150
|
198 public:
|
cannam@150
|
199 explicit JsonArray(const Json::array &value) : Value(value) {}
|
cannam@150
|
200 explicit JsonArray(Json::array &&value) : Value(move(value)) {}
|
cannam@150
|
201 };
|
cannam@150
|
202
|
cannam@150
|
203 class JsonObject final : public Value<Json::OBJECT, Json::object> {
|
cannam@150
|
204 const Json::object &object_items() const override { return m_value; }
|
cannam@150
|
205 const Json & operator[](const string &key) const override;
|
cannam@150
|
206 public:
|
cannam@150
|
207 explicit JsonObject(const Json::object &value) : Value(value) {}
|
cannam@150
|
208 explicit JsonObject(Json::object &&value) : Value(move(value)) {}
|
cannam@150
|
209 };
|
cannam@150
|
210
|
cannam@150
|
211 class JsonNull final : public Value<Json::NUL, std::nullptr_t> {
|
cannam@150
|
212 public:
|
cannam@150
|
213 JsonNull() : Value(nullptr) {}
|
cannam@150
|
214 };
|
cannam@150
|
215
|
cannam@150
|
216 /* * * * * * * * * * * * * * * * * * * *
|
cannam@150
|
217 * Static globals - static-init-safe
|
cannam@150
|
218 */
|
cannam@150
|
219 struct Statics {
|
cannam@150
|
220 const std::shared_ptr<JsonValue> null = make_shared<JsonNull>();
|
cannam@150
|
221 const std::shared_ptr<JsonValue> t = make_shared<JsonBoolean>(true);
|
cannam@150
|
222 const std::shared_ptr<JsonValue> f = make_shared<JsonBoolean>(false);
|
cannam@150
|
223 const string empty_string;
|
cannam@150
|
224 const vector<Json> empty_vector;
|
cannam@150
|
225 const map<string, Json> empty_map;
|
cannam@150
|
226 Statics() {}
|
cannam@150
|
227 };
|
cannam@150
|
228
|
cannam@150
|
229 static const Statics & statics() {
|
cannam@150
|
230 static const Statics s {};
|
cannam@150
|
231 return s;
|
cannam@150
|
232 }
|
cannam@150
|
233
|
cannam@150
|
234 static const Json & static_null() {
|
cannam@150
|
235 // This has to be separate, not in Statics, because Json() accesses statics().null.
|
cannam@150
|
236 static const Json json_null;
|
cannam@150
|
237 return json_null;
|
cannam@150
|
238 }
|
cannam@150
|
239
|
cannam@150
|
240 /* * * * * * * * * * * * * * * * * * * *
|
cannam@150
|
241 * Constructors
|
cannam@150
|
242 */
|
cannam@150
|
243
|
cannam@150
|
244 Json::Json() noexcept : m_ptr(statics().null) {}
|
cannam@150
|
245 Json::Json(std::nullptr_t) noexcept : m_ptr(statics().null) {}
|
cannam@150
|
246 Json::Json(double value) : m_ptr(make_shared<JsonDouble>(value)) {}
|
cannam@150
|
247 Json::Json(int value) : m_ptr(make_shared<JsonInt>(value)) {}
|
cannam@150
|
248 Json::Json(bool value) : m_ptr(value ? statics().t : statics().f) {}
|
cannam@150
|
249 Json::Json(const string &value) : m_ptr(make_shared<JsonString>(value)) {}
|
cannam@150
|
250 Json::Json(string &&value) : m_ptr(make_shared<JsonString>(move(value))) {}
|
cannam@150
|
251 Json::Json(const char * value) : m_ptr(make_shared<JsonString>(value)) {}
|
cannam@150
|
252 Json::Json(const Json::array &values) : m_ptr(make_shared<JsonArray>(values)) {}
|
cannam@150
|
253 Json::Json(Json::array &&values) : m_ptr(make_shared<JsonArray>(move(values))) {}
|
cannam@150
|
254 Json::Json(const Json::object &values) : m_ptr(make_shared<JsonObject>(values)) {}
|
cannam@150
|
255 Json::Json(Json::object &&values) : m_ptr(make_shared<JsonObject>(move(values))) {}
|
cannam@150
|
256
|
cannam@150
|
257 /* * * * * * * * * * * * * * * * * * * *
|
cannam@150
|
258 * Accessors
|
cannam@150
|
259 */
|
cannam@150
|
260
|
cannam@150
|
261 Json::Type Json::type() const { return m_ptr->type(); }
|
cannam@150
|
262 double Json::number_value() const { return m_ptr->number_value(); }
|
cannam@150
|
263 int Json::int_value() const { return m_ptr->int_value(); }
|
cannam@150
|
264 bool Json::bool_value() const { return m_ptr->bool_value(); }
|
cannam@150
|
265 const string & Json::string_value() const { return m_ptr->string_value(); }
|
cannam@150
|
266 const vector<Json> & Json::array_items() const { return m_ptr->array_items(); }
|
cannam@150
|
267 const map<string, Json> & Json::object_items() const { return m_ptr->object_items(); }
|
cannam@150
|
268 const Json & Json::operator[] (size_t i) const { return (*m_ptr)[i]; }
|
cannam@150
|
269 const Json & Json::operator[] (const string &key) const { return (*m_ptr)[key]; }
|
cannam@150
|
270
|
cannam@150
|
271 double JsonValue::number_value() const { return 0; }
|
cannam@150
|
272 int JsonValue::int_value() const { return 0; }
|
cannam@150
|
273 bool JsonValue::bool_value() const { return false; }
|
cannam@150
|
274 const string & JsonValue::string_value() const { return statics().empty_string; }
|
cannam@150
|
275 const vector<Json> & JsonValue::array_items() const { return statics().empty_vector; }
|
cannam@150
|
276 const map<string, Json> & JsonValue::object_items() const { return statics().empty_map; }
|
cannam@150
|
277 const Json & JsonValue::operator[] (size_t) const { return static_null(); }
|
cannam@150
|
278 const Json & JsonValue::operator[] (const string &) const { return static_null(); }
|
cannam@150
|
279
|
cannam@150
|
280 const Json & JsonObject::operator[] (const string &key) const {
|
cannam@150
|
281 auto iter = m_value.find(key);
|
cannam@150
|
282 return (iter == m_value.end()) ? static_null() : iter->second;
|
cannam@150
|
283 }
|
cannam@150
|
284 const Json & JsonArray::operator[] (size_t i) const {
|
cannam@150
|
285 if (i >= m_value.size()) return static_null();
|
cannam@150
|
286 else return m_value[i];
|
cannam@150
|
287 }
|
cannam@150
|
288
|
cannam@150
|
289 /* * * * * * * * * * * * * * * * * * * *
|
cannam@150
|
290 * Comparison
|
cannam@150
|
291 */
|
cannam@150
|
292
|
cannam@150
|
293 bool Json::operator== (const Json &other) const {
|
cannam@150
|
294 if (m_ptr->type() != other.m_ptr->type())
|
cannam@150
|
295 return false;
|
cannam@150
|
296
|
cannam@150
|
297 return m_ptr->equals(other.m_ptr.get());
|
cannam@150
|
298 }
|
cannam@150
|
299
|
cannam@150
|
300 bool Json::operator< (const Json &other) const {
|
cannam@150
|
301 if (m_ptr->type() != other.m_ptr->type())
|
cannam@150
|
302 return m_ptr->type() < other.m_ptr->type();
|
cannam@150
|
303
|
cannam@150
|
304 return m_ptr->less(other.m_ptr.get());
|
cannam@150
|
305 }
|
cannam@150
|
306
|
cannam@150
|
307 /* * * * * * * * * * * * * * * * * * * *
|
cannam@150
|
308 * Parsing
|
cannam@150
|
309 */
|
cannam@150
|
310
|
cannam@150
|
/* esc(c)
 *
 * Format char c suitable for printing in an error message.
 *
 * Printable ASCII (0x20..0x7e) is shown quoted together with its numeric
 * value, e.g. 'a' (97). Everything else, including DEL (0x7f), control
 * characters and bytes outside ASCII, is shown by numeric value only,
 * e.g. (10), so that no raw control byte ends up in the message.
 */
static inline std::string esc(char c) {
    char buf[12];
    const unsigned char byte = static_cast<unsigned char>(c);
    if (byte >= 0x20 && byte < 0x7f) {
        std::snprintf(buf, sizeof buf, "'%c' (%d)", c, c);
    } else {
        std::snprintf(buf, sizeof buf, "(%d)", c);
    }
    return std::string(buf);
}
|
cannam@150
|
324
|
cannam@150
|
// True when x lies in the closed interval [lower, upper].
static inline bool in_range(long x, long lower, long upper) {
    return !(x < lower || x > upper);
}
|
cannam@150
|
328
|
cannam@150
|
329 /* JsonParser
|
cannam@150
|
330 *
|
cannam@150
|
331 * Object that tracks all state of an in-progress parse.
|
cannam@150
|
332 */
|
cannam@150
|
333 struct JsonParser {
|
cannam@150
|
334
|
cannam@150
|
335 /* State
|
cannam@150
|
336 */
|
cannam@150
|
337 const string &str;
|
cannam@150
|
338 size_t i;
|
cannam@150
|
339 string &err;
|
cannam@150
|
340 bool failed;
|
cannam@150
|
341 const JsonParse strategy;
|
cannam@150
|
342
|
cannam@150
|
343 /* fail(msg, err_ret = Json())
|
cannam@150
|
344 *
|
cannam@150
|
345 * Mark this parse as failed.
|
cannam@150
|
346 */
|
cannam@150
|
347 Json fail(string &&msg) {
|
cannam@150
|
348 return fail(move(msg), Json());
|
cannam@150
|
349 }
|
cannam@150
|
350
|
cannam@150
|
351 template <typename T>
|
cannam@150
|
352 T fail(string &&msg, const T err_ret) {
|
cannam@150
|
353 if (!failed)
|
cannam@150
|
354 err = std::move(msg);
|
cannam@150
|
355 failed = true;
|
cannam@150
|
356 return err_ret;
|
cannam@150
|
357 }
|
cannam@150
|
358
|
cannam@150
|
359 /* consume_whitespace()
|
cannam@150
|
360 *
|
cannam@150
|
361 * Advance until the current character is non-whitespace.
|
cannam@150
|
362 */
|
cannam@150
|
363 void consume_whitespace() {
|
cannam@150
|
364 while (str[i] == ' ' || str[i] == '\r' || str[i] == '\n' || str[i] == '\t')
|
cannam@150
|
365 i++;
|
cannam@150
|
366 }
|
cannam@150
|
367
|
cannam@150
|
368 /* consume_comment()
|
cannam@150
|
369 *
|
cannam@150
|
370 * Advance comments (c-style inline and multiline).
|
cannam@150
|
371 */
|
cannam@150
|
372 bool consume_comment() {
|
cannam@150
|
373 bool comment_found = false;
|
cannam@150
|
374 if (str[i] == '/') {
|
cannam@150
|
375 i++;
|
cannam@150
|
376 if (i == str.size())
|
cannam@150
|
377 return fail("unexpected end of input inside comment", 0);
|
cannam@150
|
378 if (str[i] == '/') { // inline comment
|
cannam@150
|
379 i++;
|
cannam@150
|
380 if (i == str.size())
|
cannam@150
|
381 return fail("unexpected end of input inside inline comment", 0);
|
cannam@150
|
382 // advance until next line
|
cannam@150
|
383 while (str[i] != '\n') {
|
cannam@150
|
384 i++;
|
cannam@150
|
385 if (i == str.size())
|
cannam@150
|
386 return fail("unexpected end of input inside inline comment", 0);
|
cannam@150
|
387 }
|
cannam@150
|
388 comment_found = true;
|
cannam@150
|
389 }
|
cannam@150
|
390 else if (str[i] == '*') { // multiline comment
|
cannam@150
|
391 i++;
|
cannam@150
|
392 if (i > str.size()-2)
|
cannam@150
|
393 return fail("unexpected end of input inside multi-line comment", 0);
|
cannam@150
|
394 // advance until closing tokens
|
cannam@150
|
395 while (!(str[i] == '*' && str[i+1] == '/')) {
|
cannam@150
|
396 i++;
|
cannam@150
|
397 if (i > str.size()-2)
|
cannam@150
|
398 return fail(
|
cannam@150
|
399 "unexpected end of input inside multi-line comment", 0);
|
cannam@150
|
400 }
|
cannam@150
|
401 i += 2;
|
cannam@150
|
402 if (i == str.size())
|
cannam@150
|
403 return fail(
|
cannam@150
|
404 "unexpected end of input inside multi-line comment", 0);
|
cannam@150
|
405 comment_found = true;
|
cannam@150
|
406 }
|
cannam@150
|
407 else
|
cannam@150
|
408 return fail("malformed comment", 0);
|
cannam@150
|
409 }
|
cannam@150
|
410 return comment_found;
|
cannam@150
|
411 }
|
cannam@150
|
412
|
cannam@150
|
413 /* consume_garbage()
|
cannam@150
|
414 *
|
cannam@150
|
415 * Advance until the current character is non-whitespace and non-comment.
|
cannam@150
|
416 */
|
cannam@150
|
417 void consume_garbage() {
|
cannam@150
|
418 consume_whitespace();
|
cannam@150
|
419 if(strategy == JsonParse::COMMENTS) {
|
cannam@150
|
420 bool comment_found = false;
|
cannam@150
|
421 do {
|
cannam@150
|
422 comment_found = consume_comment();
|
cannam@150
|
423 consume_whitespace();
|
cannam@150
|
424 }
|
cannam@150
|
425 while(comment_found);
|
cannam@150
|
426 }
|
cannam@150
|
427 }
|
cannam@150
|
428
|
cannam@150
|
429 /* get_next_token()
|
cannam@150
|
430 *
|
cannam@150
|
431 * Return the next non-whitespace character. If the end of the input is reached,
|
cannam@150
|
432 * flag an error and return 0.
|
cannam@150
|
433 */
|
cannam@150
|
434 char get_next_token() {
|
cannam@150
|
435 consume_garbage();
|
cannam@150
|
436 if (i == str.size())
|
cannam@150
|
437 return fail("unexpected end of input", 0);
|
cannam@150
|
438
|
cannam@150
|
439 return str[i++];
|
cannam@150
|
440 }
|
cannam@150
|
441
|
cannam@150
|
442 /* encode_utf8(pt, out)
|
cannam@150
|
443 *
|
cannam@150
|
444 * Encode pt as UTF-8 and add it to out.
|
cannam@150
|
445 */
|
cannam@150
|
446 void encode_utf8(long pt, string & out) {
|
cannam@150
|
447 if (pt < 0)
|
cannam@150
|
448 return;
|
cannam@150
|
449
|
cannam@150
|
450 if (pt < 0x80) {
|
cannam@150
|
451 out += static_cast<char>(pt);
|
cannam@150
|
452 } else if (pt < 0x800) {
|
cannam@150
|
453 out += static_cast<char>((pt >> 6) | 0xC0);
|
cannam@150
|
454 out += static_cast<char>((pt & 0x3F) | 0x80);
|
cannam@150
|
455 } else if (pt < 0x10000) {
|
cannam@150
|
456 out += static_cast<char>((pt >> 12) | 0xE0);
|
cannam@150
|
457 out += static_cast<char>(((pt >> 6) & 0x3F) | 0x80);
|
cannam@150
|
458 out += static_cast<char>((pt & 0x3F) | 0x80);
|
cannam@150
|
459 } else {
|
cannam@150
|
460 out += static_cast<char>((pt >> 18) | 0xF0);
|
cannam@150
|
461 out += static_cast<char>(((pt >> 12) & 0x3F) | 0x80);
|
cannam@150
|
462 out += static_cast<char>(((pt >> 6) & 0x3F) | 0x80);
|
cannam@150
|
463 out += static_cast<char>((pt & 0x3F) | 0x80);
|
cannam@150
|
464 }
|
cannam@150
|
465 }
|
cannam@150
|
466
|
cannam@150
|
467 /* parse_string()
|
cannam@150
|
468 *
|
cannam@150
|
469 * Parse a string, starting at the current position.
|
cannam@150
|
470 */
|
cannam@150
|
471 string parse_string() {
|
cannam@150
|
472 string out;
|
cannam@150
|
473 long last_escaped_codepoint = -1;
|
cannam@150
|
474 while (true) {
|
cannam@150
|
475 if (i == str.size())
|
cannam@150
|
476 return fail("unexpected end of input in string", "");
|
cannam@150
|
477
|
cannam@150
|
478 char ch = str[i++];
|
cannam@150
|
479
|
cannam@150
|
480 if (ch == '"') {
|
cannam@150
|
481 encode_utf8(last_escaped_codepoint, out);
|
cannam@150
|
482 return out;
|
cannam@150
|
483 }
|
cannam@150
|
484
|
cannam@150
|
485 if (in_range(ch, 0, 0x1f))
|
cannam@150
|
486 return fail("unescaped " + esc(ch) + " in string", "");
|
cannam@150
|
487
|
cannam@150
|
488 // The usual case: non-escaped characters
|
cannam@150
|
489 if (ch != '\\') {
|
cannam@150
|
490 encode_utf8(last_escaped_codepoint, out);
|
cannam@150
|
491 last_escaped_codepoint = -1;
|
cannam@150
|
492 out += ch;
|
cannam@150
|
493 continue;
|
cannam@150
|
494 }
|
cannam@150
|
495
|
cannam@150
|
496 // Handle escapes
|
cannam@150
|
497 if (i == str.size())
|
cannam@150
|
498 return fail("unexpected end of input in string", "");
|
cannam@150
|
499
|
cannam@150
|
500 ch = str[i++];
|
cannam@150
|
501
|
cannam@150
|
502 if (ch == 'u') {
|
cannam@150
|
503 // Extract 4-byte escape sequence
|
cannam@150
|
504 string esc = str.substr(i, 4);
|
cannam@150
|
505 // Explicitly check length of the substring. The following loop
|
cannam@150
|
506 // relies on std::string returning the terminating NUL when
|
cannam@150
|
507 // accessing str[length]. Checking here reduces brittleness.
|
cannam@150
|
508 if (esc.length() < 4) {
|
cannam@150
|
509 return fail("bad \\u escape: " + esc, "");
|
cannam@150
|
510 }
|
cannam@150
|
511 for (int j = 0; j < 4; j++) {
|
cannam@150
|
512 if (!in_range(esc[j], 'a', 'f') && !in_range(esc[j], 'A', 'F')
|
cannam@150
|
513 && !in_range(esc[j], '0', '9'))
|
cannam@150
|
514 return fail("bad \\u escape: " + esc, "");
|
cannam@150
|
515 }
|
cannam@150
|
516
|
cannam@150
|
517 long codepoint = strtol(esc.data(), nullptr, 16);
|
cannam@150
|
518
|
cannam@150
|
519 // JSON specifies that characters outside the BMP shall be encoded as a pair
|
cannam@150
|
520 // of 4-hex-digit \u escapes encoding their surrogate pair components. Check
|
cannam@150
|
521 // whether we're in the middle of such a beast: the previous codepoint was an
|
cannam@150
|
522 // escaped lead (high) surrogate, and this is a trail (low) surrogate.
|
cannam@150
|
523 if (in_range(last_escaped_codepoint, 0xD800, 0xDBFF)
|
cannam@150
|
524 && in_range(codepoint, 0xDC00, 0xDFFF)) {
|
cannam@150
|
525 // Reassemble the two surrogate pairs into one astral-plane character, per
|
cannam@150
|
526 // the UTF-16 algorithm.
|
cannam@150
|
527 encode_utf8((((last_escaped_codepoint - 0xD800) << 10)
|
cannam@150
|
528 | (codepoint - 0xDC00)) + 0x10000, out);
|
cannam@150
|
529 last_escaped_codepoint = -1;
|
cannam@150
|
530 } else {
|
cannam@150
|
531 encode_utf8(last_escaped_codepoint, out);
|
cannam@150
|
532 last_escaped_codepoint = codepoint;
|
cannam@150
|
533 }
|
cannam@150
|
534
|
cannam@150
|
535 i += 4;
|
cannam@150
|
536 continue;
|
cannam@150
|
537 }
|
cannam@150
|
538
|
cannam@150
|
539 encode_utf8(last_escaped_codepoint, out);
|
cannam@150
|
540 last_escaped_codepoint = -1;
|
cannam@150
|
541
|
cannam@150
|
542 if (ch == 'b') {
|
cannam@150
|
543 out += '\b';
|
cannam@150
|
544 } else if (ch == 'f') {
|
cannam@150
|
545 out += '\f';
|
cannam@150
|
546 } else if (ch == 'n') {
|
cannam@150
|
547 out += '\n';
|
cannam@150
|
548 } else if (ch == 'r') {
|
cannam@150
|
549 out += '\r';
|
cannam@150
|
550 } else if (ch == 't') {
|
cannam@150
|
551 out += '\t';
|
cannam@150
|
552 } else if (ch == '"' || ch == '\\' || ch == '/') {
|
cannam@150
|
553 out += ch;
|
cannam@150
|
554 } else {
|
cannam@150
|
555 return fail("invalid escape character " + esc(ch), "");
|
cannam@150
|
556 }
|
cannam@150
|
557 }
|
cannam@150
|
558 }
|
cannam@150
|
559
|
cannam@150
|
560 /* parse_number()
|
cannam@150
|
561 *
|
cannam@150
|
562 * Parse a double.
|
cannam@150
|
563 */
|
cannam@150
|
564 Json parse_number() {
|
cannam@150
|
565 size_t start_pos = i;
|
cannam@150
|
566
|
cannam@150
|
567 if (str[i] == '-')
|
cannam@150
|
568 i++;
|
cannam@150
|
569
|
cannam@150
|
570 // Integer part
|
cannam@150
|
571 if (str[i] == '0') {
|
cannam@150
|
572 i++;
|
cannam@150
|
573 if (in_range(str[i], '0', '9'))
|
cannam@150
|
574 return fail("leading 0s not permitted in numbers");
|
cannam@150
|
575 } else if (in_range(str[i], '1', '9')) {
|
cannam@150
|
576 i++;
|
cannam@150
|
577 while (in_range(str[i], '0', '9'))
|
cannam@150
|
578 i++;
|
cannam@150
|
579 } else {
|
cannam@150
|
580 return fail("invalid " + esc(str[i]) + " in number");
|
cannam@150
|
581 }
|
cannam@150
|
582
|
cannam@150
|
583 if (str[i] != '.' && str[i] != 'e' && str[i] != 'E'
|
cannam@150
|
584 && (i - start_pos) <= static_cast<size_t>(std::numeric_limits<int>::digits10)) {
|
cannam@150
|
585 return std::atoi(str.c_str() + start_pos);
|
cannam@150
|
586 }
|
cannam@150
|
587
|
cannam@150
|
588 // Decimal part
|
cannam@150
|
589 if (str[i] == '.') {
|
cannam@150
|
590 i++;
|
cannam@150
|
591 if (!in_range(str[i], '0', '9'))
|
cannam@150
|
592 return fail("at least one digit required in fractional part");
|
cannam@150
|
593
|
cannam@150
|
594 while (in_range(str[i], '0', '9'))
|
cannam@150
|
595 i++;
|
cannam@150
|
596 }
|
cannam@150
|
597
|
cannam@150
|
598 // Exponent part
|
cannam@150
|
599 if (str[i] == 'e' || str[i] == 'E') {
|
cannam@150
|
600 i++;
|
cannam@150
|
601
|
cannam@150
|
602 if (str[i] == '+' || str[i] == '-')
|
cannam@150
|
603 i++;
|
cannam@150
|
604
|
cannam@150
|
605 if (!in_range(str[i], '0', '9'))
|
cannam@150
|
606 return fail("at least one digit required in exponent");
|
cannam@150
|
607
|
cannam@150
|
608 while (in_range(str[i], '0', '9'))
|
cannam@150
|
609 i++;
|
cannam@150
|
610 }
|
cannam@150
|
611
|
cannam@150
|
612 return std::strtod(str.c_str() + start_pos, nullptr);
|
cannam@150
|
613 }
|
cannam@150
|
614
|
cannam@150
|
615 /* expect(str, res)
|
cannam@150
|
616 *
|
cannam@150
|
617 * Expect that 'str' starts at the character that was just read. If it does, advance
|
cannam@150
|
618 * the input and return res. If not, flag an error.
|
cannam@150
|
619 */
|
cannam@150
|
620 Json expect(const string &expected, Json res) {
|
cannam@150
|
621 assert(i != 0);
|
cannam@150
|
622 i--;
|
cannam@150
|
623 if (str.compare(i, expected.length(), expected) == 0) {
|
cannam@150
|
624 i += expected.length();
|
cannam@150
|
625 return res;
|
cannam@150
|
626 } else {
|
cannam@150
|
627 return fail("parse error: expected " + expected + ", got " + str.substr(i, expected.length()));
|
cannam@150
|
628 }
|
cannam@150
|
629 }
|
cannam@150
|
630
|
cannam@150
|
631 /* parse_json()
|
cannam@150
|
632 *
|
cannam@150
|
633 * Parse a JSON object.
|
cannam@150
|
634 */
|
cannam@150
|
635 Json parse_json(int depth) {
|
cannam@150
|
636 if (depth > max_depth) {
|
cannam@150
|
637 return fail("exceeded maximum nesting depth");
|
cannam@150
|
638 }
|
cannam@150
|
639
|
cannam@150
|
640 char ch = get_next_token();
|
cannam@150
|
641 if (failed)
|
cannam@150
|
642 return Json();
|
cannam@150
|
643
|
cannam@150
|
644 if (ch == '-' || (ch >= '0' && ch <= '9')) {
|
cannam@150
|
645 i--;
|
cannam@150
|
646 return parse_number();
|
cannam@150
|
647 }
|
cannam@150
|
648
|
cannam@150
|
649 if (ch == 't')
|
cannam@150
|
650 return expect("true", true);
|
cannam@150
|
651
|
cannam@150
|
652 if (ch == 'f')
|
cannam@150
|
653 return expect("false", false);
|
cannam@150
|
654
|
cannam@150
|
655 if (ch == 'n')
|
cannam@150
|
656 return expect("null", Json());
|
cannam@150
|
657
|
cannam@150
|
658 if (ch == '"')
|
cannam@150
|
659 return parse_string();
|
cannam@150
|
660
|
cannam@150
|
661 if (ch == '{') {
|
cannam@150
|
662 map<string, Json> data;
|
cannam@150
|
663 ch = get_next_token();
|
cannam@150
|
664 if (ch == '}')
|
cannam@150
|
665 return data;
|
cannam@150
|
666
|
cannam@150
|
667 while (1) {
|
cannam@150
|
668 if (ch != '"')
|
cannam@150
|
669 return fail("expected '\"' in object, got " + esc(ch));
|
cannam@150
|
670
|
cannam@150
|
671 string key = parse_string();
|
cannam@150
|
672 if (failed)
|
cannam@150
|
673 return Json();
|
cannam@150
|
674
|
cannam@150
|
675 ch = get_next_token();
|
cannam@150
|
676 if (ch != ':')
|
cannam@150
|
677 return fail("expected ':' in object, got " + esc(ch));
|
cannam@150
|
678
|
cannam@150
|
679 data[std::move(key)] = parse_json(depth + 1);
|
cannam@150
|
680 if (failed)
|
cannam@150
|
681 return Json();
|
cannam@150
|
682
|
cannam@150
|
683 ch = get_next_token();
|
cannam@150
|
684 if (ch == '}')
|
cannam@150
|
685 break;
|
cannam@150
|
686 if (ch != ',')
|
cannam@150
|
687 return fail("expected ',' in object, got " + esc(ch));
|
cannam@150
|
688
|
cannam@150
|
689 ch = get_next_token();
|
cannam@150
|
690 }
|
cannam@150
|
691 return data;
|
cannam@150
|
692 }
|
cannam@150
|
693
|
cannam@150
|
694 if (ch == '[') {
|
cannam@150
|
695 vector<Json> data;
|
cannam@150
|
696 ch = get_next_token();
|
cannam@150
|
697 if (ch == ']')
|
cannam@150
|
698 return data;
|
cannam@150
|
699
|
cannam@150
|
700 while (1) {
|
cannam@150
|
701 i--;
|
cannam@150
|
702 data.push_back(parse_json(depth + 1));
|
cannam@150
|
703 if (failed)
|
cannam@150
|
704 return Json();
|
cannam@150
|
705
|
cannam@150
|
706 ch = get_next_token();
|
cannam@150
|
707 if (ch == ']')
|
cannam@150
|
708 break;
|
cannam@150
|
709 if (ch != ',')
|
cannam@150
|
710 return fail("expected ',' in list, got " + esc(ch));
|
cannam@150
|
711
|
cannam@150
|
712 ch = get_next_token();
|
cannam@150
|
713 (void)ch;
|
cannam@150
|
714 }
|
cannam@150
|
715 return data;
|
cannam@150
|
716 }
|
cannam@150
|
717
|
cannam@150
|
718 return fail("expected value, got " + esc(ch));
|
cannam@150
|
719 }
|
cannam@150
|
720 };
|
cannam@150
|
721
|
cannam@150
|
722 Json Json::parse(const string &in, string &err, JsonParse strategy) {
|
cannam@150
|
723 JsonParser parser { in, 0, err, false, strategy };
|
cannam@150
|
724 Json result = parser.parse_json(0);
|
cannam@150
|
725
|
cannam@150
|
726 // Check for any trailing garbage
|
cannam@150
|
727 parser.consume_garbage();
|
cannam@150
|
728 if (parser.i != in.size())
|
cannam@150
|
729 return parser.fail("unexpected trailing " + esc(in[parser.i]));
|
cannam@150
|
730
|
cannam@150
|
731 return result;
|
cannam@150
|
732 }
|
cannam@150
|
733
|
cannam@150
|
734 // Documented in json11.hpp
|
cannam@150
|
735 vector<Json> Json::parse_multi(const string &in,
|
cannam@150
|
736 string &err,
|
cannam@150
|
737 JsonParse strategy) {
|
cannam@150
|
738 JsonParser parser { in, 0, err, false, strategy };
|
cannam@150
|
739
|
cannam@150
|
740 vector<Json> json_vec;
|
cannam@150
|
741 while (parser.i != in.size() && !parser.failed) {
|
cannam@150
|
742 json_vec.push_back(parser.parse_json(0));
|
cannam@150
|
743 // Check for another object
|
cannam@150
|
744 parser.consume_garbage();
|
cannam@150
|
745 }
|
cannam@150
|
746 return json_vec;
|
cannam@150
|
747 }
|
cannam@150
|
748
|
cannam@150
|
749 /* * * * * * * * * * * * * * * * * * * *
|
cannam@150
|
750 * Shape-checking
|
cannam@150
|
751 */
|
cannam@150
|
752
|
cannam@150
|
753 bool Json::has_shape(const shape & types, string & err) const {
|
cannam@150
|
754 if (!is_object()) {
|
cannam@150
|
755 err = "expected JSON object, got " + dump();
|
cannam@150
|
756 return false;
|
cannam@150
|
757 }
|
cannam@150
|
758
|
cannam@150
|
759 for (auto & item : types) {
|
cannam@150
|
760 if ((*this)[item.first].type() != item.second) {
|
cannam@150
|
761 err = "bad type for " + item.first + " in " + dump();
|
cannam@150
|
762 return false;
|
cannam@150
|
763 }
|
cannam@150
|
764 }
|
cannam@150
|
765
|
cannam@150
|
766 return true;
|
cannam@150
|
767 }
|
cannam@150
|
768
|
cannam@150
|
769 } // namespace json11
|