comparison json/json11/json11.cpp @ 5:6e8607ebad03

Promote the more successful experiments (todo: get them to build again)
author Chris Cannam <c.cannam@qmul.ac.uk>
date Fri, 13 May 2016 13:48:59 +0100
parents
children
comparison
equal deleted inserted replaced
4:25499f505d0e 5:6e8607ebad03
1 /* Copyright (c) 2013 Dropbox, Inc.
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 */
21
22 #include "json11.hpp"
23 #include <cassert>
24 #include <cmath>
25 #include <cstdlib>
26 #include <cstdio>
27 #include <limits>
28
29 namespace json11 {
30
// Upper bound on the nesting depth value that the parser accepts.
static const int max_depth{200};
32
33 using std::string;
34 using std::vector;
35 using std::map;
36 using std::make_shared;
37 using std::initializer_list;
38 using std::move;
39
40 /* * * * * * * * * * * * * * * * * * * *
41 * Serialization
42 */
43
// Serialize the JSON null value: appends the literal text `null` to `out`.
static void dump(std::nullptr_t, std::string &out) {
    out.append("null");
}
47
// Serialize a floating-point number by appending its text form to `out`.
// Finite values are formatted with "%.17g"; NaN and infinities are written
// as `null`.
static void dump(double value, std::string &out) {
    if (!std::isfinite(value)) {
        out += "null";
        return;
    }

    char text[32];
    snprintf(text, sizeof text, "%.17g", value);
    out += text;
}
57
// Serialize an integer: appends its decimal representation to `out`.
static void dump(int value, std::string &out) {
    char digits[32];
    snprintf(digits, sizeof digits, "%d", value);
    out.append(digits);
}
63
// Serialize a boolean: appends `true` or `false` to `out`.
static void dump(bool value, std::string &out) {
    if (value) {
        out += "true";
    } else {
        out += "false";
    }
}
67
// Serialize a string as a quoted JSON string literal appended to `out`.
//
// Backslash, double quote and the control characters \b \f \n \r \t get
// their two-character escapes; any other byte <= 0x1f is written as \u00XX.
// The three-byte sequences E2 80 A8 and E2 80 A9 are written as \u2028 and
// \u2029. Every other byte is copied through unchanged.
static void dump(const std::string &value, std::string &out) {
    out += '"';

    const size_t length = value.length();
    for (size_t pos = 0; pos < length; ++pos) {
        const char current = value[pos];

        // Characters with a dedicated short escape.
        switch (current) {
        case '\\': out += "\\\\"; continue;
        case '"':  out += "\\\""; continue;
        case '\b': out += "\\b";  continue;
        case '\f': out += "\\f";  continue;
        case '\n': out += "\\n";  continue;
        case '\r': out += "\\r";  continue;
        case '\t': out += "\\t";  continue;
        default:   break;
        }

        const uint8_t byte = static_cast<uint8_t>(current);

        // Remaining control characters use the generic \uXXXX form.
        if (byte <= 0x1f) {
            char escaped[8];
            snprintf(escaped, sizeof escaped, "\\u%04x", current);
            out += escaped;
            continue;
        }

        // value[pos + 1] may be the terminating NUL at index length();
        // value[pos + 2] is only read once value[pos + 1] matched 0x80.
        if (byte == 0xe2 && static_cast<uint8_t>(value[pos + 1]) == 0x80) {
            const uint8_t third = static_cast<uint8_t>(value[pos + 2]);
            if (third == 0xa8) {
                out += "\\u2028";
                pos += 2;
                continue;
            }
            if (third == 0xa9) {
                out += "\\u2029";
                pos += 2;
                continue;
            }
        }

        out += current;
    }

    out += '"';
}
104
105 static void dump(const Json::array &values, string &out) {
106 bool first = true;
107 out += "[";
108 for (const auto &value : values) {
109 if (!first)
110 out += ", ";
111 value.dump(out);
112 first = false;
113 }
114 out += "]";
115 }
116
117 static void dump(const Json::object &values, string &out) {
118 bool first = true;
119 out += "{";
120 for (const auto &kv : values) {
121 if (!first)
122 out += ", ";
123 dump(kv.first, out);
124 out += ": ";
125 kv.second.dump(out);
126 first = false;
127 }
128 out += "}";
129 }
130
131 void Json::dump(string &out) const {
132 m_ptr->dump(out);
133 }
134
135 /* * * * * * * * * * * * * * * * * * * *
136 * Value wrappers
137 */
138
139 template <Json::Type tag, typename T>
140 class Value : public JsonValue {
141 protected:
142
143 // Constructors
144 explicit Value(const T &value) : m_value(value) {}
145 explicit Value(T &&value) : m_value(move(value)) {}
146
147 // Get type tag
148 Json::Type type() const override {
149 return tag;
150 }
151
152 // Comparisons
153 bool equals(const JsonValue * other) const override {
154 return m_value == static_cast<const Value<tag, T> *>(other)->m_value;
155 }
156 bool less(const JsonValue * other) const override {
157 return m_value < static_cast<const Value<tag, T> *>(other)->m_value;
158 }
159
160 const T m_value;
161 void dump(string &out) const override { json11::dump(m_value, out); }
162 };
163
164 class JsonDouble final : public Value<Json::NUMBER, double> {
165 double number_value() const override { return m_value; }
166 int int_value() const override { return static_cast<int>(m_value); }
167 bool equals(const JsonValue * other) const override { return m_value == other->number_value(); }
168 bool less(const JsonValue * other) const override { return m_value < other->number_value(); }
169 public:
170 explicit JsonDouble(double value) : Value(value) {}
171 };
172
173 class JsonInt final : public Value<Json::NUMBER, int> {
174 double number_value() const override { return m_value; }
175 int int_value() const override { return m_value; }
176 bool equals(const JsonValue * other) const override { return m_value == other->number_value(); }
177 bool less(const JsonValue * other) const override { return m_value < other->number_value(); }
178 public:
179 explicit JsonInt(int value) : Value(value) {}
180 };
181
182 class JsonBoolean final : public Value<Json::BOOL, bool> {
183 bool bool_value() const override { return m_value; }
184 public:
185 explicit JsonBoolean(bool value) : Value(value) {}
186 };
187
188 class JsonString final : public Value<Json::STRING, string> {
189 const string &string_value() const override { return m_value; }
190 public:
191 explicit JsonString(const string &value) : Value(value) {}
192 explicit JsonString(string &&value) : Value(move(value)) {}
193 };
194
195 class JsonArray final : public Value<Json::ARRAY, Json::array> {
196 const Json::array &array_items() const override { return m_value; }
197 const Json & operator[](size_t i) const override;
198 public:
199 explicit JsonArray(const Json::array &value) : Value(value) {}
200 explicit JsonArray(Json::array &&value) : Value(move(value)) {}
201 };
202
203 class JsonObject final : public Value<Json::OBJECT, Json::object> {
204 const Json::object &object_items() const override { return m_value; }
205 const Json & operator[](const string &key) const override;
206 public:
207 explicit JsonObject(const Json::object &value) : Value(value) {}
208 explicit JsonObject(Json::object &&value) : Value(move(value)) {}
209 };
210
211 class JsonNull final : public Value<Json::NUL, std::nullptr_t> {
212 public:
213 JsonNull() : Value(nullptr) {}
214 };
215
216 /* * * * * * * * * * * * * * * * * * * *
217 * Static globals - static-init-safe
218 */
219 struct Statics {
220 const std::shared_ptr<JsonValue> null = make_shared<JsonNull>();
221 const std::shared_ptr<JsonValue> t = make_shared<JsonBoolean>(true);
222 const std::shared_ptr<JsonValue> f = make_shared<JsonBoolean>(false);
223 const string empty_string;
224 const vector<Json> empty_vector;
225 const map<string, Json> empty_map;
226 Statics() {}
227 };
228
229 static const Statics & statics() {
230 static const Statics s {};
231 return s;
232 }
233
234 static const Json & static_null() {
235 // This has to be separate, not in Statics, because Json() accesses statics().null.
236 static const Json json_null;
237 return json_null;
238 }
239
240 /* * * * * * * * * * * * * * * * * * * *
241 * Constructors
242 */
243
244 Json::Json() noexcept : m_ptr(statics().null) {}
245 Json::Json(std::nullptr_t) noexcept : m_ptr(statics().null) {}
246 Json::Json(double value) : m_ptr(make_shared<JsonDouble>(value)) {}
247 Json::Json(int value) : m_ptr(make_shared<JsonInt>(value)) {}
248 Json::Json(bool value) : m_ptr(value ? statics().t : statics().f) {}
249 Json::Json(const string &value) : m_ptr(make_shared<JsonString>(value)) {}
250 Json::Json(string &&value) : m_ptr(make_shared<JsonString>(move(value))) {}
251 Json::Json(const char * value) : m_ptr(make_shared<JsonString>(value)) {}
252 Json::Json(const Json::array &values) : m_ptr(make_shared<JsonArray>(values)) {}
253 Json::Json(Json::array &&values) : m_ptr(make_shared<JsonArray>(move(values))) {}
254 Json::Json(const Json::object &values) : m_ptr(make_shared<JsonObject>(values)) {}
255 Json::Json(Json::object &&values) : m_ptr(make_shared<JsonObject>(move(values))) {}
256
257 /* * * * * * * * * * * * * * * * * * * *
258 * Accessors
259 */
260
261 Json::Type Json::type() const { return m_ptr->type(); }
262 double Json::number_value() const { return m_ptr->number_value(); }
263 int Json::int_value() const { return m_ptr->int_value(); }
264 bool Json::bool_value() const { return m_ptr->bool_value(); }
265 const string & Json::string_value() const { return m_ptr->string_value(); }
266 const vector<Json> & Json::array_items() const { return m_ptr->array_items(); }
267 const map<string, Json> & Json::object_items() const { return m_ptr->object_items(); }
268 const Json & Json::operator[] (size_t i) const { return (*m_ptr)[i]; }
269 const Json & Json::operator[] (const string &key) const { return (*m_ptr)[key]; }
270
271 double JsonValue::number_value() const { return 0; }
272 int JsonValue::int_value() const { return 0; }
273 bool JsonValue::bool_value() const { return false; }
274 const string & JsonValue::string_value() const { return statics().empty_string; }
275 const vector<Json> & JsonValue::array_items() const { return statics().empty_vector; }
276 const map<string, Json> & JsonValue::object_items() const { return statics().empty_map; }
277 const Json & JsonValue::operator[] (size_t) const { return static_null(); }
278 const Json & JsonValue::operator[] (const string &) const { return static_null(); }
279
280 const Json & JsonObject::operator[] (const string &key) const {
281 auto iter = m_value.find(key);
282 return (iter == m_value.end()) ? static_null() : iter->second;
283 }
284 const Json & JsonArray::operator[] (size_t i) const {
285 if (i >= m_value.size()) return static_null();
286 else return m_value[i];
287 }
288
289 /* * * * * * * * * * * * * * * * * * * *
290 * Comparison
291 */
292
293 bool Json::operator== (const Json &other) const {
294 if (m_ptr->type() != other.m_ptr->type())
295 return false;
296
297 return m_ptr->equals(other.m_ptr.get());
298 }
299
300 bool Json::operator< (const Json &other) const {
301 if (m_ptr->type() != other.m_ptr->type())
302 return m_ptr->type() < other.m_ptr->type();
303
304 return m_ptr->less(other.m_ptr.get());
305 }
306
307 /* * * * * * * * * * * * * * * * * * * *
308 * Parsing
309 */
310
/* esc(c)
 *
 * Render character c for use in an error message. Bytes in the range
 * 0x20..0x7f are shown as the quoted character followed by its numeric
 * value, e.g. 'a' (97); all other bytes are shown as the numeric value
 * only, e.g. (10).
 */
static inline std::string esc(char c) {
    char text[12];
    const uint8_t code = static_cast<uint8_t>(c);
    const bool show_glyph = (code >= 0x20 && code <= 0x7f);

    if (show_glyph)
        snprintf(text, sizeof text, "'%c' (%d)", c, c);
    else
        snprintf(text, sizeof text, "(%d)", c);

    return std::string(text);
}
324
// True when x lies in the closed interval [lower, upper].
static inline bool in_range(long x, long lower, long upper) {
    return !(x < lower || x > upper);
}
328
329 /* JsonParser
330 *
331 * Object that tracks all state of an in-progress parse.
332 */
333 struct JsonParser {
334
335 /* State
336 */
337 const string &str;
338 size_t i;
339 string &err;
340 bool failed;
341 const JsonParse strategy;
342
343 /* fail(msg, err_ret = Json())
344 *
345 * Mark this parse as failed.
346 */
347 Json fail(string &&msg) {
348 return fail(move(msg), Json());
349 }
350
351 template <typename T>
352 T fail(string &&msg, const T err_ret) {
353 if (!failed)
354 err = std::move(msg);
355 failed = true;
356 return err_ret;
357 }
358
359 /* consume_whitespace()
360 *
361 * Advance until the current character is non-whitespace.
362 */
363 void consume_whitespace() {
364 while (str[i] == ' ' || str[i] == '\r' || str[i] == '\n' || str[i] == '\t')
365 i++;
366 }
367
368 /* consume_comment()
369 *
370 * Advance comments (c-style inline and multiline).
371 */
372 bool consume_comment() {
373 bool comment_found = false;
374 if (str[i] == '/') {
375 i++;
376 if (i == str.size())
377 return fail("unexpected end of input inside comment", 0);
378 if (str[i] == '/') { // inline comment
379 i++;
380 if (i == str.size())
381 return fail("unexpected end of input inside inline comment", 0);
382 // advance until next line
383 while (str[i] != '\n') {
384 i++;
385 if (i == str.size())
386 return fail("unexpected end of input inside inline comment", 0);
387 }
388 comment_found = true;
389 }
390 else if (str[i] == '*') { // multiline comment
391 i++;
392 if (i > str.size()-2)
393 return fail("unexpected end of input inside multi-line comment", 0);
394 // advance until closing tokens
395 while (!(str[i] == '*' && str[i+1] == '/')) {
396 i++;
397 if (i > str.size()-2)
398 return fail(
399 "unexpected end of input inside multi-line comment", 0);
400 }
401 i += 2;
402 if (i == str.size())
403 return fail(
404 "unexpected end of input inside multi-line comment", 0);
405 comment_found = true;
406 }
407 else
408 return fail("malformed comment", 0);
409 }
410 return comment_found;
411 }
412
413 /* consume_garbage()
414 *
415 * Advance until the current character is non-whitespace and non-comment.
416 */
417 void consume_garbage() {
418 consume_whitespace();
419 if(strategy == JsonParse::COMMENTS) {
420 bool comment_found = false;
421 do {
422 comment_found = consume_comment();
423 consume_whitespace();
424 }
425 while(comment_found);
426 }
427 }
428
429 /* get_next_token()
430 *
431 * Return the next non-whitespace character. If the end of the input is reached,
432 * flag an error and return 0.
433 */
434 char get_next_token() {
435 consume_garbage();
436 if (i == str.size())
437 return fail("unexpected end of input", 0);
438
439 return str[i++];
440 }
441
442 /* encode_utf8(pt, out)
443 *
444 * Encode pt as UTF-8 and add it to out.
445 */
446 void encode_utf8(long pt, string & out) {
447 if (pt < 0)
448 return;
449
450 if (pt < 0x80) {
451 out += static_cast<char>(pt);
452 } else if (pt < 0x800) {
453 out += static_cast<char>((pt >> 6) | 0xC0);
454 out += static_cast<char>((pt & 0x3F) | 0x80);
455 } else if (pt < 0x10000) {
456 out += static_cast<char>((pt >> 12) | 0xE0);
457 out += static_cast<char>(((pt >> 6) & 0x3F) | 0x80);
458 out += static_cast<char>((pt & 0x3F) | 0x80);
459 } else {
460 out += static_cast<char>((pt >> 18) | 0xF0);
461 out += static_cast<char>(((pt >> 12) & 0x3F) | 0x80);
462 out += static_cast<char>(((pt >> 6) & 0x3F) | 0x80);
463 out += static_cast<char>((pt & 0x3F) | 0x80);
464 }
465 }
466
467 /* parse_string()
468 *
469 * Parse a string, starting at the current position.
470 */
471 string parse_string() {
472 string out;
473 long last_escaped_codepoint = -1;
474 while (true) {
475 if (i == str.size())
476 return fail("unexpected end of input in string", "");
477
478 char ch = str[i++];
479
480 if (ch == '"') {
481 encode_utf8(last_escaped_codepoint, out);
482 return out;
483 }
484
485 if (in_range(ch, 0, 0x1f))
486 return fail("unescaped " + esc(ch) + " in string", "");
487
488 // The usual case: non-escaped characters
489 if (ch != '\\') {
490 encode_utf8(last_escaped_codepoint, out);
491 last_escaped_codepoint = -1;
492 out += ch;
493 continue;
494 }
495
496 // Handle escapes
497 if (i == str.size())
498 return fail("unexpected end of input in string", "");
499
500 ch = str[i++];
501
502 if (ch == 'u') {
503 // Extract 4-byte escape sequence
504 string esc = str.substr(i, 4);
505 // Explicitly check length of the substring. The following loop
506 // relies on std::string returning the terminating NUL when
507 // accessing str[length]. Checking here reduces brittleness.
508 if (esc.length() < 4) {
509 return fail("bad \\u escape: " + esc, "");
510 }
511 for (int j = 0; j < 4; j++) {
512 if (!in_range(esc[j], 'a', 'f') && !in_range(esc[j], 'A', 'F')
513 && !in_range(esc[j], '0', '9'))
514 return fail("bad \\u escape: " + esc, "");
515 }
516
517 long codepoint = strtol(esc.data(), nullptr, 16);
518
519 // JSON specifies that characters outside the BMP shall be encoded as a pair
520 // of 4-hex-digit \u escapes encoding their surrogate pair components. Check
521 // whether we're in the middle of such a beast: the previous codepoint was an
522 // escaped lead (high) surrogate, and this is a trail (low) surrogate.
523 if (in_range(last_escaped_codepoint, 0xD800, 0xDBFF)
524 && in_range(codepoint, 0xDC00, 0xDFFF)) {
525 // Reassemble the two surrogate pairs into one astral-plane character, per
526 // the UTF-16 algorithm.
527 encode_utf8((((last_escaped_codepoint - 0xD800) << 10)
528 | (codepoint - 0xDC00)) + 0x10000, out);
529 last_escaped_codepoint = -1;
530 } else {
531 encode_utf8(last_escaped_codepoint, out);
532 last_escaped_codepoint = codepoint;
533 }
534
535 i += 4;
536 continue;
537 }
538
539 encode_utf8(last_escaped_codepoint, out);
540 last_escaped_codepoint = -1;
541
542 if (ch == 'b') {
543 out += '\b';
544 } else if (ch == 'f') {
545 out += '\f';
546 } else if (ch == 'n') {
547 out += '\n';
548 } else if (ch == 'r') {
549 out += '\r';
550 } else if (ch == 't') {
551 out += '\t';
552 } else if (ch == '"' || ch == '\\' || ch == '/') {
553 out += ch;
554 } else {
555 return fail("invalid escape character " + esc(ch), "");
556 }
557 }
558 }
559
560 /* parse_number()
561 *
562 * Parse a double.
563 */
564 Json parse_number() {
565 size_t start_pos = i;
566
567 if (str[i] == '-')
568 i++;
569
570 // Integer part
571 if (str[i] == '0') {
572 i++;
573 if (in_range(str[i], '0', '9'))
574 return fail("leading 0s not permitted in numbers");
575 } else if (in_range(str[i], '1', '9')) {
576 i++;
577 while (in_range(str[i], '0', '9'))
578 i++;
579 } else {
580 return fail("invalid " + esc(str[i]) + " in number");
581 }
582
583 if (str[i] != '.' && str[i] != 'e' && str[i] != 'E'
584 && (i - start_pos) <= static_cast<size_t>(std::numeric_limits<int>::digits10)) {
585 return std::atoi(str.c_str() + start_pos);
586 }
587
588 // Decimal part
589 if (str[i] == '.') {
590 i++;
591 if (!in_range(str[i], '0', '9'))
592 return fail("at least one digit required in fractional part");
593
594 while (in_range(str[i], '0', '9'))
595 i++;
596 }
597
598 // Exponent part
599 if (str[i] == 'e' || str[i] == 'E') {
600 i++;
601
602 if (str[i] == '+' || str[i] == '-')
603 i++;
604
605 if (!in_range(str[i], '0', '9'))
606 return fail("at least one digit required in exponent");
607
608 while (in_range(str[i], '0', '9'))
609 i++;
610 }
611
612 return std::strtod(str.c_str() + start_pos, nullptr);
613 }
614
615 /* expect(str, res)
616 *
617 * Expect that 'str' starts at the character that was just read. If it does, advance
618 * the input and return res. If not, flag an error.
619 */
620 Json expect(const string &expected, Json res) {
621 assert(i != 0);
622 i--;
623 if (str.compare(i, expected.length(), expected) == 0) {
624 i += expected.length();
625 return res;
626 } else {
627 return fail("parse error: expected " + expected + ", got " + str.substr(i, expected.length()));
628 }
629 }
630
631 /* parse_json()
632 *
633 * Parse a JSON object.
634 */
635 Json parse_json(int depth) {
636 if (depth > max_depth) {
637 return fail("exceeded maximum nesting depth");
638 }
639
640 char ch = get_next_token();
641 if (failed)
642 return Json();
643
644 if (ch == '-' || (ch >= '0' && ch <= '9')) {
645 i--;
646 return parse_number();
647 }
648
649 if (ch == 't')
650 return expect("true", true);
651
652 if (ch == 'f')
653 return expect("false", false);
654
655 if (ch == 'n')
656 return expect("null", Json());
657
658 if (ch == '"')
659 return parse_string();
660
661 if (ch == '{') {
662 map<string, Json> data;
663 ch = get_next_token();
664 if (ch == '}')
665 return data;
666
667 while (1) {
668 if (ch != '"')
669 return fail("expected '\"' in object, got " + esc(ch));
670
671 string key = parse_string();
672 if (failed)
673 return Json();
674
675 ch = get_next_token();
676 if (ch != ':')
677 return fail("expected ':' in object, got " + esc(ch));
678
679 data[std::move(key)] = parse_json(depth + 1);
680 if (failed)
681 return Json();
682
683 ch = get_next_token();
684 if (ch == '}')
685 break;
686 if (ch != ',')
687 return fail("expected ',' in object, got " + esc(ch));
688
689 ch = get_next_token();
690 }
691 return data;
692 }
693
694 if (ch == '[') {
695 vector<Json> data;
696 ch = get_next_token();
697 if (ch == ']')
698 return data;
699
700 while (1) {
701 i--;
702 data.push_back(parse_json(depth + 1));
703 if (failed)
704 return Json();
705
706 ch = get_next_token();
707 if (ch == ']')
708 break;
709 if (ch != ',')
710 return fail("expected ',' in list, got " + esc(ch));
711
712 ch = get_next_token();
713 (void)ch;
714 }
715 return data;
716 }
717
718 return fail("expected value, got " + esc(ch));
719 }
720 };
721
722 Json Json::parse(const string &in, string &err, JsonParse strategy) {
723 JsonParser parser { in, 0, err, false, strategy };
724 Json result = parser.parse_json(0);
725
726 // Check for any trailing garbage
727 parser.consume_garbage();
728 if (parser.i != in.size())
729 return parser.fail("unexpected trailing " + esc(in[parser.i]));
730
731 return result;
732 }
733
734 // Documented in json11.hpp
735 vector<Json> Json::parse_multi(const string &in,
736 string &err,
737 JsonParse strategy) {
738 JsonParser parser { in, 0, err, false, strategy };
739
740 vector<Json> json_vec;
741 while (parser.i != in.size() && !parser.failed) {
742 json_vec.push_back(parser.parse_json(0));
743 // Check for another object
744 parser.consume_garbage();
745 }
746 return json_vec;
747 }
748
749 /* * * * * * * * * * * * * * * * * * * *
750 * Shape-checking
751 */
752
753 bool Json::has_shape(const shape & types, string & err) const {
754 if (!is_object()) {
755 err = "expected JSON object, got " + dump();
756 return false;
757 }
758
759 for (auto & item : types) {
760 if ((*this)[item.first].type() != item.second) {
761 err = "bad type for " + item.first + " in " + dump();
762 return false;
763 }
764 }
765
766 return true;
767 }
768
769 } // namespace json11