annotate ext/json11/json11.cpp @ 242:d607ae858682

Update json11 code
author Chris Cannam <cannam@all-day-breakfast.com>
date Tue, 13 Jun 2017 17:16:03 +0100
parents bf8e3e7dd7de
children de5dc40f1830
rev   line source
cannam@150 1 /* Copyright (c) 2013 Dropbox, Inc.
cannam@150 2 *
cannam@150 3 * Permission is hereby granted, free of charge, to any person obtaining a copy
cannam@150 4 * of this software and associated documentation files (the "Software"), to deal
cannam@150 5 * in the Software without restriction, including without limitation the rights
cannam@150 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
cannam@150 7 * copies of the Software, and to permit persons to whom the Software is
cannam@150 8 * furnished to do so, subject to the following conditions:
cannam@150 9 *
cannam@150 10 * The above copyright notice and this permission notice shall be included in
cannam@150 11 * all copies or substantial portions of the Software.
cannam@150 12 *
cannam@150 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
cannam@150 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
cannam@150 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
cannam@150 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
cannam@150 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
cannam@150 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
cannam@150 19 * THE SOFTWARE.
cannam@150 20 */
cannam@150 21
cannam@150 22 #include "json11.hpp"
cannam@150 23 #include <cassert>
cannam@150 24 #include <cmath>
cannam@150 25 #include <cstdlib>
cannam@150 26 #include <cstdio>
cannam@150 27 #include <limits>
cannam@150 28
cannam@150 29 namespace json11 {
cannam@150 30
cannam@150 31 static const int max_depth = 200;
cannam@150 32
cannam@150 33 using std::string;
cannam@150 34 using std::vector;
cannam@150 35 using std::map;
cannam@150 36 using std::make_shared;
cannam@150 37 using std::initializer_list;
cannam@150 38 using std::move;
cannam@150 39
/* Stand-in type used to represent JSON null. It holds no data; the two
 * comparison operators exist only so that the generic comparison helpers in
 * the value wrappers can be instantiated for it. std::nullptr_t is not used
 * because it may not be orderable.
 */
struct NullStruct {
    // Every null is equal to every other null.
    bool operator==(NullStruct) const {
        return true;
    }

    // No null orders before another null.
    bool operator<(NullStruct) const {
        return false;
    }
};
cannam@242 48
cannam@150 49 /* * * * * * * * * * * * * * * * * * * *
cannam@150 50 * Serialization
cannam@150 51 */
cannam@150 52
cannam@242 53 static void dump(NullStruct, string &out) {
cannam@150 54 out += "null";
cannam@150 55 }
cannam@150 56
/* Serialize a double as a JSON number and append it to out.
 *
 * Non-finite values (NaN, +/-infinity) have no JSON representation and are
 * written as "null". Finite values are formatted with "%.17g".
 */
static void dump(double value, std::string &out) {
    if (!std::isfinite(value)) {
        out += "null";
        return;
    }

    char buf[32];
    snprintf(buf, sizeof buf, "%.17g", value);

    // snprintf writes the decimal separator of the current C locale (for
    // example ',' in many European locales), but JSON only permits '.'.
    // For a finite value "%.17g" otherwise produces nothing but digits, a
    // sign and 'e', so any other run of bytes must be the locale's separator
    // (possibly multi-byte): collapse that run into a single '.'.
    bool in_separator = false;
    for (const char *p = buf; *p != '\0'; ++p) {
        const char ch = *p;
        const bool numeric =
            (ch >= '0' && ch <= '9') || ch == '-' || ch == '+' || ch == 'e';
        if (numeric) {
            out += ch;
            in_separator = false;
        } else if (!in_separator) {
            out += '.';
            in_separator = true;
        }
    }
}
cannam@150 66
/* Serialize an int in decimal and append it to out. */
static void dump(int value, std::string &out) {
    char digits[32];
    snprintf(digits, sizeof digits, "%d", value);
    out.append(digits);
}
cannam@150 72
/* Serialize a boolean as the literal "true" or "false" and append it to out. */
static void dump(bool value, std::string &out) {
    if (value) {
        out += "true";
    } else {
        out += "false";
    }
}
cannam@150 76
/* Serialize a string as a quoted JSON string literal and append it to out.
 *
 * Backslash, double quote and the control characters \b \f \n \r \t use their
 * short escapes; any other byte <= 0x1f is written as \u00XX. The UTF-8
 * sequences E2 80 A8 and E2 80 A9 (U+2028 / U+2029) are written as \u2028 and
 * \u2029. All other bytes are copied through unchanged.
 */
static void dump(const std::string &value, std::string &out) {
    const size_t len = value.length();

    out += '"';
    for (size_t pos = 0; pos < len; pos++) {
        const char ch = value[pos];

        // Characters with a dedicated short escape.
        switch (ch) {
        case '\\': out += "\\\\"; continue;
        case '"':  out += "\\\""; continue;
        case '\b': out += "\\b";  continue;
        case '\f': out += "\\f";  continue;
        case '\n': out += "\\n";  continue;
        case '\r': out += "\\r";  continue;
        case '\t': out += "\\t";  continue;
        default:
            break;
        }

        const uint8_t byte = static_cast<uint8_t>(ch);

        // Remaining control characters get a numeric escape.
        if (byte <= 0x1f) {
            char buf[8];
            snprintf(buf, sizeof buf, "\\u%04x", ch);
            out += buf;
            continue;
        }

        // Three-byte sequences E2 80 A8 / E2 80 A9; only considered when two
        // more bytes are actually available after the lead byte.
        if (byte == 0xe2 && pos + 2 < len
            && static_cast<uint8_t>(value[pos + 1]) == 0x80) {
            const uint8_t third = static_cast<uint8_t>(value[pos + 2]);
            if (third == 0xa8) {
                out += "\\u2028";
                pos += 2;
                continue;
            }
            if (third == 0xa9) {
                out += "\\u2029";
                pos += 2;
                continue;
            }
        }

        out += ch;
    }
    out += '"';
}
cannam@150 113
cannam@150 114 static void dump(const Json::array &values, string &out) {
cannam@150 115 bool first = true;
cannam@150 116 out += "[";
cannam@150 117 for (const auto &value : values) {
cannam@150 118 if (!first)
cannam@150 119 out += ", ";
cannam@150 120 value.dump(out);
cannam@150 121 first = false;
cannam@150 122 }
cannam@150 123 out += "]";
cannam@150 124 }
cannam@150 125
cannam@150 126 static void dump(const Json::object &values, string &out) {
cannam@150 127 bool first = true;
cannam@150 128 out += "{";
cannam@150 129 for (const auto &kv : values) {
cannam@150 130 if (!first)
cannam@150 131 out += ", ";
cannam@150 132 dump(kv.first, out);
cannam@150 133 out += ": ";
cannam@150 134 kv.second.dump(out);
cannam@150 135 first = false;
cannam@150 136 }
cannam@150 137 out += "}";
cannam@150 138 }
cannam@150 139
cannam@150 140 void Json::dump(string &out) const {
cannam@150 141 m_ptr->dump(out);
cannam@150 142 }
cannam@150 143
cannam@150 144 /* * * * * * * * * * * * * * * * * * * *
cannam@150 145 * Value wrappers
cannam@150 146 */
cannam@150 147
cannam@150 148 template <Json::Type tag, typename T>
cannam@150 149 class Value : public JsonValue {
cannam@150 150 protected:
cannam@150 151
cannam@150 152 // Constructors
cannam@150 153 explicit Value(const T &value) : m_value(value) {}
cannam@150 154 explicit Value(T &&value) : m_value(move(value)) {}
cannam@150 155
cannam@150 156 // Get type tag
cannam@150 157 Json::Type type() const override {
cannam@150 158 return tag;
cannam@150 159 }
cannam@150 160
cannam@150 161 // Comparisons
cannam@150 162 bool equals(const JsonValue * other) const override {
cannam@150 163 return m_value == static_cast<const Value<tag, T> *>(other)->m_value;
cannam@150 164 }
cannam@150 165 bool less(const JsonValue * other) const override {
cannam@150 166 return m_value < static_cast<const Value<tag, T> *>(other)->m_value;
cannam@150 167 }
cannam@150 168
cannam@150 169 const T m_value;
cannam@150 170 void dump(string &out) const override { json11::dump(m_value, out); }
cannam@150 171 };
cannam@150 172
cannam@150 173 class JsonDouble final : public Value<Json::NUMBER, double> {
cannam@150 174 double number_value() const override { return m_value; }
cannam@150 175 int int_value() const override { return static_cast<int>(m_value); }
cannam@150 176 bool equals(const JsonValue * other) const override { return m_value == other->number_value(); }
cannam@150 177 bool less(const JsonValue * other) const override { return m_value < other->number_value(); }
cannam@150 178 public:
cannam@150 179 explicit JsonDouble(double value) : Value(value) {}
cannam@150 180 };
cannam@150 181
cannam@150 182 class JsonInt final : public Value<Json::NUMBER, int> {
cannam@150 183 double number_value() const override { return m_value; }
cannam@150 184 int int_value() const override { return m_value; }
cannam@150 185 bool equals(const JsonValue * other) const override { return m_value == other->number_value(); }
cannam@150 186 bool less(const JsonValue * other) const override { return m_value < other->number_value(); }
cannam@150 187 public:
cannam@150 188 explicit JsonInt(int value) : Value(value) {}
cannam@150 189 };
cannam@150 190
cannam@150 191 class JsonBoolean final : public Value<Json::BOOL, bool> {
cannam@150 192 bool bool_value() const override { return m_value; }
cannam@150 193 public:
cannam@150 194 explicit JsonBoolean(bool value) : Value(value) {}
cannam@150 195 };
cannam@150 196
cannam@150 197 class JsonString final : public Value<Json::STRING, string> {
cannam@150 198 const string &string_value() const override { return m_value; }
cannam@150 199 public:
cannam@150 200 explicit JsonString(const string &value) : Value(value) {}
cannam@150 201 explicit JsonString(string &&value) : Value(move(value)) {}
cannam@150 202 };
cannam@150 203
cannam@150 204 class JsonArray final : public Value<Json::ARRAY, Json::array> {
cannam@150 205 const Json::array &array_items() const override { return m_value; }
cannam@150 206 const Json & operator[](size_t i) const override;
cannam@150 207 public:
cannam@150 208 explicit JsonArray(const Json::array &value) : Value(value) {}
cannam@150 209 explicit JsonArray(Json::array &&value) : Value(move(value)) {}
cannam@150 210 };
cannam@150 211
cannam@150 212 class JsonObject final : public Value<Json::OBJECT, Json::object> {
cannam@150 213 const Json::object &object_items() const override { return m_value; }
cannam@150 214 const Json & operator[](const string &key) const override;
cannam@150 215 public:
cannam@150 216 explicit JsonObject(const Json::object &value) : Value(value) {}
cannam@150 217 explicit JsonObject(Json::object &&value) : Value(move(value)) {}
cannam@150 218 };
cannam@150 219
cannam@242 220 class JsonNull final : public Value<Json::NUL, NullStruct> {
cannam@150 221 public:
cannam@242 222 JsonNull() : Value({}) {}
cannam@150 223 };
cannam@150 224
cannam@150 225 /* * * * * * * * * * * * * * * * * * * *
cannam@150 226 * Static globals - static-init-safe
cannam@150 227 */
cannam@150 228 struct Statics {
cannam@150 229 const std::shared_ptr<JsonValue> null = make_shared<JsonNull>();
cannam@150 230 const std::shared_ptr<JsonValue> t = make_shared<JsonBoolean>(true);
cannam@150 231 const std::shared_ptr<JsonValue> f = make_shared<JsonBoolean>(false);
cannam@150 232 const string empty_string;
cannam@150 233 const vector<Json> empty_vector;
cannam@150 234 const map<string, Json> empty_map;
cannam@150 235 Statics() {}
cannam@150 236 };
cannam@150 237
cannam@150 238 static const Statics & statics() {
cannam@150 239 static const Statics s {};
cannam@150 240 return s;
cannam@150 241 }
cannam@150 242
cannam@150 243 static const Json & static_null() {
cannam@150 244 // This has to be separate, not in Statics, because Json() accesses statics().null.
cannam@150 245 static const Json json_null;
cannam@150 246 return json_null;
cannam@150 247 }
cannam@150 248
cannam@150 249 /* * * * * * * * * * * * * * * * * * * *
cannam@150 250 * Constructors
cannam@150 251 */
cannam@150 252
cannam@150 253 Json::Json() noexcept : m_ptr(statics().null) {}
cannam@150 254 Json::Json(std::nullptr_t) noexcept : m_ptr(statics().null) {}
cannam@150 255 Json::Json(double value) : m_ptr(make_shared<JsonDouble>(value)) {}
cannam@150 256 Json::Json(int value) : m_ptr(make_shared<JsonInt>(value)) {}
cannam@150 257 Json::Json(bool value) : m_ptr(value ? statics().t : statics().f) {}
cannam@150 258 Json::Json(const string &value) : m_ptr(make_shared<JsonString>(value)) {}
cannam@150 259 Json::Json(string &&value) : m_ptr(make_shared<JsonString>(move(value))) {}
cannam@150 260 Json::Json(const char * value) : m_ptr(make_shared<JsonString>(value)) {}
cannam@150 261 Json::Json(const Json::array &values) : m_ptr(make_shared<JsonArray>(values)) {}
cannam@150 262 Json::Json(Json::array &&values) : m_ptr(make_shared<JsonArray>(move(values))) {}
cannam@150 263 Json::Json(const Json::object &values) : m_ptr(make_shared<JsonObject>(values)) {}
cannam@150 264 Json::Json(Json::object &&values) : m_ptr(make_shared<JsonObject>(move(values))) {}
cannam@150 265
cannam@150 266 /* * * * * * * * * * * * * * * * * * * *
cannam@150 267 * Accessors
cannam@150 268 */
cannam@150 269
cannam@150 270 Json::Type Json::type() const { return m_ptr->type(); }
cannam@150 271 double Json::number_value() const { return m_ptr->number_value(); }
cannam@150 272 int Json::int_value() const { return m_ptr->int_value(); }
cannam@150 273 bool Json::bool_value() const { return m_ptr->bool_value(); }
cannam@150 274 const string & Json::string_value() const { return m_ptr->string_value(); }
cannam@150 275 const vector<Json> & Json::array_items() const { return m_ptr->array_items(); }
cannam@150 276 const map<string, Json> & Json::object_items() const { return m_ptr->object_items(); }
cannam@150 277 const Json & Json::operator[] (size_t i) const { return (*m_ptr)[i]; }
cannam@150 278 const Json & Json::operator[] (const string &key) const { return (*m_ptr)[key]; }
cannam@150 279
cannam@150 280 double JsonValue::number_value() const { return 0; }
cannam@150 281 int JsonValue::int_value() const { return 0; }
cannam@150 282 bool JsonValue::bool_value() const { return false; }
cannam@150 283 const string & JsonValue::string_value() const { return statics().empty_string; }
cannam@150 284 const vector<Json> & JsonValue::array_items() const { return statics().empty_vector; }
cannam@150 285 const map<string, Json> & JsonValue::object_items() const { return statics().empty_map; }
cannam@150 286 const Json & JsonValue::operator[] (size_t) const { return static_null(); }
cannam@150 287 const Json & JsonValue::operator[] (const string &) const { return static_null(); }
cannam@150 288
cannam@150 289 const Json & JsonObject::operator[] (const string &key) const {
cannam@150 290 auto iter = m_value.find(key);
cannam@150 291 return (iter == m_value.end()) ? static_null() : iter->second;
cannam@150 292 }
cannam@150 293 const Json & JsonArray::operator[] (size_t i) const {
cannam@150 294 if (i >= m_value.size()) return static_null();
cannam@150 295 else return m_value[i];
cannam@150 296 }
cannam@150 297
cannam@150 298 /* * * * * * * * * * * * * * * * * * * *
cannam@150 299 * Comparison
cannam@150 300 */
cannam@150 301
cannam@150 302 bool Json::operator== (const Json &other) const {
cannam@242 303 if (m_ptr == other.m_ptr)
cannam@242 304 return true;
cannam@150 305 if (m_ptr->type() != other.m_ptr->type())
cannam@150 306 return false;
cannam@150 307
cannam@150 308 return m_ptr->equals(other.m_ptr.get());
cannam@150 309 }
cannam@150 310
cannam@150 311 bool Json::operator< (const Json &other) const {
cannam@242 312 if (m_ptr == other.m_ptr)
cannam@242 313 return false;
cannam@150 314 if (m_ptr->type() != other.m_ptr->type())
cannam@150 315 return m_ptr->type() < other.m_ptr->type();
cannam@150 316
cannam@150 317 return m_ptr->less(other.m_ptr.get());
cannam@150 318 }
cannam@150 319
cannam@150 320 /* * * * * * * * * * * * * * * * * * * *
cannam@150 321 * Parsing
cannam@150 322 */
cannam@150 323
/* esc(c)
 *
 * Render char c for use in an error message: bytes in 0x20..0x7f are shown as
 * the quoted character followed by its numeric value, everything else as the
 * numeric value only.
 */
static inline std::string esc(char c) {
    const uint8_t byte = static_cast<uint8_t>(c);
    const bool show_char = (byte >= 0x20 && byte <= 0x7f);

    char text[12];
    if (show_char)
        snprintf(text, sizeof text, "'%c' (%d)", c, c);
    else
        snprintf(text, sizeof text, "(%d)", c);
    return std::string(text);
}
cannam@150 337
/* True when x lies in the closed interval [lower, upper]. */
static inline bool in_range(long x, long lower, long upper) {
    return !(x < lower || x > upper);
}
cannam@150 341
cannam@242 342 namespace {
cannam@150 343 /* JsonParser
cannam@150 344 *
cannam@150 345 * Object that tracks all state of an in-progress parse.
cannam@150 346 */
cannam@242 347 struct JsonParser final {
cannam@150 348
cannam@150 349 /* State
cannam@150 350 */
cannam@150 351 const string &str;
cannam@150 352 size_t i;
cannam@150 353 string &err;
cannam@150 354 bool failed;
cannam@150 355 const JsonParse strategy;
cannam@150 356
cannam@150 357 /* fail(msg, err_ret = Json())
cannam@150 358 *
cannam@150 359 * Mark this parse as failed.
cannam@150 360 */
cannam@150 361 Json fail(string &&msg) {
cannam@150 362 return fail(move(msg), Json());
cannam@150 363 }
cannam@150 364
cannam@150 365 template <typename T>
cannam@150 366 T fail(string &&msg, const T err_ret) {
cannam@150 367 if (!failed)
cannam@150 368 err = std::move(msg);
cannam@150 369 failed = true;
cannam@150 370 return err_ret;
cannam@150 371 }
cannam@150 372
cannam@150 373 /* consume_whitespace()
cannam@150 374 *
cannam@150 375 * Advance until the current character is non-whitespace.
cannam@150 376 */
cannam@150 377 void consume_whitespace() {
cannam@150 378 while (str[i] == ' ' || str[i] == '\r' || str[i] == '\n' || str[i] == '\t')
cannam@150 379 i++;
cannam@150 380 }
cannam@150 381
cannam@150 382 /* consume_comment()
cannam@150 383 *
cannam@150 384 * Advance comments (c-style inline and multiline).
cannam@150 385 */
cannam@150 386 bool consume_comment() {
cannam@150 387 bool comment_found = false;
cannam@150 388 if (str[i] == '/') {
cannam@150 389 i++;
cannam@150 390 if (i == str.size())
cannam@242 391 return fail("unexpected end of input after start of comment", false);
cannam@150 392 if (str[i] == '/') { // inline comment
cannam@150 393 i++;
cannam@242 394 // advance until next line, or end of input
cannam@242 395 while (i < str.size() && str[i] != '\n') {
cannam@150 396 i++;
cannam@150 397 }
cannam@150 398 comment_found = true;
cannam@150 399 }
cannam@150 400 else if (str[i] == '*') { // multiline comment
cannam@150 401 i++;
cannam@150 402 if (i > str.size()-2)
cannam@242 403 return fail("unexpected end of input inside multi-line comment", false);
cannam@150 404 // advance until closing tokens
cannam@150 405 while (!(str[i] == '*' && str[i+1] == '/')) {
cannam@150 406 i++;
cannam@150 407 if (i > str.size()-2)
cannam@150 408 return fail(
cannam@242 409 "unexpected end of input inside multi-line comment", false);
cannam@150 410 }
cannam@150 411 i += 2;
cannam@150 412 comment_found = true;
cannam@150 413 }
cannam@150 414 else
cannam@242 415 return fail("malformed comment", false);
cannam@150 416 }
cannam@150 417 return comment_found;
cannam@150 418 }
cannam@150 419
cannam@150 420 /* consume_garbage()
cannam@150 421 *
cannam@150 422 * Advance until the current character is non-whitespace and non-comment.
cannam@150 423 */
cannam@150 424 void consume_garbage() {
cannam@150 425 consume_whitespace();
cannam@150 426 if(strategy == JsonParse::COMMENTS) {
cannam@150 427 bool comment_found = false;
cannam@150 428 do {
cannam@150 429 comment_found = consume_comment();
cannam@242 430 if (failed) return;
cannam@150 431 consume_whitespace();
cannam@150 432 }
cannam@150 433 while(comment_found);
cannam@150 434 }
cannam@150 435 }
cannam@150 436
cannam@150 437 /* get_next_token()
cannam@150 438 *
cannam@150 439 * Return the next non-whitespace character. If the end of the input is reached,
cannam@150 440 * flag an error and return 0.
cannam@150 441 */
cannam@150 442 char get_next_token() {
cannam@150 443 consume_garbage();
cannam@242 444 if (failed) return (char)0;
cannam@150 445 if (i == str.size())
cannam@242 446 return fail("unexpected end of input", (char)0);
cannam@150 447
cannam@150 448 return str[i++];
cannam@150 449 }
cannam@150 450
/* encode_utf8(pt, out)
 *
 * Append the UTF-8 encoding of code point pt to out, using one to four bytes
 * depending on its magnitude. A negative pt appends nothing.
 */
void encode_utf8(long pt, std::string & out) {
    if (pt < 0)
        return;

    // Continuation byte: binary 10xxxxxx carrying six bits of pt taken from
    // the given bit offset.
    const auto continuation = [pt](int shift) {
        return static_cast<char>(((pt >> shift) & 0x3F) | 0x80);
    };

    if (pt < 0x80) {
        out += static_cast<char>(pt);
        return;
    }
    if (pt < 0x800) {
        out += static_cast<char>((pt >> 6) | 0xC0);
        out += continuation(0);
        return;
    }
    if (pt < 0x10000) {
        out += static_cast<char>((pt >> 12) | 0xE0);
        out += continuation(6);
        out += continuation(0);
        return;
    }
    out += static_cast<char>((pt >> 18) | 0xF0);
    out += continuation(12);
    out += continuation(6);
    out += continuation(0);
}
cannam@150 475
cannam@150 476 /* parse_string()
cannam@150 477 *
cannam@150 478 * Parse a string, starting at the current position.
cannam@150 479 */
cannam@150 480 string parse_string() {
cannam@150 481 string out;
cannam@150 482 long last_escaped_codepoint = -1;
cannam@150 483 while (true) {
cannam@150 484 if (i == str.size())
cannam@150 485 return fail("unexpected end of input in string", "");
cannam@150 486
cannam@150 487 char ch = str[i++];
cannam@150 488
cannam@150 489 if (ch == '"') {
cannam@150 490 encode_utf8(last_escaped_codepoint, out);
cannam@150 491 return out;
cannam@150 492 }
cannam@150 493
cannam@150 494 if (in_range(ch, 0, 0x1f))
cannam@150 495 return fail("unescaped " + esc(ch) + " in string", "");
cannam@150 496
cannam@150 497 // The usual case: non-escaped characters
cannam@150 498 if (ch != '\\') {
cannam@150 499 encode_utf8(last_escaped_codepoint, out);
cannam@150 500 last_escaped_codepoint = -1;
cannam@150 501 out += ch;
cannam@150 502 continue;
cannam@150 503 }
cannam@150 504
cannam@150 505 // Handle escapes
cannam@150 506 if (i == str.size())
cannam@150 507 return fail("unexpected end of input in string", "");
cannam@150 508
cannam@150 509 ch = str[i++];
cannam@150 510
cannam@150 511 if (ch == 'u') {
cannam@150 512 // Extract 4-byte escape sequence
cannam@150 513 string esc = str.substr(i, 4);
cannam@150 514 // Explicitly check length of the substring. The following loop
cannam@150 515 // relies on std::string returning the terminating NUL when
cannam@150 516 // accessing str[length]. Checking here reduces brittleness.
cannam@150 517 if (esc.length() < 4) {
cannam@150 518 return fail("bad \\u escape: " + esc, "");
cannam@150 519 }
cannam@242 520 for (size_t j = 0; j < 4; j++) {
cannam@150 521 if (!in_range(esc[j], 'a', 'f') && !in_range(esc[j], 'A', 'F')
cannam@150 522 && !in_range(esc[j], '0', '9'))
cannam@150 523 return fail("bad \\u escape: " + esc, "");
cannam@150 524 }
cannam@150 525
cannam@150 526 long codepoint = strtol(esc.data(), nullptr, 16);
cannam@150 527
cannam@150 528 // JSON specifies that characters outside the BMP shall be encoded as a pair
cannam@150 529 // of 4-hex-digit \u escapes encoding their surrogate pair components. Check
cannam@150 530 // whether we're in the middle of such a beast: the previous codepoint was an
cannam@150 531 // escaped lead (high) surrogate, and this is a trail (low) surrogate.
cannam@150 532 if (in_range(last_escaped_codepoint, 0xD800, 0xDBFF)
cannam@150 533 && in_range(codepoint, 0xDC00, 0xDFFF)) {
cannam@150 534 // Reassemble the two surrogate pairs into one astral-plane character, per
cannam@150 535 // the UTF-16 algorithm.
cannam@150 536 encode_utf8((((last_escaped_codepoint - 0xD800) << 10)
cannam@150 537 | (codepoint - 0xDC00)) + 0x10000, out);
cannam@150 538 last_escaped_codepoint = -1;
cannam@150 539 } else {
cannam@150 540 encode_utf8(last_escaped_codepoint, out);
cannam@150 541 last_escaped_codepoint = codepoint;
cannam@150 542 }
cannam@150 543
cannam@150 544 i += 4;
cannam@150 545 continue;
cannam@150 546 }
cannam@150 547
cannam@150 548 encode_utf8(last_escaped_codepoint, out);
cannam@150 549 last_escaped_codepoint = -1;
cannam@150 550
cannam@150 551 if (ch == 'b') {
cannam@150 552 out += '\b';
cannam@150 553 } else if (ch == 'f') {
cannam@150 554 out += '\f';
cannam@150 555 } else if (ch == 'n') {
cannam@150 556 out += '\n';
cannam@150 557 } else if (ch == 'r') {
cannam@150 558 out += '\r';
cannam@150 559 } else if (ch == 't') {
cannam@150 560 out += '\t';
cannam@150 561 } else if (ch == '"' || ch == '\\' || ch == '/') {
cannam@150 562 out += ch;
cannam@150 563 } else {
cannam@150 564 return fail("invalid escape character " + esc(ch), "");
cannam@150 565 }
cannam@150 566 }
cannam@150 567 }
cannam@150 568
cannam@150 569 /* parse_number()
cannam@150 570 *
cannam@150 571 * Parse a double.
cannam@150 572 */
cannam@150 573 Json parse_number() {
cannam@150 574 size_t start_pos = i;
cannam@150 575
cannam@150 576 if (str[i] == '-')
cannam@150 577 i++;
cannam@150 578
cannam@150 579 // Integer part
cannam@150 580 if (str[i] == '0') {
cannam@150 581 i++;
cannam@150 582 if (in_range(str[i], '0', '9'))
cannam@150 583 return fail("leading 0s not permitted in numbers");
cannam@150 584 } else if (in_range(str[i], '1', '9')) {
cannam@150 585 i++;
cannam@150 586 while (in_range(str[i], '0', '9'))
cannam@150 587 i++;
cannam@150 588 } else {
cannam@150 589 return fail("invalid " + esc(str[i]) + " in number");
cannam@150 590 }
cannam@150 591
cannam@150 592 if (str[i] != '.' && str[i] != 'e' && str[i] != 'E'
cannam@150 593 && (i - start_pos) <= static_cast<size_t>(std::numeric_limits<int>::digits10)) {
cannam@150 594 return std::atoi(str.c_str() + start_pos);
cannam@150 595 }
cannam@150 596
cannam@150 597 // Decimal part
cannam@150 598 if (str[i] == '.') {
cannam@150 599 i++;
cannam@150 600 if (!in_range(str[i], '0', '9'))
cannam@150 601 return fail("at least one digit required in fractional part");
cannam@150 602
cannam@150 603 while (in_range(str[i], '0', '9'))
cannam@150 604 i++;
cannam@150 605 }
cannam@150 606
cannam@150 607 // Exponent part
cannam@150 608 if (str[i] == 'e' || str[i] == 'E') {
cannam@150 609 i++;
cannam@150 610
cannam@150 611 if (str[i] == '+' || str[i] == '-')
cannam@150 612 i++;
cannam@150 613
cannam@150 614 if (!in_range(str[i], '0', '9'))
cannam@150 615 return fail("at least one digit required in exponent");
cannam@150 616
cannam@150 617 while (in_range(str[i], '0', '9'))
cannam@150 618 i++;
cannam@150 619 }
cannam@150 620
cannam@150 621 return std::strtod(str.c_str() + start_pos, nullptr);
cannam@150 622 }
cannam@150 623
cannam@150 624 /* expect(str, res)
cannam@150 625 *
cannam@150 626 * Expect that 'str' starts at the character that was just read. If it does, advance
cannam@150 627 * the input and return res. If not, flag an error.
cannam@150 628 */
cannam@150 629 Json expect(const string &expected, Json res) {
cannam@150 630 assert(i != 0);
cannam@150 631 i--;
cannam@150 632 if (str.compare(i, expected.length(), expected) == 0) {
cannam@150 633 i += expected.length();
cannam@150 634 return res;
cannam@150 635 } else {
cannam@150 636 return fail("parse error: expected " + expected + ", got " + str.substr(i, expected.length()));
cannam@150 637 }
cannam@150 638 }
cannam@150 639
cannam@150 640 /* parse_json()
cannam@150 641 *
cannam@150 642 * Parse a JSON object.
cannam@150 643 */
cannam@150 644 Json parse_json(int depth) {
cannam@150 645 if (depth > max_depth) {
cannam@150 646 return fail("exceeded maximum nesting depth");
cannam@150 647 }
cannam@150 648
cannam@150 649 char ch = get_next_token();
cannam@150 650 if (failed)
cannam@150 651 return Json();
cannam@150 652
cannam@150 653 if (ch == '-' || (ch >= '0' && ch <= '9')) {
cannam@150 654 i--;
cannam@150 655 return parse_number();
cannam@150 656 }
cannam@150 657
cannam@150 658 if (ch == 't')
cannam@150 659 return expect("true", true);
cannam@150 660
cannam@150 661 if (ch == 'f')
cannam@150 662 return expect("false", false);
cannam@150 663
cannam@150 664 if (ch == 'n')
cannam@150 665 return expect("null", Json());
cannam@150 666
cannam@150 667 if (ch == '"')
cannam@150 668 return parse_string();
cannam@150 669
cannam@150 670 if (ch == '{') {
cannam@150 671 map<string, Json> data;
cannam@150 672 ch = get_next_token();
cannam@150 673 if (ch == '}')
cannam@150 674 return data;
cannam@150 675
cannam@150 676 while (1) {
cannam@150 677 if (ch != '"')
cannam@150 678 return fail("expected '\"' in object, got " + esc(ch));
cannam@150 679
cannam@150 680 string key = parse_string();
cannam@150 681 if (failed)
cannam@150 682 return Json();
cannam@150 683
cannam@150 684 ch = get_next_token();
cannam@150 685 if (ch != ':')
cannam@150 686 return fail("expected ':' in object, got " + esc(ch));
cannam@150 687
cannam@150 688 data[std::move(key)] = parse_json(depth + 1);
cannam@150 689 if (failed)
cannam@150 690 return Json();
cannam@150 691
cannam@150 692 ch = get_next_token();
cannam@150 693 if (ch == '}')
cannam@150 694 break;
cannam@150 695 if (ch != ',')
cannam@150 696 return fail("expected ',' in object, got " + esc(ch));
cannam@150 697
cannam@150 698 ch = get_next_token();
cannam@150 699 }
cannam@150 700 return data;
cannam@150 701 }
cannam@150 702
cannam@150 703 if (ch == '[') {
cannam@150 704 vector<Json> data;
cannam@150 705 ch = get_next_token();
cannam@150 706 if (ch == ']')
cannam@150 707 return data;
cannam@150 708
cannam@150 709 while (1) {
cannam@150 710 i--;
cannam@150 711 data.push_back(parse_json(depth + 1));
cannam@150 712 if (failed)
cannam@150 713 return Json();
cannam@150 714
cannam@150 715 ch = get_next_token();
cannam@150 716 if (ch == ']')
cannam@150 717 break;
cannam@150 718 if (ch != ',')
cannam@150 719 return fail("expected ',' in list, got " + esc(ch));
cannam@150 720
cannam@150 721 ch = get_next_token();
cannam@150 722 (void)ch;
cannam@150 723 }
cannam@150 724 return data;
cannam@150 725 }
cannam@150 726
cannam@150 727 return fail("expected value, got " + esc(ch));
cannam@150 728 }
cannam@150 729 };
cannam@242 730 }//namespace {
cannam@150 731
cannam@150 732 Json Json::parse(const string &in, string &err, JsonParse strategy) {
cannam@150 733 JsonParser parser { in, 0, err, false, strategy };
cannam@150 734 Json result = parser.parse_json(0);
cannam@150 735
cannam@150 736 // Check for any trailing garbage
cannam@150 737 parser.consume_garbage();
cannam@242 738 if (parser.failed)
cannam@242 739 return Json();
cannam@150 740 if (parser.i != in.size())
cannam@150 741 return parser.fail("unexpected trailing " + esc(in[parser.i]));
cannam@150 742
cannam@150 743 return result;
cannam@150 744 }
cannam@150 745
cannam@150 746 // Documented in json11.hpp
cannam@150 747 vector<Json> Json::parse_multi(const string &in,
cannam@242 748 std::string::size_type &parser_stop_pos,
cannam@150 749 string &err,
cannam@150 750 JsonParse strategy) {
cannam@150 751 JsonParser parser { in, 0, err, false, strategy };
cannam@242 752 parser_stop_pos = 0;
cannam@150 753 vector<Json> json_vec;
cannam@150 754 while (parser.i != in.size() && !parser.failed) {
cannam@150 755 json_vec.push_back(parser.parse_json(0));
cannam@242 756 if (parser.failed)
cannam@242 757 break;
cannam@242 758
cannam@150 759 // Check for another object
cannam@150 760 parser.consume_garbage();
cannam@242 761 if (parser.failed)
cannam@242 762 break;
cannam@242 763 parser_stop_pos = parser.i;
cannam@150 764 }
cannam@150 765 return json_vec;
cannam@150 766 }
cannam@150 767
cannam@150 768 /* * * * * * * * * * * * * * * * * * * *
cannam@150 769 * Shape-checking
cannam@150 770 */
cannam@150 771
cannam@150 772 bool Json::has_shape(const shape & types, string & err) const {
cannam@150 773 if (!is_object()) {
cannam@150 774 err = "expected JSON object, got " + dump();
cannam@150 775 return false;
cannam@150 776 }
cannam@150 777
cannam@150 778 for (auto & item : types) {
cannam@150 779 if ((*this)[item.first].type() != item.second) {
cannam@150 780 err = "bad type for " + item.first + " in " + dump();
cannam@150 781 return false;
cannam@150 782 }
cannam@150 783 }
cannam@150 784
cannam@150 785 return true;
cannam@150 786 }
cannam@150 787
cannam@150 788 } // namespace json11