cannam@150
|
1 /* Copyright (c) 2013 Dropbox, Inc.
|
cannam@150
|
2 *
|
cannam@150
|
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
|
cannam@150
|
4 * of this software and associated documentation files (the "Software"), to deal
|
cannam@150
|
5 * in the Software without restriction, including without limitation the rights
|
cannam@150
|
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
cannam@150
|
7 * copies of the Software, and to permit persons to whom the Software is
|
cannam@150
|
8 * furnished to do so, subject to the following conditions:
|
cannam@150
|
9 *
|
cannam@150
|
10 * The above copyright notice and this permission notice shall be included in
|
cannam@150
|
11 * all copies or substantial portions of the Software.
|
cannam@150
|
12 *
|
cannam@150
|
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
cannam@150
|
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
cannam@150
|
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
cannam@150
|
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
cannam@150
|
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
cannam@150
|
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
cannam@150
|
19 * THE SOFTWARE.
|
cannam@150
|
20 */
|
cannam@150
|
21
|
cannam@150
|
22 #include "json11.hpp"
|
cannam@150
|
23 #include <cassert>
|
cannam@150
|
24 #include <cmath>
|
cannam@150
|
25 #include <cstdlib>
|
cannam@150
|
26 #include <cstdio>
|
cannam@150
|
27 #include <limits>
|
cannam@150
|
28
|
cannam@150
|
29 namespace json11 {
|
cannam@150
|
30
|
cannam@150
|
// Deepest nesting level parse_json() accepts before it reports
// "exceeded maximum nesting depth".
static const int max_depth = 200;
|
cannam@150
|
32
|
cannam@150
|
33 using std::string;
|
cannam@150
|
34 using std::vector;
|
cannam@150
|
35 using std::map;
|
cannam@150
|
36 using std::make_shared;
|
cannam@150
|
37 using std::initializer_list;
|
cannam@150
|
38 using std::move;
|
cannam@150
|
39
|
cannam@242
|
40 /* Helper for representing null - just a do-nothing struct, plus comparison
|
cannam@242
|
41 * operators so the helpers in JsonValue work. We can't use nullptr_t because
|
cannam@242
|
42 * it may not be orderable.
|
cannam@242
|
43 */
|
cannam@242
|
// Stateless stand-in for JSON null. Any two instances compare equal and
// neither orders before the other, which is all the Value<> comparison
// helpers require of their payload type.
struct NullStruct {
    bool operator==(NullStruct) const { return true; }
    bool operator<(NullStruct) const { return false; }
};
|
cannam@242
|
48
|
cannam@150
|
49 /* * * * * * * * * * * * * * * * * * * *
|
cannam@150
|
50 * Serialization
|
cannam@150
|
51 */
|
cannam@150
|
52
|
cannam@242
|
53 static void dump(NullStruct, string &out) {
|
cannam@150
|
54 out += "null";
|
cannam@150
|
55 }
|
cannam@150
|
56
|
cannam@150
|
/* Serialize a double.
 *
 * Finite values are appended to out formatted with "%.17g". Non-finite
 * values (NaN, +/-infinity) are appended as "null" instead.
 */
static void dump(double value, std::string &out) {
    if (std::isfinite(value)) {
        char buf[32];
        snprintf(buf, sizeof buf, "%.17g", value);
        out += buf;
    } else {
        out += "null";
    }
}
|
cannam@150
|
66
|
cannam@150
|
/* Serialize an int: appends its decimal representation ("%d") to out. */
static void dump(int value, std::string &out) {
    char buf[32];
    snprintf(buf, sizeof buf, "%d", value);
    out += buf;
}
|
cannam@150
|
72
|
cannam@150
|
/* Serialize a bool: appends "true" or "false" to out. */
static void dump(bool value, std::string &out) {
    out += value ? "true" : "false";
}
|
cannam@150
|
76
|
cannam@150
|
/* Serialize a string as a double-quoted JSON string literal appended to out.
 *
 * Escaping rules, in order of precedence:
 *   - backslash and double quote are backslash-escaped;
 *   - \b \f \n \r \t use their short escapes;
 *   - any other byte <= 0x1f is written as \u00XX;
 *   - the UTF-8 byte sequences E2 80 A8 / E2 80 A9 are written as
 *     \u2028 / \u2029;
 *   - every other byte is copied through unchanged.
 */
static void dump(const std::string &value, std::string &out) {
    out += '"';
    for (size_t i = 0; i < value.length(); i++) {
        const char ch = value[i];
        // Compare bytes as unsigned so values >= 0x80 are not negative.
        // unsigned char is used rather than uint8_t so that this function
        // does not depend on <cstdint> being pulled in indirectly.
        const unsigned char byte = static_cast<unsigned char>(ch);
        if (ch == '\\') {
            out += "\\\\";
        } else if (ch == '"') {
            out += "\\\"";
        } else if (ch == '\b') {
            out += "\\b";
        } else if (ch == '\f') {
            out += "\\f";
        } else if (ch == '\n') {
            out += "\\n";
        } else if (ch == '\r') {
            out += "\\r";
        } else if (ch == '\t') {
            out += "\\t";
        } else if (byte <= 0x1f) {
            char buf[8];
            snprintf(buf, sizeof buf, "\\u%04x", static_cast<unsigned>(byte));
            out += buf;
        } else if (value.compare(i, 3, "\xe2\x80\xa8") == 0) {
            // compare() clamps to the end of the string, so a truncated
            // sequence at the tail simply does not match.
            out += "\\u2028";
            i += 2;
        } else if (value.compare(i, 3, "\xe2\x80\xa9") == 0) {
            out += "\\u2029";
            i += 2;
        } else {
            out += ch;
        }
    }
    out += '"';
}
|
cannam@150
|
113
|
cannam@150
|
114 static void dump(const Json::array &values, string &out) {
|
cannam@150
|
115 bool first = true;
|
cannam@150
|
116 out += "[";
|
cannam@150
|
117 for (const auto &value : values) {
|
cannam@150
|
118 if (!first)
|
cannam@150
|
119 out += ", ";
|
cannam@150
|
120 value.dump(out);
|
cannam@150
|
121 first = false;
|
cannam@150
|
122 }
|
cannam@150
|
123 out += "]";
|
cannam@150
|
124 }
|
cannam@150
|
125
|
cannam@150
|
126 static void dump(const Json::object &values, string &out) {
|
cannam@150
|
127 bool first = true;
|
cannam@150
|
128 out += "{";
|
cannam@150
|
129 for (const auto &kv : values) {
|
cannam@150
|
130 if (!first)
|
cannam@150
|
131 out += ", ";
|
cannam@150
|
132 dump(kv.first, out);
|
cannam@150
|
133 out += ": ";
|
cannam@150
|
134 kv.second.dump(out);
|
cannam@150
|
135 first = false;
|
cannam@150
|
136 }
|
cannam@150
|
137 out += "}";
|
cannam@150
|
138 }
|
cannam@150
|
139
|
cannam@150
|
140 void Json::dump(string &out) const {
|
cannam@150
|
141 m_ptr->dump(out);
|
cannam@150
|
142 }
|
cannam@150
|
143
|
cannam@150
|
144 /* * * * * * * * * * * * * * * * * * * *
|
cannam@150
|
145 * Value wrappers
|
cannam@150
|
146 */
|
cannam@150
|
147
|
cannam@150
|
148 template <Json::Type tag, typename T>
|
cannam@150
|
149 class Value : public JsonValue {
|
cannam@150
|
150 protected:
|
cannam@150
|
151
|
cannam@150
|
152 // Constructors
|
cannam@150
|
153 explicit Value(const T &value) : m_value(value) {}
|
cannam@150
|
154 explicit Value(T &&value) : m_value(move(value)) {}
|
cannam@150
|
155
|
cannam@150
|
156 // Get type tag
|
cannam@150
|
157 Json::Type type() const override {
|
cannam@150
|
158 return tag;
|
cannam@150
|
159 }
|
cannam@150
|
160
|
cannam@150
|
161 // Comparisons
|
cannam@150
|
162 bool equals(const JsonValue * other) const override {
|
cannam@150
|
163 return m_value == static_cast<const Value<tag, T> *>(other)->m_value;
|
cannam@150
|
164 }
|
cannam@150
|
165 bool less(const JsonValue * other) const override {
|
cannam@150
|
166 return m_value < static_cast<const Value<tag, T> *>(other)->m_value;
|
cannam@150
|
167 }
|
cannam@150
|
168
|
cannam@150
|
169 const T m_value;
|
cannam@150
|
170 void dump(string &out) const override { json11::dump(m_value, out); }
|
cannam@150
|
171 };
|
cannam@150
|
172
|
cannam@150
|
173 class JsonDouble final : public Value<Json::NUMBER, double> {
|
cannam@150
|
174 double number_value() const override { return m_value; }
|
cannam@150
|
175 int int_value() const override { return static_cast<int>(m_value); }
|
cannam@150
|
176 bool equals(const JsonValue * other) const override { return m_value == other->number_value(); }
|
cannam@150
|
177 bool less(const JsonValue * other) const override { return m_value < other->number_value(); }
|
cannam@150
|
178 public:
|
cannam@150
|
179 explicit JsonDouble(double value) : Value(value) {}
|
cannam@150
|
180 };
|
cannam@150
|
181
|
cannam@150
|
182 class JsonInt final : public Value<Json::NUMBER, int> {
|
cannam@150
|
183 double number_value() const override { return m_value; }
|
cannam@150
|
184 int int_value() const override { return m_value; }
|
cannam@150
|
185 bool equals(const JsonValue * other) const override { return m_value == other->number_value(); }
|
cannam@150
|
186 bool less(const JsonValue * other) const override { return m_value < other->number_value(); }
|
cannam@150
|
187 public:
|
cannam@150
|
188 explicit JsonInt(int value) : Value(value) {}
|
cannam@150
|
189 };
|
cannam@150
|
190
|
cannam@150
|
191 class JsonBoolean final : public Value<Json::BOOL, bool> {
|
cannam@150
|
192 bool bool_value() const override { return m_value; }
|
cannam@150
|
193 public:
|
cannam@150
|
194 explicit JsonBoolean(bool value) : Value(value) {}
|
cannam@150
|
195 };
|
cannam@150
|
196
|
cannam@150
|
197 class JsonString final : public Value<Json::STRING, string> {
|
cannam@150
|
198 const string &string_value() const override { return m_value; }
|
cannam@150
|
199 public:
|
cannam@150
|
200 explicit JsonString(const string &value) : Value(value) {}
|
cannam@150
|
201 explicit JsonString(string &&value) : Value(move(value)) {}
|
cannam@150
|
202 };
|
cannam@150
|
203
|
cannam@150
|
204 class JsonArray final : public Value<Json::ARRAY, Json::array> {
|
cannam@150
|
205 const Json::array &array_items() const override { return m_value; }
|
cannam@150
|
206 const Json & operator[](size_t i) const override;
|
cannam@150
|
207 public:
|
cannam@150
|
208 explicit JsonArray(const Json::array &value) : Value(value) {}
|
cannam@150
|
209 explicit JsonArray(Json::array &&value) : Value(move(value)) {}
|
cannam@150
|
210 };
|
cannam@150
|
211
|
cannam@150
|
212 class JsonObject final : public Value<Json::OBJECT, Json::object> {
|
cannam@150
|
213 const Json::object &object_items() const override { return m_value; }
|
cannam@150
|
214 const Json & operator[](const string &key) const override;
|
cannam@150
|
215 public:
|
cannam@150
|
216 explicit JsonObject(const Json::object &value) : Value(value) {}
|
cannam@150
|
217 explicit JsonObject(Json::object &&value) : Value(move(value)) {}
|
cannam@150
|
218 };
|
cannam@150
|
219
|
cannam@242
|
220 class JsonNull final : public Value<Json::NUL, NullStruct> {
|
cannam@150
|
221 public:
|
cannam@242
|
222 JsonNull() : Value({}) {}
|
cannam@150
|
223 };
|
cannam@150
|
224
|
cannam@150
|
225 /* * * * * * * * * * * * * * * * * * * *
|
cannam@150
|
226 * Static globals - static-init-safe
|
cannam@150
|
227 */
|
cannam@150
|
228 struct Statics {
|
cannam@150
|
229 const std::shared_ptr<JsonValue> null = make_shared<JsonNull>();
|
cannam@150
|
230 const std::shared_ptr<JsonValue> t = make_shared<JsonBoolean>(true);
|
cannam@150
|
231 const std::shared_ptr<JsonValue> f = make_shared<JsonBoolean>(false);
|
cannam@150
|
232 const string empty_string;
|
cannam@150
|
233 const vector<Json> empty_vector;
|
cannam@150
|
234 const map<string, Json> empty_map;
|
cannam@150
|
235 Statics() {}
|
cannam@150
|
236 };
|
cannam@150
|
237
|
cannam@150
|
238 static const Statics & statics() {
|
cannam@150
|
239 static const Statics s {};
|
cannam@150
|
240 return s;
|
cannam@150
|
241 }
|
cannam@150
|
242
|
cannam@150
|
243 static const Json & static_null() {
|
cannam@150
|
244 // This has to be separate, not in Statics, because Json() accesses statics().null.
|
cannam@150
|
245 static const Json json_null;
|
cannam@150
|
246 return json_null;
|
cannam@150
|
247 }
|
cannam@150
|
248
|
cannam@150
|
249 /* * * * * * * * * * * * * * * * * * * *
|
cannam@150
|
250 * Constructors
|
cannam@150
|
251 */
|
cannam@150
|
252
|
cannam@150
|
253 Json::Json() noexcept : m_ptr(statics().null) {}
|
cannam@150
|
254 Json::Json(std::nullptr_t) noexcept : m_ptr(statics().null) {}
|
cannam@150
|
255 Json::Json(double value) : m_ptr(make_shared<JsonDouble>(value)) {}
|
cannam@150
|
256 Json::Json(int value) : m_ptr(make_shared<JsonInt>(value)) {}
|
cannam@150
|
257 Json::Json(bool value) : m_ptr(value ? statics().t : statics().f) {}
|
cannam@150
|
258 Json::Json(const string &value) : m_ptr(make_shared<JsonString>(value)) {}
|
cannam@150
|
259 Json::Json(string &&value) : m_ptr(make_shared<JsonString>(move(value))) {}
|
cannam@150
|
260 Json::Json(const char * value) : m_ptr(make_shared<JsonString>(value)) {}
|
cannam@150
|
261 Json::Json(const Json::array &values) : m_ptr(make_shared<JsonArray>(values)) {}
|
cannam@150
|
262 Json::Json(Json::array &&values) : m_ptr(make_shared<JsonArray>(move(values))) {}
|
cannam@150
|
263 Json::Json(const Json::object &values) : m_ptr(make_shared<JsonObject>(values)) {}
|
cannam@150
|
264 Json::Json(Json::object &&values) : m_ptr(make_shared<JsonObject>(move(values))) {}
|
cannam@150
|
265
|
cannam@150
|
266 /* * * * * * * * * * * * * * * * * * * *
|
cannam@150
|
267 * Accessors
|
cannam@150
|
268 */
|
cannam@150
|
269
|
cannam@150
|
270 Json::Type Json::type() const { return m_ptr->type(); }
|
cannam@150
|
271 double Json::number_value() const { return m_ptr->number_value(); }
|
cannam@150
|
272 int Json::int_value() const { return m_ptr->int_value(); }
|
cannam@150
|
273 bool Json::bool_value() const { return m_ptr->bool_value(); }
|
cannam@150
|
274 const string & Json::string_value() const { return m_ptr->string_value(); }
|
cannam@150
|
275 const vector<Json> & Json::array_items() const { return m_ptr->array_items(); }
|
cannam@150
|
276 const map<string, Json> & Json::object_items() const { return m_ptr->object_items(); }
|
cannam@150
|
277 const Json & Json::operator[] (size_t i) const { return (*m_ptr)[i]; }
|
cannam@150
|
278 const Json & Json::operator[] (const string &key) const { return (*m_ptr)[key]; }
|
cannam@150
|
279
|
cannam@150
|
280 double JsonValue::number_value() const { return 0; }
|
cannam@150
|
281 int JsonValue::int_value() const { return 0; }
|
cannam@150
|
282 bool JsonValue::bool_value() const { return false; }
|
cannam@150
|
283 const string & JsonValue::string_value() const { return statics().empty_string; }
|
cannam@150
|
284 const vector<Json> & JsonValue::array_items() const { return statics().empty_vector; }
|
cannam@150
|
285 const map<string, Json> & JsonValue::object_items() const { return statics().empty_map; }
|
cannam@150
|
286 const Json & JsonValue::operator[] (size_t) const { return static_null(); }
|
cannam@150
|
287 const Json & JsonValue::operator[] (const string &) const { return static_null(); }
|
cannam@150
|
288
|
cannam@150
|
289 const Json & JsonObject::operator[] (const string &key) const {
|
cannam@150
|
290 auto iter = m_value.find(key);
|
cannam@150
|
291 return (iter == m_value.end()) ? static_null() : iter->second;
|
cannam@150
|
292 }
|
cannam@150
|
293 const Json & JsonArray::operator[] (size_t i) const {
|
cannam@150
|
294 if (i >= m_value.size()) return static_null();
|
cannam@150
|
295 else return m_value[i];
|
cannam@150
|
296 }
|
cannam@150
|
297
|
cannam@150
|
298 /* * * * * * * * * * * * * * * * * * * *
|
cannam@150
|
299 * Comparison
|
cannam@150
|
300 */
|
cannam@150
|
301
|
cannam@150
|
302 bool Json::operator== (const Json &other) const {
|
cannam@242
|
303 if (m_ptr == other.m_ptr)
|
cannam@242
|
304 return true;
|
cannam@150
|
305 if (m_ptr->type() != other.m_ptr->type())
|
cannam@150
|
306 return false;
|
cannam@150
|
307
|
cannam@150
|
308 return m_ptr->equals(other.m_ptr.get());
|
cannam@150
|
309 }
|
cannam@150
|
310
|
cannam@150
|
311 bool Json::operator< (const Json &other) const {
|
cannam@242
|
312 if (m_ptr == other.m_ptr)
|
cannam@242
|
313 return false;
|
cannam@150
|
314 if (m_ptr->type() != other.m_ptr->type())
|
cannam@150
|
315 return m_ptr->type() < other.m_ptr->type();
|
cannam@150
|
316
|
cannam@150
|
317 return m_ptr->less(other.m_ptr.get());
|
cannam@150
|
318 }
|
cannam@150
|
319
|
cannam@150
|
320 /* * * * * * * * * * * * * * * * * * * *
|
cannam@150
|
321 * Parsing
|
cannam@150
|
322 */
|
cannam@150
|
323
|
cannam@150
|
324 /* esc(c)
|
cannam@150
|
325 *
|
cannam@150
|
326 * Format char c suitable for printing in an error message.
|
cannam@150
|
327 */
|
cannam@150
|
/* esc(c)
 *
 * Format char c for use in an error message. Bytes in 0x20..0x7f are shown
 * as the quoted character followed by its numeric value, e.g. 'a' (97);
 * all other bytes are shown as the numeric value only, e.g. (10).
 */
static inline std::string esc(char c) {
    char buf[12];
    // unsigned char is used rather than uint8_t so that this function does
    // not depend on <cstdint> being pulled in indirectly.
    const unsigned char byte = static_cast<unsigned char>(c);
    if (byte >= 0x20 && byte <= 0x7f) {
        snprintf(buf, sizeof buf, "'%c' (%d)", c, c);
    } else {
        snprintf(buf, sizeof buf, "(%d)", c);
    }
    return std::string(buf);
}
|
cannam@150
|
337
|
cannam@150
|
/* Return true when x lies in the closed interval [lower, upper]. */
static inline bool in_range(long x, long lower, long upper) {
    return (x >= lower && x <= upper);
}
|
cannam@150
|
341
|
cannam@242
|
342 namespace {
|
cannam@150
|
343 /* JsonParser
|
cannam@150
|
344 *
|
cannam@150
|
345 * Object that tracks all state of an in-progress parse.
|
cannam@150
|
346 */
|
cannam@242
|
347 struct JsonParser final {
|
cannam@150
|
348
|
cannam@150
|
349 /* State
|
cannam@150
|
350 */
|
cannam@150
|
351 const string &str;
|
cannam@150
|
352 size_t i;
|
cannam@150
|
353 string &err;
|
cannam@150
|
354 bool failed;
|
cannam@150
|
355 const JsonParse strategy;
|
cannam@150
|
356
|
cannam@150
|
357 /* fail(msg, err_ret = Json())
|
cannam@150
|
358 *
|
cannam@150
|
359 * Mark this parse as failed.
|
cannam@150
|
360 */
|
cannam@150
|
361 Json fail(string &&msg) {
|
cannam@150
|
362 return fail(move(msg), Json());
|
cannam@150
|
363 }
|
cannam@150
|
364
|
cannam@150
|
365 template <typename T>
|
cannam@150
|
366 T fail(string &&msg, const T err_ret) {
|
cannam@150
|
367 if (!failed)
|
cannam@150
|
368 err = std::move(msg);
|
cannam@150
|
369 failed = true;
|
cannam@150
|
370 return err_ret;
|
cannam@150
|
371 }
|
cannam@150
|
372
|
cannam@150
|
373 /* consume_whitespace()
|
cannam@150
|
374 *
|
cannam@150
|
375 * Advance until the current character is non-whitespace.
|
cannam@150
|
376 */
|
cannam@150
|
377 void consume_whitespace() {
|
cannam@150
|
378 while (str[i] == ' ' || str[i] == '\r' || str[i] == '\n' || str[i] == '\t')
|
cannam@150
|
379 i++;
|
cannam@150
|
380 }
|
cannam@150
|
381
|
cannam@150
|
382 /* consume_comment()
|
cannam@150
|
383 *
|
cannam@150
|
384 * Advance comments (c-style inline and multiline).
|
cannam@150
|
385 */
|
cannam@150
|
386 bool consume_comment() {
|
cannam@150
|
387 bool comment_found = false;
|
cannam@150
|
388 if (str[i] == '/') {
|
cannam@150
|
389 i++;
|
cannam@150
|
390 if (i == str.size())
|
cannam@242
|
391 return fail("unexpected end of input after start of comment", false);
|
cannam@150
|
392 if (str[i] == '/') { // inline comment
|
cannam@150
|
393 i++;
|
cannam@242
|
394 // advance until next line, or end of input
|
cannam@242
|
395 while (i < str.size() && str[i] != '\n') {
|
cannam@150
|
396 i++;
|
cannam@150
|
397 }
|
cannam@150
|
398 comment_found = true;
|
cannam@150
|
399 }
|
cannam@150
|
400 else if (str[i] == '*') { // multiline comment
|
cannam@150
|
401 i++;
|
cannam@150
|
402 if (i > str.size()-2)
|
cannam@242
|
403 return fail("unexpected end of input inside multi-line comment", false);
|
cannam@150
|
404 // advance until closing tokens
|
cannam@150
|
405 while (!(str[i] == '*' && str[i+1] == '/')) {
|
cannam@150
|
406 i++;
|
cannam@150
|
407 if (i > str.size()-2)
|
cannam@150
|
408 return fail(
|
cannam@242
|
409 "unexpected end of input inside multi-line comment", false);
|
cannam@150
|
410 }
|
cannam@150
|
411 i += 2;
|
cannam@150
|
412 comment_found = true;
|
cannam@150
|
413 }
|
cannam@150
|
414 else
|
cannam@242
|
415 return fail("malformed comment", false);
|
cannam@150
|
416 }
|
cannam@150
|
417 return comment_found;
|
cannam@150
|
418 }
|
cannam@150
|
419
|
cannam@150
|
420 /* consume_garbage()
|
cannam@150
|
421 *
|
cannam@150
|
422 * Advance until the current character is non-whitespace and non-comment.
|
cannam@150
|
423 */
|
cannam@150
|
424 void consume_garbage() {
|
cannam@150
|
425 consume_whitespace();
|
cannam@150
|
426 if(strategy == JsonParse::COMMENTS) {
|
cannam@150
|
427 bool comment_found = false;
|
cannam@150
|
428 do {
|
cannam@150
|
429 comment_found = consume_comment();
|
cannam@242
|
430 if (failed) return;
|
cannam@150
|
431 consume_whitespace();
|
cannam@150
|
432 }
|
cannam@150
|
433 while(comment_found);
|
cannam@150
|
434 }
|
cannam@150
|
435 }
|
cannam@150
|
436
|
cannam@150
|
437 /* get_next_token()
|
cannam@150
|
438 *
|
cannam@150
|
439 * Return the next non-whitespace character. If the end of the input is reached,
|
cannam@150
|
440 * flag an error and return 0.
|
cannam@150
|
441 */
|
cannam@150
|
442 char get_next_token() {
|
cannam@150
|
443 consume_garbage();
|
cannam@242
|
444 if (failed) return (char)0;
|
cannam@150
|
445 if (i == str.size())
|
cannam@242
|
446 return fail("unexpected end of input", (char)0);
|
cannam@150
|
447
|
cannam@150
|
448 return str[i++];
|
cannam@150
|
449 }
|
cannam@150
|
450
|
cannam@150
|
451 /* encode_utf8(pt, out)
|
cannam@150
|
452 *
|
cannam@150
|
453 * Encode pt as UTF-8 and add it to out.
|
cannam@150
|
454 */
|
cannam@150
|
455 void encode_utf8(long pt, string & out) {
|
cannam@150
|
456 if (pt < 0)
|
cannam@150
|
457 return;
|
cannam@150
|
458
|
cannam@150
|
459 if (pt < 0x80) {
|
cannam@150
|
460 out += static_cast<char>(pt);
|
cannam@150
|
461 } else if (pt < 0x800) {
|
cannam@150
|
462 out += static_cast<char>((pt >> 6) | 0xC0);
|
cannam@150
|
463 out += static_cast<char>((pt & 0x3F) | 0x80);
|
cannam@150
|
464 } else if (pt < 0x10000) {
|
cannam@150
|
465 out += static_cast<char>((pt >> 12) | 0xE0);
|
cannam@150
|
466 out += static_cast<char>(((pt >> 6) & 0x3F) | 0x80);
|
cannam@150
|
467 out += static_cast<char>((pt & 0x3F) | 0x80);
|
cannam@150
|
468 } else {
|
cannam@150
|
469 out += static_cast<char>((pt >> 18) | 0xF0);
|
cannam@150
|
470 out += static_cast<char>(((pt >> 12) & 0x3F) | 0x80);
|
cannam@150
|
471 out += static_cast<char>(((pt >> 6) & 0x3F) | 0x80);
|
cannam@150
|
472 out += static_cast<char>((pt & 0x3F) | 0x80);
|
cannam@150
|
473 }
|
cannam@150
|
474 }
|
cannam@150
|
475
|
cannam@150
|
476 /* parse_string()
|
cannam@150
|
477 *
|
cannam@150
|
478 * Parse a string, starting at the current position.
|
cannam@150
|
479 */
|
cannam@150
|
480 string parse_string() {
|
cannam@150
|
481 string out;
|
cannam@150
|
482 long last_escaped_codepoint = -1;
|
cannam@150
|
483 while (true) {
|
cannam@150
|
484 if (i == str.size())
|
cannam@150
|
485 return fail("unexpected end of input in string", "");
|
cannam@150
|
486
|
cannam@150
|
487 char ch = str[i++];
|
cannam@150
|
488
|
cannam@150
|
489 if (ch == '"') {
|
cannam@150
|
490 encode_utf8(last_escaped_codepoint, out);
|
cannam@150
|
491 return out;
|
cannam@150
|
492 }
|
cannam@150
|
493
|
cannam@150
|
494 if (in_range(ch, 0, 0x1f))
|
cannam@150
|
495 return fail("unescaped " + esc(ch) + " in string", "");
|
cannam@150
|
496
|
cannam@150
|
497 // The usual case: non-escaped characters
|
cannam@150
|
498 if (ch != '\\') {
|
cannam@150
|
499 encode_utf8(last_escaped_codepoint, out);
|
cannam@150
|
500 last_escaped_codepoint = -1;
|
cannam@150
|
501 out += ch;
|
cannam@150
|
502 continue;
|
cannam@150
|
503 }
|
cannam@150
|
504
|
cannam@150
|
505 // Handle escapes
|
cannam@150
|
506 if (i == str.size())
|
cannam@150
|
507 return fail("unexpected end of input in string", "");
|
cannam@150
|
508
|
cannam@150
|
509 ch = str[i++];
|
cannam@150
|
510
|
cannam@150
|
511 if (ch == 'u') {
|
cannam@150
|
512 // Extract 4-byte escape sequence
|
cannam@150
|
513 string esc = str.substr(i, 4);
|
cannam@150
|
514 // Explicitly check length of the substring. The following loop
|
cannam@150
|
515 // relies on std::string returning the terminating NUL when
|
cannam@150
|
516 // accessing str[length]. Checking here reduces brittleness.
|
cannam@150
|
517 if (esc.length() < 4) {
|
cannam@150
|
518 return fail("bad \\u escape: " + esc, "");
|
cannam@150
|
519 }
|
cannam@242
|
520 for (size_t j = 0; j < 4; j++) {
|
cannam@150
|
521 if (!in_range(esc[j], 'a', 'f') && !in_range(esc[j], 'A', 'F')
|
cannam@150
|
522 && !in_range(esc[j], '0', '9'))
|
cannam@150
|
523 return fail("bad \\u escape: " + esc, "");
|
cannam@150
|
524 }
|
cannam@150
|
525
|
cannam@150
|
526 long codepoint = strtol(esc.data(), nullptr, 16);
|
cannam@150
|
527
|
cannam@150
|
528 // JSON specifies that characters outside the BMP shall be encoded as a pair
|
cannam@150
|
529 // of 4-hex-digit \u escapes encoding their surrogate pair components. Check
|
cannam@150
|
530 // whether we're in the middle of such a beast: the previous codepoint was an
|
cannam@150
|
531 // escaped lead (high) surrogate, and this is a trail (low) surrogate.
|
cannam@150
|
532 if (in_range(last_escaped_codepoint, 0xD800, 0xDBFF)
|
cannam@150
|
533 && in_range(codepoint, 0xDC00, 0xDFFF)) {
|
cannam@150
|
534 // Reassemble the two surrogate pairs into one astral-plane character, per
|
cannam@150
|
535 // the UTF-16 algorithm.
|
cannam@150
|
536 encode_utf8((((last_escaped_codepoint - 0xD800) << 10)
|
cannam@150
|
537 | (codepoint - 0xDC00)) + 0x10000, out);
|
cannam@150
|
538 last_escaped_codepoint = -1;
|
cannam@150
|
539 } else {
|
cannam@150
|
540 encode_utf8(last_escaped_codepoint, out);
|
cannam@150
|
541 last_escaped_codepoint = codepoint;
|
cannam@150
|
542 }
|
cannam@150
|
543
|
cannam@150
|
544 i += 4;
|
cannam@150
|
545 continue;
|
cannam@150
|
546 }
|
cannam@150
|
547
|
cannam@150
|
548 encode_utf8(last_escaped_codepoint, out);
|
cannam@150
|
549 last_escaped_codepoint = -1;
|
cannam@150
|
550
|
cannam@150
|
551 if (ch == 'b') {
|
cannam@150
|
552 out += '\b';
|
cannam@150
|
553 } else if (ch == 'f') {
|
cannam@150
|
554 out += '\f';
|
cannam@150
|
555 } else if (ch == 'n') {
|
cannam@150
|
556 out += '\n';
|
cannam@150
|
557 } else if (ch == 'r') {
|
cannam@150
|
558 out += '\r';
|
cannam@150
|
559 } else if (ch == 't') {
|
cannam@150
|
560 out += '\t';
|
cannam@150
|
561 } else if (ch == '"' || ch == '\\' || ch == '/') {
|
cannam@150
|
562 out += ch;
|
cannam@150
|
563 } else {
|
cannam@150
|
564 return fail("invalid escape character " + esc(ch), "");
|
cannam@150
|
565 }
|
cannam@150
|
566 }
|
cannam@150
|
567 }
|
cannam@150
|
568
|
cannam@150
|
569 /* parse_number()
|
cannam@150
|
570 *
|
cannam@150
|
571 * Parse a double.
|
cannam@150
|
572 */
|
cannam@150
|
573 Json parse_number() {
|
cannam@150
|
574 size_t start_pos = i;
|
cannam@150
|
575
|
cannam@150
|
576 if (str[i] == '-')
|
cannam@150
|
577 i++;
|
cannam@150
|
578
|
cannam@150
|
579 // Integer part
|
cannam@150
|
580 if (str[i] == '0') {
|
cannam@150
|
581 i++;
|
cannam@150
|
582 if (in_range(str[i], '0', '9'))
|
cannam@150
|
583 return fail("leading 0s not permitted in numbers");
|
cannam@150
|
584 } else if (in_range(str[i], '1', '9')) {
|
cannam@150
|
585 i++;
|
cannam@150
|
586 while (in_range(str[i], '0', '9'))
|
cannam@150
|
587 i++;
|
cannam@150
|
588 } else {
|
cannam@150
|
589 return fail("invalid " + esc(str[i]) + " in number");
|
cannam@150
|
590 }
|
cannam@150
|
591
|
cannam@150
|
592 if (str[i] != '.' && str[i] != 'e' && str[i] != 'E'
|
cannam@150
|
593 && (i - start_pos) <= static_cast<size_t>(std::numeric_limits<int>::digits10)) {
|
cannam@150
|
594 return std::atoi(str.c_str() + start_pos);
|
cannam@150
|
595 }
|
cannam@150
|
596
|
cannam@150
|
597 // Decimal part
|
cannam@150
|
598 if (str[i] == '.') {
|
cannam@150
|
599 i++;
|
cannam@150
|
600 if (!in_range(str[i], '0', '9'))
|
cannam@150
|
601 return fail("at least one digit required in fractional part");
|
cannam@150
|
602
|
cannam@150
|
603 while (in_range(str[i], '0', '9'))
|
cannam@150
|
604 i++;
|
cannam@150
|
605 }
|
cannam@150
|
606
|
cannam@150
|
607 // Exponent part
|
cannam@150
|
608 if (str[i] == 'e' || str[i] == 'E') {
|
cannam@150
|
609 i++;
|
cannam@150
|
610
|
cannam@150
|
611 if (str[i] == '+' || str[i] == '-')
|
cannam@150
|
612 i++;
|
cannam@150
|
613
|
cannam@150
|
614 if (!in_range(str[i], '0', '9'))
|
cannam@150
|
615 return fail("at least one digit required in exponent");
|
cannam@150
|
616
|
cannam@150
|
617 while (in_range(str[i], '0', '9'))
|
cannam@150
|
618 i++;
|
cannam@150
|
619 }
|
cannam@150
|
620
|
cannam@150
|
621 return std::strtod(str.c_str() + start_pos, nullptr);
|
cannam@150
|
622 }
|
cannam@150
|
623
|
cannam@150
|
624 /* expect(str, res)
|
cannam@150
|
625 *
|
cannam@150
|
626 * Expect that 'str' starts at the character that was just read. If it does, advance
|
cannam@150
|
627 * the input and return res. If not, flag an error.
|
cannam@150
|
628 */
|
cannam@150
|
629 Json expect(const string &expected, Json res) {
|
cannam@150
|
630 assert(i != 0);
|
cannam@150
|
631 i--;
|
cannam@150
|
632 if (str.compare(i, expected.length(), expected) == 0) {
|
cannam@150
|
633 i += expected.length();
|
cannam@150
|
634 return res;
|
cannam@150
|
635 } else {
|
cannam@150
|
636 return fail("parse error: expected " + expected + ", got " + str.substr(i, expected.length()));
|
cannam@150
|
637 }
|
cannam@150
|
638 }
|
cannam@150
|
639
|
cannam@150
|
640 /* parse_json()
|
cannam@150
|
641 *
|
cannam@150
|
642 * Parse a JSON object.
|
cannam@150
|
643 */
|
cannam@150
|
644 Json parse_json(int depth) {
|
cannam@150
|
645 if (depth > max_depth) {
|
cannam@150
|
646 return fail("exceeded maximum nesting depth");
|
cannam@150
|
647 }
|
cannam@150
|
648
|
cannam@150
|
649 char ch = get_next_token();
|
cannam@150
|
650 if (failed)
|
cannam@150
|
651 return Json();
|
cannam@150
|
652
|
cannam@150
|
653 if (ch == '-' || (ch >= '0' && ch <= '9')) {
|
cannam@150
|
654 i--;
|
cannam@150
|
655 return parse_number();
|
cannam@150
|
656 }
|
cannam@150
|
657
|
cannam@150
|
658 if (ch == 't')
|
cannam@150
|
659 return expect("true", true);
|
cannam@150
|
660
|
cannam@150
|
661 if (ch == 'f')
|
cannam@150
|
662 return expect("false", false);
|
cannam@150
|
663
|
cannam@150
|
664 if (ch == 'n')
|
cannam@150
|
665 return expect("null", Json());
|
cannam@150
|
666
|
cannam@150
|
667 if (ch == '"')
|
cannam@150
|
668 return parse_string();
|
cannam@150
|
669
|
cannam@150
|
670 if (ch == '{') {
|
cannam@150
|
671 map<string, Json> data;
|
cannam@150
|
672 ch = get_next_token();
|
cannam@150
|
673 if (ch == '}')
|
cannam@150
|
674 return data;
|
cannam@150
|
675
|
cannam@150
|
676 while (1) {
|
cannam@150
|
677 if (ch != '"')
|
cannam@150
|
678 return fail("expected '\"' in object, got " + esc(ch));
|
cannam@150
|
679
|
cannam@150
|
680 string key = parse_string();
|
cannam@150
|
681 if (failed)
|
cannam@150
|
682 return Json();
|
cannam@150
|
683
|
cannam@150
|
684 ch = get_next_token();
|
cannam@150
|
685 if (ch != ':')
|
cannam@150
|
686 return fail("expected ':' in object, got " + esc(ch));
|
cannam@150
|
687
|
cannam@150
|
688 data[std::move(key)] = parse_json(depth + 1);
|
cannam@150
|
689 if (failed)
|
cannam@150
|
690 return Json();
|
cannam@150
|
691
|
cannam@150
|
692 ch = get_next_token();
|
cannam@150
|
693 if (ch == '}')
|
cannam@150
|
694 break;
|
cannam@150
|
695 if (ch != ',')
|
cannam@150
|
696 return fail("expected ',' in object, got " + esc(ch));
|
cannam@150
|
697
|
cannam@150
|
698 ch = get_next_token();
|
cannam@150
|
699 }
|
cannam@150
|
700 return data;
|
cannam@150
|
701 }
|
cannam@150
|
702
|
cannam@150
|
703 if (ch == '[') {
|
cannam@150
|
704 vector<Json> data;
|
cannam@150
|
705 ch = get_next_token();
|
cannam@150
|
706 if (ch == ']')
|
cannam@150
|
707 return data;
|
cannam@150
|
708
|
cannam@150
|
709 while (1) {
|
cannam@150
|
710 i--;
|
cannam@150
|
711 data.push_back(parse_json(depth + 1));
|
cannam@150
|
712 if (failed)
|
cannam@150
|
713 return Json();
|
cannam@150
|
714
|
cannam@150
|
715 ch = get_next_token();
|
cannam@150
|
716 if (ch == ']')
|
cannam@150
|
717 break;
|
cannam@150
|
718 if (ch != ',')
|
cannam@150
|
719 return fail("expected ',' in list, got " + esc(ch));
|
cannam@150
|
720
|
cannam@150
|
721 ch = get_next_token();
|
cannam@150
|
722 (void)ch;
|
cannam@150
|
723 }
|
cannam@150
|
724 return data;
|
cannam@150
|
725 }
|
cannam@150
|
726
|
cannam@150
|
727 return fail("expected value, got " + esc(ch));
|
cannam@150
|
728 }
|
cannam@150
|
729 };
|
cannam@242
|
730 }//namespace {
|
cannam@150
|
731
|
cannam@150
|
732 Json Json::parse(const string &in, string &err, JsonParse strategy) {
|
cannam@150
|
733 JsonParser parser { in, 0, err, false, strategy };
|
cannam@150
|
734 Json result = parser.parse_json(0);
|
cannam@150
|
735
|
cannam@150
|
736 // Check for any trailing garbage
|
cannam@150
|
737 parser.consume_garbage();
|
cannam@242
|
738 if (parser.failed)
|
cannam@242
|
739 return Json();
|
cannam@150
|
740 if (parser.i != in.size())
|
cannam@150
|
741 return parser.fail("unexpected trailing " + esc(in[parser.i]));
|
cannam@150
|
742
|
cannam@150
|
743 return result;
|
cannam@150
|
744 }
|
cannam@150
|
745
|
cannam@150
|
746 // Documented in json11.hpp
|
cannam@150
|
747 vector<Json> Json::parse_multi(const string &in,
|
cannam@242
|
748 std::string::size_type &parser_stop_pos,
|
cannam@150
|
749 string &err,
|
cannam@150
|
750 JsonParse strategy) {
|
cannam@150
|
751 JsonParser parser { in, 0, err, false, strategy };
|
cannam@242
|
752 parser_stop_pos = 0;
|
cannam@150
|
753 vector<Json> json_vec;
|
cannam@150
|
754 while (parser.i != in.size() && !parser.failed) {
|
cannam@150
|
755 json_vec.push_back(parser.parse_json(0));
|
cannam@242
|
756 if (parser.failed)
|
cannam@242
|
757 break;
|
cannam@242
|
758
|
cannam@150
|
759 // Check for another object
|
cannam@150
|
760 parser.consume_garbage();
|
cannam@242
|
761 if (parser.failed)
|
cannam@242
|
762 break;
|
cannam@242
|
763 parser_stop_pos = parser.i;
|
cannam@150
|
764 }
|
cannam@150
|
765 return json_vec;
|
cannam@150
|
766 }
|
cannam@150
|
767
|
cannam@150
|
768 /* * * * * * * * * * * * * * * * * * * *
|
cannam@150
|
769 * Shape-checking
|
cannam@150
|
770 */
|
cannam@150
|
771
|
cannam@150
|
772 bool Json::has_shape(const shape & types, string & err) const {
|
cannam@150
|
773 if (!is_object()) {
|
cannam@150
|
774 err = "expected JSON object, got " + dump();
|
cannam@150
|
775 return false;
|
cannam@150
|
776 }
|
cannam@150
|
777
|
cannam@150
|
778 for (auto & item : types) {
|
cannam@150
|
779 if ((*this)[item.first].type() != item.second) {
|
cannam@150
|
780 err = "bad type for " + item.first + " in " + dump();
|
cannam@150
|
781 return false;
|
cannam@150
|
782 }
|
cannam@150
|
783 }
|
cannam@150
|
784
|
cannam@150
|
785 return true;
|
cannam@150
|
786 }
|
cannam@150
|
787
|
cannam@150
|
788 } // namespace json11
|