cannam@150
|
1 /* Copyright (c) 2013 Dropbox, Inc.
|
cannam@150
|
2 *
|
cannam@150
|
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
|
cannam@150
|
4 * of this software and associated documentation files (the "Software"), to deal
|
cannam@150
|
5 * in the Software without restriction, including without limitation the rights
|
cannam@150
|
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
cannam@150
|
7 * copies of the Software, and to permit persons to whom the Software is
|
cannam@150
|
8 * furnished to do so, subject to the following conditions:
|
cannam@150
|
9 *
|
cannam@150
|
10 * The above copyright notice and this permission notice shall be included in
|
cannam@150
|
11 * all copies or substantial portions of the Software.
|
cannam@150
|
12 *
|
cannam@150
|
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
cannam@150
|
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
cannam@150
|
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
cannam@150
|
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
cannam@150
|
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
cannam@150
|
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
cannam@150
|
19 * THE SOFTWARE.
|
cannam@150
|
20 */
|
cannam@150
|
21
|
cannam@150
|
22 #include "json11.hpp"
|
cannam@150
|
23 #include <cassert>
|
cannam@150
|
24 #include <cmath>
|
salimma@295
|
25 #include <cstdint>
|
cannam@150
|
26 #include <cstdlib>
|
cannam@150
|
27 #include <cstdio>
|
cannam@150
|
28 #include <limits>
|
cannam@150
|
29
|
cannam@150
|
30 namespace json11 {
|
cannam@150
|
31
|
cannam@150
|
32 static const int max_depth = 200;
|
cannam@150
|
33
|
cannam@150
|
34 using std::string;
|
cannam@150
|
35 using std::vector;
|
cannam@150
|
36 using std::map;
|
cannam@150
|
37 using std::make_shared;
|
cannam@150
|
38 using std::initializer_list;
|
cannam@150
|
39 using std::move;
|
cannam@150
|
40
|
cannam@242
|
/* Stand-in payload type for JSON null: an empty struct carrying no data.
 * It provides the == and < operators that the generic value wrapper needs
 * for its comparisons. std::nullptr_t is not used because it may not be
 * orderable.
 */
struct NullStruct {
    // Any null is equal to any other null.
    bool operator==(NullStruct) const {
        return true;
    }

    // No null sorts before another null.
    bool operator<(NullStruct) const {
        return false;
    }
};
|
cannam@242
|
49
|
cannam@150
|
50 /* * * * * * * * * * * * * * * * * * * *
|
cannam@150
|
51 * Serialization
|
cannam@150
|
52 */
|
cannam@150
|
53
|
cannam@242
|
54 static void dump(NullStruct, string &out) {
|
cannam@150
|
55 out += "null";
|
cannam@150
|
56 }
|
cannam@150
|
57
|
cannam@150
|
/* Serialize a double by appending its text form to out.
 *
 * Finite values are formatted with "%.17g". Values for which std::isfinite
 * is false (infinities, NaN) are written as "null".
 */
static void dump(double value, std::string &out) {
    if (!std::isfinite(value)) {
        out += "null";
        return;
    }

    char text[32];
    snprintf(text, sizeof text, "%.17g", value);
    out += text;
}
|
cannam@150
|
67
|
cannam@150
|
/* Serialize an int by appending its decimal ("%d") text form to out. */
static void dump(int value, std::string &out) {
    char text[32];
    snprintf(text, sizeof text, "%d", value);
    out.append(text);
}
|
cannam@150
|
73
|
cannam@150
|
/* Serialize a bool: appends "true" or "false" to out. */
static void dump(bool value, std::string &out) {
    if (value) {
        out += "true";
    } else {
        out += "false";
    }
}
|
cannam@150
|
77
|
cannam@150
|
/* Serialize a string as a double-quoted JSON string literal appended to out.
 *
 * Escaping rules:
 *  - backslash, double quote, \b, \f, \n, \r and \t use their two-character
 *    escapes;
 *  - any other byte <= 0x1f is written as a \u00XX escape;
 *  - the three-byte sequences E2 80 A8 and E2 80 A9 are written as \u2028
 *    and \u2029 respectively;
 *  - every other byte is copied through unchanged.
 */
static void dump(const std::string &value, std::string &out) {
    const size_t len = value.length();

    out += '"';
    for (size_t pos = 0; pos < len; pos++) {
        const char ch = value[pos];

        // Characters with a dedicated short escape.
        switch (ch) {
        case '\\': out += "\\\\"; continue;
        case '"':  out += "\\\""; continue;
        case '\b': out += "\\b";  continue;
        case '\f': out += "\\f";  continue;
        case '\n': out += "\\n";  continue;
        case '\r': out += "\\r";  continue;
        case '\t': out += "\\t";  continue;
        default:   break;
        }

        const uint8_t byte = static_cast<uint8_t>(ch);
        if (byte <= 0x1f) {
            // Remaining control characters: generic \u escape.
            char hex[8];
            snprintf(hex, sizeof hex, "\\u%04x", ch);
            out += hex;
        } else if (byte == 0xe2 && static_cast<uint8_t>(value[pos + 1]) == 0x80) {
            // value[pos + 1] is only 0x80 when it is a real character, so
            // pos + 2 is at most len and value[pos + 2] is a valid read (the
            // terminating NUL at worst). The third byte is deliberately not
            // read unless the second one matched.
            const uint8_t third = static_cast<uint8_t>(value[pos + 2]);
            if (third == 0xa8) {
                out += "\\u2028";
                pos += 2;
            } else if (third == 0xa9) {
                out += "\\u2029";
                pos += 2;
            } else {
                out += ch;
            }
        } else {
            out += ch;
        }
    }
    out += '"';
}
|
cannam@150
|
114
|
cannam@150
|
115 static void dump(const Json::array &values, string &out) {
|
cannam@150
|
116 bool first = true;
|
cannam@150
|
117 out += "[";
|
cannam@150
|
118 for (const auto &value : values) {
|
cannam@150
|
119 if (!first)
|
cannam@150
|
120 out += ", ";
|
cannam@150
|
121 value.dump(out);
|
cannam@150
|
122 first = false;
|
cannam@150
|
123 }
|
cannam@150
|
124 out += "]";
|
cannam@150
|
125 }
|
cannam@150
|
126
|
cannam@150
|
127 static void dump(const Json::object &values, string &out) {
|
cannam@150
|
128 bool first = true;
|
cannam@150
|
129 out += "{";
|
cannam@150
|
130 for (const auto &kv : values) {
|
cannam@150
|
131 if (!first)
|
cannam@150
|
132 out += ", ";
|
cannam@150
|
133 dump(kv.first, out);
|
cannam@150
|
134 out += ": ";
|
cannam@150
|
135 kv.second.dump(out);
|
cannam@150
|
136 first = false;
|
cannam@150
|
137 }
|
cannam@150
|
138 out += "}";
|
cannam@150
|
139 }
|
cannam@150
|
140
|
cannam@150
|
141 void Json::dump(string &out) const {
|
cannam@150
|
142 m_ptr->dump(out);
|
cannam@150
|
143 }
|
cannam@150
|
144
|
cannam@150
|
145 /* * * * * * * * * * * * * * * * * * * *
|
cannam@150
|
146 * Value wrappers
|
cannam@150
|
147 */
|
cannam@150
|
148
|
cannam@150
|
149 template <Json::Type tag, typename T>
|
cannam@150
|
150 class Value : public JsonValue {
|
cannam@150
|
151 protected:
|
cannam@150
|
152
|
cannam@150
|
153 // Constructors
|
cannam@150
|
154 explicit Value(const T &value) : m_value(value) {}
|
cannam@150
|
155 explicit Value(T &&value) : m_value(move(value)) {}
|
cannam@150
|
156
|
cannam@150
|
157 // Get type tag
|
cannam@150
|
158 Json::Type type() const override {
|
cannam@150
|
159 return tag;
|
cannam@150
|
160 }
|
cannam@150
|
161
|
cannam@150
|
162 // Comparisons
|
cannam@150
|
163 bool equals(const JsonValue * other) const override {
|
cannam@150
|
164 return m_value == static_cast<const Value<tag, T> *>(other)->m_value;
|
cannam@150
|
165 }
|
cannam@150
|
166 bool less(const JsonValue * other) const override {
|
cannam@150
|
167 return m_value < static_cast<const Value<tag, T> *>(other)->m_value;
|
cannam@150
|
168 }
|
cannam@150
|
169
|
cannam@150
|
170 const T m_value;
|
cannam@150
|
171 void dump(string &out) const override { json11::dump(m_value, out); }
|
cannam@150
|
172 };
|
cannam@150
|
173
|
cannam@150
|
174 class JsonDouble final : public Value<Json::NUMBER, double> {
|
cannam@150
|
175 double number_value() const override { return m_value; }
|
cannam@150
|
176 int int_value() const override { return static_cast<int>(m_value); }
|
cannam@150
|
177 bool equals(const JsonValue * other) const override { return m_value == other->number_value(); }
|
cannam@150
|
178 bool less(const JsonValue * other) const override { return m_value < other->number_value(); }
|
cannam@150
|
179 public:
|
cannam@150
|
180 explicit JsonDouble(double value) : Value(value) {}
|
cannam@150
|
181 };
|
cannam@150
|
182
|
cannam@150
|
183 class JsonInt final : public Value<Json::NUMBER, int> {
|
cannam@150
|
184 double number_value() const override { return m_value; }
|
cannam@150
|
185 int int_value() const override { return m_value; }
|
cannam@150
|
186 bool equals(const JsonValue * other) const override { return m_value == other->number_value(); }
|
cannam@150
|
187 bool less(const JsonValue * other) const override { return m_value < other->number_value(); }
|
cannam@150
|
188 public:
|
cannam@150
|
189 explicit JsonInt(int value) : Value(value) {}
|
cannam@150
|
190 };
|
cannam@150
|
191
|
cannam@150
|
192 class JsonBoolean final : public Value<Json::BOOL, bool> {
|
cannam@150
|
193 bool bool_value() const override { return m_value; }
|
cannam@150
|
194 public:
|
cannam@150
|
195 explicit JsonBoolean(bool value) : Value(value) {}
|
cannam@150
|
196 };
|
cannam@150
|
197
|
cannam@150
|
198 class JsonString final : public Value<Json::STRING, string> {
|
cannam@150
|
199 const string &string_value() const override { return m_value; }
|
cannam@150
|
200 public:
|
cannam@150
|
201 explicit JsonString(const string &value) : Value(value) {}
|
cannam@150
|
202 explicit JsonString(string &&value) : Value(move(value)) {}
|
cannam@150
|
203 };
|
cannam@150
|
204
|
cannam@150
|
205 class JsonArray final : public Value<Json::ARRAY, Json::array> {
|
cannam@150
|
206 const Json::array &array_items() const override { return m_value; }
|
cannam@150
|
207 const Json & operator[](size_t i) const override;
|
cannam@150
|
208 public:
|
cannam@150
|
209 explicit JsonArray(const Json::array &value) : Value(value) {}
|
cannam@150
|
210 explicit JsonArray(Json::array &&value) : Value(move(value)) {}
|
cannam@150
|
211 };
|
cannam@150
|
212
|
cannam@150
|
213 class JsonObject final : public Value<Json::OBJECT, Json::object> {
|
cannam@150
|
214 const Json::object &object_items() const override { return m_value; }
|
cannam@150
|
215 const Json & operator[](const string &key) const override;
|
cannam@150
|
216 public:
|
cannam@150
|
217 explicit JsonObject(const Json::object &value) : Value(value) {}
|
cannam@150
|
218 explicit JsonObject(Json::object &&value) : Value(move(value)) {}
|
cannam@150
|
219 };
|
cannam@150
|
220
|
cannam@242
|
221 class JsonNull final : public Value<Json::NUL, NullStruct> {
|
cannam@150
|
222 public:
|
cannam@242
|
223 JsonNull() : Value({}) {}
|
cannam@150
|
224 };
|
cannam@150
|
225
|
cannam@150
|
226 /* * * * * * * * * * * * * * * * * * * *
|
cannam@150
|
227 * Static globals - static-init-safe
|
cannam@150
|
228 */
|
cannam@150
|
229 struct Statics {
|
cannam@150
|
230 const std::shared_ptr<JsonValue> null = make_shared<JsonNull>();
|
cannam@150
|
231 const std::shared_ptr<JsonValue> t = make_shared<JsonBoolean>(true);
|
cannam@150
|
232 const std::shared_ptr<JsonValue> f = make_shared<JsonBoolean>(false);
|
cannam@150
|
233 const string empty_string;
|
cannam@150
|
234 const vector<Json> empty_vector;
|
cannam@150
|
235 const map<string, Json> empty_map;
|
cannam@150
|
236 Statics() {}
|
cannam@150
|
237 };
|
cannam@150
|
238
|
cannam@150
|
239 static const Statics & statics() {
|
cannam@150
|
240 static const Statics s {};
|
cannam@150
|
241 return s;
|
cannam@150
|
242 }
|
cannam@150
|
243
|
cannam@150
|
244 static const Json & static_null() {
|
cannam@150
|
245 // This has to be separate, not in Statics, because Json() accesses statics().null.
|
cannam@150
|
246 static const Json json_null;
|
cannam@150
|
247 return json_null;
|
cannam@150
|
248 }
|
cannam@150
|
249
|
cannam@150
|
250 /* * * * * * * * * * * * * * * * * * * *
|
cannam@150
|
251 * Constructors
|
cannam@150
|
252 */
|
cannam@150
|
253
|
cannam@150
|
254 Json::Json() noexcept : m_ptr(statics().null) {}
|
cannam@150
|
255 Json::Json(std::nullptr_t) noexcept : m_ptr(statics().null) {}
|
cannam@150
|
256 Json::Json(double value) : m_ptr(make_shared<JsonDouble>(value)) {}
|
cannam@150
|
257 Json::Json(int value) : m_ptr(make_shared<JsonInt>(value)) {}
|
cannam@150
|
258 Json::Json(bool value) : m_ptr(value ? statics().t : statics().f) {}
|
cannam@150
|
259 Json::Json(const string &value) : m_ptr(make_shared<JsonString>(value)) {}
|
cannam@150
|
260 Json::Json(string &&value) : m_ptr(make_shared<JsonString>(move(value))) {}
|
cannam@150
|
261 Json::Json(const char * value) : m_ptr(make_shared<JsonString>(value)) {}
|
cannam@150
|
262 Json::Json(const Json::array &values) : m_ptr(make_shared<JsonArray>(values)) {}
|
cannam@150
|
263 Json::Json(Json::array &&values) : m_ptr(make_shared<JsonArray>(move(values))) {}
|
cannam@150
|
264 Json::Json(const Json::object &values) : m_ptr(make_shared<JsonObject>(values)) {}
|
cannam@150
|
265 Json::Json(Json::object &&values) : m_ptr(make_shared<JsonObject>(move(values))) {}
|
cannam@150
|
266
|
cannam@150
|
267 /* * * * * * * * * * * * * * * * * * * *
|
cannam@150
|
268 * Accessors
|
cannam@150
|
269 */
|
cannam@150
|
270
|
cannam@150
|
271 Json::Type Json::type() const { return m_ptr->type(); }
|
cannam@150
|
272 double Json::number_value() const { return m_ptr->number_value(); }
|
cannam@150
|
273 int Json::int_value() const { return m_ptr->int_value(); }
|
cannam@150
|
274 bool Json::bool_value() const { return m_ptr->bool_value(); }
|
cannam@150
|
275 const string & Json::string_value() const { return m_ptr->string_value(); }
|
cannam@150
|
276 const vector<Json> & Json::array_items() const { return m_ptr->array_items(); }
|
cannam@150
|
277 const map<string, Json> & Json::object_items() const { return m_ptr->object_items(); }
|
cannam@150
|
278 const Json & Json::operator[] (size_t i) const { return (*m_ptr)[i]; }
|
cannam@150
|
279 const Json & Json::operator[] (const string &key) const { return (*m_ptr)[key]; }
|
cannam@150
|
280
|
cannam@150
|
281 double JsonValue::number_value() const { return 0; }
|
cannam@150
|
282 int JsonValue::int_value() const { return 0; }
|
cannam@150
|
283 bool JsonValue::bool_value() const { return false; }
|
cannam@150
|
284 const string & JsonValue::string_value() const { return statics().empty_string; }
|
cannam@150
|
285 const vector<Json> & JsonValue::array_items() const { return statics().empty_vector; }
|
cannam@150
|
286 const map<string, Json> & JsonValue::object_items() const { return statics().empty_map; }
|
cannam@150
|
287 const Json & JsonValue::operator[] (size_t) const { return static_null(); }
|
cannam@150
|
288 const Json & JsonValue::operator[] (const string &) const { return static_null(); }
|
cannam@150
|
289
|
cannam@150
|
290 const Json & JsonObject::operator[] (const string &key) const {
|
cannam@150
|
291 auto iter = m_value.find(key);
|
cannam@150
|
292 return (iter == m_value.end()) ? static_null() : iter->second;
|
cannam@150
|
293 }
|
cannam@150
|
294 const Json & JsonArray::operator[] (size_t i) const {
|
cannam@150
|
295 if (i >= m_value.size()) return static_null();
|
cannam@150
|
296 else return m_value[i];
|
cannam@150
|
297 }
|
cannam@150
|
298
|
cannam@150
|
299 /* * * * * * * * * * * * * * * * * * * *
|
cannam@150
|
300 * Comparison
|
cannam@150
|
301 */
|
cannam@150
|
302
|
cannam@150
|
303 bool Json::operator== (const Json &other) const {
|
cannam@242
|
304 if (m_ptr == other.m_ptr)
|
cannam@242
|
305 return true;
|
cannam@150
|
306 if (m_ptr->type() != other.m_ptr->type())
|
cannam@150
|
307 return false;
|
cannam@150
|
308
|
cannam@150
|
309 return m_ptr->equals(other.m_ptr.get());
|
cannam@150
|
310 }
|
cannam@150
|
311
|
cannam@150
|
312 bool Json::operator< (const Json &other) const {
|
cannam@242
|
313 if (m_ptr == other.m_ptr)
|
cannam@242
|
314 return false;
|
cannam@150
|
315 if (m_ptr->type() != other.m_ptr->type())
|
cannam@150
|
316 return m_ptr->type() < other.m_ptr->type();
|
cannam@150
|
317
|
cannam@150
|
318 return m_ptr->less(other.m_ptr.get());
|
cannam@150
|
319 }
|
cannam@150
|
320
|
cannam@150
|
321 /* * * * * * * * * * * * * * * * * * * *
|
cannam@150
|
322 * Parsing
|
cannam@150
|
323 */
|
cannam@150
|
324
|
cannam@150
|
/* esc(c)
 *
 * Render character c for use in an error message. Bytes in the range
 * 0x20..0x7f are shown as the quoted character followed by its numeric
 * value, e.g. 'a' (97); every other byte is shown as the numeric value
 * only, e.g. (10).
 */
static inline std::string esc(char c) {
    const uint8_t code = static_cast<uint8_t>(c);
    const bool show_char = (code >= 0x20 && code <= 0x7f);

    char buf[12];
    if (show_char) {
        snprintf(buf, sizeof buf, "'%c' (%d)", c, c);
    } else {
        snprintf(buf, sizeof buf, "(%d)", c);
    }
    return std::string(buf);
}
|
cannam@150
|
338
|
cannam@150
|
/* True when lower <= x <= upper (both bounds inclusive). */
static inline bool in_range(long x, long lower, long upper) {
    return !(x < lower || x > upper);
}
|
cannam@150
|
342
|
cannam@242
|
343 namespace {
|
cannam@150
|
344 /* JsonParser
|
cannam@150
|
345 *
|
cannam@150
|
346 * Object that tracks all state of an in-progress parse.
|
cannam@150
|
347 */
|
cannam@242
|
348 struct JsonParser final {
|
cannam@150
|
349
|
cannam@150
|
350 /* State
|
cannam@150
|
351 */
|
cannam@150
|
352 const string &str;
|
cannam@150
|
353 size_t i;
|
cannam@150
|
354 string &err;
|
cannam@150
|
355 bool failed;
|
cannam@150
|
356 const JsonParse strategy;
|
cannam@150
|
357
|
cannam@150
|
358 /* fail(msg, err_ret = Json())
|
cannam@150
|
359 *
|
cannam@150
|
360 * Mark this parse as failed.
|
cannam@150
|
361 */
|
cannam@150
|
362 Json fail(string &&msg) {
|
cannam@150
|
363 return fail(move(msg), Json());
|
cannam@150
|
364 }
|
cannam@150
|
365
|
cannam@150
|
366 template <typename T>
|
cannam@150
|
367 T fail(string &&msg, const T err_ret) {
|
cannam@150
|
368 if (!failed)
|
cannam@150
|
369 err = std::move(msg);
|
cannam@150
|
370 failed = true;
|
cannam@150
|
371 return err_ret;
|
cannam@150
|
372 }
|
cannam@150
|
373
|
cannam@150
|
374 /* consume_whitespace()
|
cannam@150
|
375 *
|
cannam@150
|
376 * Advance until the current character is non-whitespace.
|
cannam@150
|
377 */
|
cannam@150
|
378 void consume_whitespace() {
|
cannam@150
|
379 while (str[i] == ' ' || str[i] == '\r' || str[i] == '\n' || str[i] == '\t')
|
cannam@150
|
380 i++;
|
cannam@150
|
381 }
|
cannam@150
|
382
|
cannam@150
|
383 /* consume_comment()
|
cannam@150
|
384 *
|
cannam@150
|
385 * Advance comments (c-style inline and multiline).
|
cannam@150
|
386 */
|
cannam@150
|
387 bool consume_comment() {
|
cannam@150
|
388 bool comment_found = false;
|
cannam@150
|
389 if (str[i] == '/') {
|
cannam@150
|
390 i++;
|
cannam@150
|
391 if (i == str.size())
|
cannam@242
|
392 return fail("unexpected end of input after start of comment", false);
|
cannam@150
|
393 if (str[i] == '/') { // inline comment
|
cannam@150
|
394 i++;
|
cannam@242
|
395 // advance until next line, or end of input
|
cannam@242
|
396 while (i < str.size() && str[i] != '\n') {
|
cannam@150
|
397 i++;
|
cannam@150
|
398 }
|
cannam@150
|
399 comment_found = true;
|
cannam@150
|
400 }
|
cannam@150
|
401 else if (str[i] == '*') { // multiline comment
|
cannam@150
|
402 i++;
|
cannam@150
|
403 if (i > str.size()-2)
|
cannam@242
|
404 return fail("unexpected end of input inside multi-line comment", false);
|
cannam@150
|
405 // advance until closing tokens
|
cannam@150
|
406 while (!(str[i] == '*' && str[i+1] == '/')) {
|
cannam@150
|
407 i++;
|
cannam@150
|
408 if (i > str.size()-2)
|
cannam@150
|
409 return fail(
|
cannam@242
|
410 "unexpected end of input inside multi-line comment", false);
|
cannam@150
|
411 }
|
cannam@150
|
412 i += 2;
|
cannam@150
|
413 comment_found = true;
|
cannam@150
|
414 }
|
cannam@150
|
415 else
|
cannam@242
|
416 return fail("malformed comment", false);
|
cannam@150
|
417 }
|
cannam@150
|
418 return comment_found;
|
cannam@150
|
419 }
|
cannam@150
|
420
|
cannam@150
|
421 /* consume_garbage()
|
cannam@150
|
422 *
|
cannam@150
|
423 * Advance until the current character is non-whitespace and non-comment.
|
cannam@150
|
424 */
|
cannam@150
|
425 void consume_garbage() {
|
cannam@150
|
426 consume_whitespace();
|
cannam@150
|
427 if(strategy == JsonParse::COMMENTS) {
|
cannam@150
|
428 bool comment_found = false;
|
cannam@150
|
429 do {
|
cannam@150
|
430 comment_found = consume_comment();
|
cannam@242
|
431 if (failed) return;
|
cannam@150
|
432 consume_whitespace();
|
cannam@150
|
433 }
|
cannam@150
|
434 while(comment_found);
|
cannam@150
|
435 }
|
cannam@150
|
436 }
|
cannam@150
|
437
|
cannam@150
|
438 /* get_next_token()
|
cannam@150
|
439 *
|
cannam@150
|
440 * Return the next non-whitespace character. If the end of the input is reached,
|
cannam@150
|
441 * flag an error and return 0.
|
cannam@150
|
442 */
|
cannam@150
|
443 char get_next_token() {
|
cannam@150
|
444 consume_garbage();
|
cannam@242
|
445 if (failed) return (char)0;
|
cannam@150
|
446 if (i == str.size())
|
cannam@242
|
447 return fail("unexpected end of input", (char)0);
|
cannam@150
|
448
|
cannam@150
|
449 return str[i++];
|
cannam@150
|
450 }
|
cannam@150
|
451
|
cannam@150
|
452 /* encode_utf8(pt, out)
|
cannam@150
|
453 *
|
cannam@150
|
454 * Encode pt as UTF-8 and add it to out.
|
cannam@150
|
455 */
|
cannam@150
|
456 void encode_utf8(long pt, string & out) {
|
cannam@150
|
457 if (pt < 0)
|
cannam@150
|
458 return;
|
cannam@150
|
459
|
cannam@150
|
460 if (pt < 0x80) {
|
cannam@150
|
461 out += static_cast<char>(pt);
|
cannam@150
|
462 } else if (pt < 0x800) {
|
cannam@150
|
463 out += static_cast<char>((pt >> 6) | 0xC0);
|
cannam@150
|
464 out += static_cast<char>((pt & 0x3F) | 0x80);
|
cannam@150
|
465 } else if (pt < 0x10000) {
|
cannam@150
|
466 out += static_cast<char>((pt >> 12) | 0xE0);
|
cannam@150
|
467 out += static_cast<char>(((pt >> 6) & 0x3F) | 0x80);
|
cannam@150
|
468 out += static_cast<char>((pt & 0x3F) | 0x80);
|
cannam@150
|
469 } else {
|
cannam@150
|
470 out += static_cast<char>((pt >> 18) | 0xF0);
|
cannam@150
|
471 out += static_cast<char>(((pt >> 12) & 0x3F) | 0x80);
|
cannam@150
|
472 out += static_cast<char>(((pt >> 6) & 0x3F) | 0x80);
|
cannam@150
|
473 out += static_cast<char>((pt & 0x3F) | 0x80);
|
cannam@150
|
474 }
|
cannam@150
|
475 }
|
cannam@150
|
476
|
cannam@150
|
477 /* parse_string()
|
cannam@150
|
478 *
|
cannam@150
|
479 * Parse a string, starting at the current position.
|
cannam@150
|
480 */
|
cannam@150
|
481 string parse_string() {
|
cannam@150
|
482 string out;
|
cannam@150
|
483 long last_escaped_codepoint = -1;
|
cannam@150
|
484 while (true) {
|
cannam@150
|
485 if (i == str.size())
|
cannam@150
|
486 return fail("unexpected end of input in string", "");
|
cannam@150
|
487
|
cannam@150
|
488 char ch = str[i++];
|
cannam@150
|
489
|
cannam@150
|
490 if (ch == '"') {
|
cannam@150
|
491 encode_utf8(last_escaped_codepoint, out);
|
cannam@150
|
492 return out;
|
cannam@150
|
493 }
|
cannam@150
|
494
|
cannam@150
|
495 if (in_range(ch, 0, 0x1f))
|
cannam@150
|
496 return fail("unescaped " + esc(ch) + " in string", "");
|
cannam@150
|
497
|
cannam@150
|
498 // The usual case: non-escaped characters
|
cannam@150
|
499 if (ch != '\\') {
|
cannam@150
|
500 encode_utf8(last_escaped_codepoint, out);
|
cannam@150
|
501 last_escaped_codepoint = -1;
|
cannam@150
|
502 out += ch;
|
cannam@150
|
503 continue;
|
cannam@150
|
504 }
|
cannam@150
|
505
|
cannam@150
|
506 // Handle escapes
|
cannam@150
|
507 if (i == str.size())
|
cannam@150
|
508 return fail("unexpected end of input in string", "");
|
cannam@150
|
509
|
cannam@150
|
510 ch = str[i++];
|
cannam@150
|
511
|
cannam@150
|
512 if (ch == 'u') {
|
cannam@150
|
513 // Extract 4-byte escape sequence
|
cannam@150
|
514 string esc = str.substr(i, 4);
|
cannam@150
|
515 // Explicitly check length of the substring. The following loop
|
cannam@150
|
516 // relies on std::string returning the terminating NUL when
|
cannam@150
|
517 // accessing str[length]. Checking here reduces brittleness.
|
cannam@150
|
518 if (esc.length() < 4) {
|
cannam@150
|
519 return fail("bad \\u escape: " + esc, "");
|
cannam@150
|
520 }
|
cannam@242
|
521 for (size_t j = 0; j < 4; j++) {
|
cannam@150
|
522 if (!in_range(esc[j], 'a', 'f') && !in_range(esc[j], 'A', 'F')
|
cannam@150
|
523 && !in_range(esc[j], '0', '9'))
|
cannam@150
|
524 return fail("bad \\u escape: " + esc, "");
|
cannam@150
|
525 }
|
cannam@150
|
526
|
cannam@150
|
527 long codepoint = strtol(esc.data(), nullptr, 16);
|
cannam@150
|
528
|
cannam@150
|
529 // JSON specifies that characters outside the BMP shall be encoded as a pair
|
cannam@150
|
530 // of 4-hex-digit \u escapes encoding their surrogate pair components. Check
|
cannam@150
|
531 // whether we're in the middle of such a beast: the previous codepoint was an
|
cannam@150
|
532 // escaped lead (high) surrogate, and this is a trail (low) surrogate.
|
cannam@150
|
533 if (in_range(last_escaped_codepoint, 0xD800, 0xDBFF)
|
cannam@150
|
534 && in_range(codepoint, 0xDC00, 0xDFFF)) {
|
cannam@150
|
535 // Reassemble the two surrogate pairs into one astral-plane character, per
|
cannam@150
|
536 // the UTF-16 algorithm.
|
cannam@150
|
537 encode_utf8((((last_escaped_codepoint - 0xD800) << 10)
|
cannam@150
|
538 | (codepoint - 0xDC00)) + 0x10000, out);
|
cannam@150
|
539 last_escaped_codepoint = -1;
|
cannam@150
|
540 } else {
|
cannam@150
|
541 encode_utf8(last_escaped_codepoint, out);
|
cannam@150
|
542 last_escaped_codepoint = codepoint;
|
cannam@150
|
543 }
|
cannam@150
|
544
|
cannam@150
|
545 i += 4;
|
cannam@150
|
546 continue;
|
cannam@150
|
547 }
|
cannam@150
|
548
|
cannam@150
|
549 encode_utf8(last_escaped_codepoint, out);
|
cannam@150
|
550 last_escaped_codepoint = -1;
|
cannam@150
|
551
|
cannam@150
|
552 if (ch == 'b') {
|
cannam@150
|
553 out += '\b';
|
cannam@150
|
554 } else if (ch == 'f') {
|
cannam@150
|
555 out += '\f';
|
cannam@150
|
556 } else if (ch == 'n') {
|
cannam@150
|
557 out += '\n';
|
cannam@150
|
558 } else if (ch == 'r') {
|
cannam@150
|
559 out += '\r';
|
cannam@150
|
560 } else if (ch == 't') {
|
cannam@150
|
561 out += '\t';
|
cannam@150
|
562 } else if (ch == '"' || ch == '\\' || ch == '/') {
|
cannam@150
|
563 out += ch;
|
cannam@150
|
564 } else {
|
cannam@150
|
565 return fail("invalid escape character " + esc(ch), "");
|
cannam@150
|
566 }
|
cannam@150
|
567 }
|
cannam@150
|
568 }
|
cannam@150
|
569
|
cannam@150
|
570 /* parse_number()
|
cannam@150
|
571 *
|
cannam@150
|
572 * Parse a double.
|
cannam@150
|
573 */
|
cannam@150
|
574 Json parse_number() {
|
cannam@150
|
575 size_t start_pos = i;
|
cannam@150
|
576
|
cannam@150
|
577 if (str[i] == '-')
|
cannam@150
|
578 i++;
|
cannam@150
|
579
|
cannam@150
|
580 // Integer part
|
cannam@150
|
581 if (str[i] == '0') {
|
cannam@150
|
582 i++;
|
cannam@150
|
583 if (in_range(str[i], '0', '9'))
|
cannam@150
|
584 return fail("leading 0s not permitted in numbers");
|
cannam@150
|
585 } else if (in_range(str[i], '1', '9')) {
|
cannam@150
|
586 i++;
|
cannam@150
|
587 while (in_range(str[i], '0', '9'))
|
cannam@150
|
588 i++;
|
cannam@150
|
589 } else {
|
cannam@150
|
590 return fail("invalid " + esc(str[i]) + " in number");
|
cannam@150
|
591 }
|
cannam@150
|
592
|
cannam@150
|
593 if (str[i] != '.' && str[i] != 'e' && str[i] != 'E'
|
cannam@150
|
594 && (i - start_pos) <= static_cast<size_t>(std::numeric_limits<int>::digits10)) {
|
cannam@150
|
595 return std::atoi(str.c_str() + start_pos);
|
cannam@150
|
596 }
|
cannam@150
|
597
|
cannam@150
|
598 // Decimal part
|
cannam@150
|
599 if (str[i] == '.') {
|
cannam@150
|
600 i++;
|
cannam@150
|
601 if (!in_range(str[i], '0', '9'))
|
cannam@150
|
602 return fail("at least one digit required in fractional part");
|
cannam@150
|
603
|
cannam@150
|
604 while (in_range(str[i], '0', '9'))
|
cannam@150
|
605 i++;
|
cannam@150
|
606 }
|
cannam@150
|
607
|
cannam@150
|
608 // Exponent part
|
cannam@150
|
609 if (str[i] == 'e' || str[i] == 'E') {
|
cannam@150
|
610 i++;
|
cannam@150
|
611
|
cannam@150
|
612 if (str[i] == '+' || str[i] == '-')
|
cannam@150
|
613 i++;
|
cannam@150
|
614
|
cannam@150
|
615 if (!in_range(str[i], '0', '9'))
|
cannam@150
|
616 return fail("at least one digit required in exponent");
|
cannam@150
|
617
|
cannam@150
|
618 while (in_range(str[i], '0', '9'))
|
cannam@150
|
619 i++;
|
cannam@150
|
620 }
|
cannam@150
|
621
|
cannam@150
|
622 return std::strtod(str.c_str() + start_pos, nullptr);
|
cannam@150
|
623 }
|
cannam@150
|
624
|
cannam@150
|
625 /* expect(str, res)
|
cannam@150
|
626 *
|
cannam@150
|
627 * Expect that 'str' starts at the character that was just read. If it does, advance
|
cannam@150
|
628 * the input and return res. If not, flag an error.
|
cannam@150
|
629 */
|
cannam@150
|
630 Json expect(const string &expected, Json res) {
|
cannam@150
|
631 assert(i != 0);
|
cannam@150
|
632 i--;
|
cannam@150
|
633 if (str.compare(i, expected.length(), expected) == 0) {
|
cannam@150
|
634 i += expected.length();
|
cannam@150
|
635 return res;
|
cannam@150
|
636 } else {
|
cannam@150
|
637 return fail("parse error: expected " + expected + ", got " + str.substr(i, expected.length()));
|
cannam@150
|
638 }
|
cannam@150
|
639 }
|
cannam@150
|
640
|
cannam@150
|
641 /* parse_json()
|
cannam@150
|
642 *
|
cannam@150
|
643 * Parse a JSON object.
|
cannam@150
|
644 */
|
cannam@150
|
645 Json parse_json(int depth) {
|
cannam@150
|
646 if (depth > max_depth) {
|
cannam@150
|
647 return fail("exceeded maximum nesting depth");
|
cannam@150
|
648 }
|
cannam@150
|
649
|
cannam@150
|
650 char ch = get_next_token();
|
cannam@150
|
651 if (failed)
|
cannam@150
|
652 return Json();
|
cannam@150
|
653
|
cannam@150
|
654 if (ch == '-' || (ch >= '0' && ch <= '9')) {
|
cannam@150
|
655 i--;
|
cannam@150
|
656 return parse_number();
|
cannam@150
|
657 }
|
cannam@150
|
658
|
cannam@150
|
659 if (ch == 't')
|
cannam@150
|
660 return expect("true", true);
|
cannam@150
|
661
|
cannam@150
|
662 if (ch == 'f')
|
cannam@150
|
663 return expect("false", false);
|
cannam@150
|
664
|
cannam@150
|
665 if (ch == 'n')
|
cannam@150
|
666 return expect("null", Json());
|
cannam@150
|
667
|
cannam@150
|
668 if (ch == '"')
|
cannam@150
|
669 return parse_string();
|
cannam@150
|
670
|
cannam@150
|
671 if (ch == '{') {
|
cannam@150
|
672 map<string, Json> data;
|
cannam@150
|
673 ch = get_next_token();
|
cannam@150
|
674 if (ch == '}')
|
cannam@150
|
675 return data;
|
cannam@150
|
676
|
cannam@150
|
677 while (1) {
|
cannam@150
|
678 if (ch != '"')
|
cannam@150
|
679 return fail("expected '\"' in object, got " + esc(ch));
|
cannam@150
|
680
|
cannam@150
|
681 string key = parse_string();
|
cannam@150
|
682 if (failed)
|
cannam@150
|
683 return Json();
|
cannam@150
|
684
|
cannam@150
|
685 ch = get_next_token();
|
cannam@150
|
686 if (ch != ':')
|
cannam@150
|
687 return fail("expected ':' in object, got " + esc(ch));
|
cannam@150
|
688
|
cannam@150
|
689 data[std::move(key)] = parse_json(depth + 1);
|
cannam@150
|
690 if (failed)
|
cannam@150
|
691 return Json();
|
cannam@150
|
692
|
cannam@150
|
693 ch = get_next_token();
|
cannam@150
|
694 if (ch == '}')
|
cannam@150
|
695 break;
|
cannam@150
|
696 if (ch != ',')
|
cannam@150
|
697 return fail("expected ',' in object, got " + esc(ch));
|
cannam@150
|
698
|
cannam@150
|
699 ch = get_next_token();
|
cannam@150
|
700 }
|
cannam@150
|
701 return data;
|
cannam@150
|
702 }
|
cannam@150
|
703
|
cannam@150
|
704 if (ch == '[') {
|
cannam@150
|
705 vector<Json> data;
|
cannam@150
|
706 ch = get_next_token();
|
cannam@150
|
707 if (ch == ']')
|
cannam@150
|
708 return data;
|
cannam@150
|
709
|
cannam@150
|
710 while (1) {
|
cannam@150
|
711 i--;
|
cannam@150
|
712 data.push_back(parse_json(depth + 1));
|
cannam@150
|
713 if (failed)
|
cannam@150
|
714 return Json();
|
cannam@150
|
715
|
cannam@150
|
716 ch = get_next_token();
|
cannam@150
|
717 if (ch == ']')
|
cannam@150
|
718 break;
|
cannam@150
|
719 if (ch != ',')
|
cannam@150
|
720 return fail("expected ',' in list, got " + esc(ch));
|
cannam@150
|
721
|
cannam@150
|
722 ch = get_next_token();
|
cannam@150
|
723 (void)ch;
|
cannam@150
|
724 }
|
cannam@150
|
725 return data;
|
cannam@150
|
726 }
|
cannam@150
|
727
|
cannam@150
|
728 return fail("expected value, got " + esc(ch));
|
cannam@150
|
729 }
|
cannam@150
|
730 };
|
cannam@242
|
731 }//namespace {
|
cannam@150
|
732
|
cannam@150
|
733 Json Json::parse(const string &in, string &err, JsonParse strategy) {
|
cannam@150
|
734 JsonParser parser { in, 0, err, false, strategy };
|
cannam@150
|
735 Json result = parser.parse_json(0);
|
cannam@150
|
736
|
cannam@150
|
737 // Check for any trailing garbage
|
cannam@150
|
738 parser.consume_garbage();
|
cannam@242
|
739 if (parser.failed)
|
cannam@242
|
740 return Json();
|
cannam@150
|
741 if (parser.i != in.size())
|
cannam@150
|
742 return parser.fail("unexpected trailing " + esc(in[parser.i]));
|
cannam@150
|
743
|
cannam@150
|
744 return result;
|
cannam@150
|
745 }
|
cannam@150
|
746
|
cannam@150
|
747 // Documented in json11.hpp
|
cannam@150
|
748 vector<Json> Json::parse_multi(const string &in,
|
cannam@242
|
749 std::string::size_type &parser_stop_pos,
|
cannam@150
|
750 string &err,
|
cannam@150
|
751 JsonParse strategy) {
|
cannam@150
|
752 JsonParser parser { in, 0, err, false, strategy };
|
cannam@242
|
753 parser_stop_pos = 0;
|
cannam@150
|
754 vector<Json> json_vec;
|
cannam@150
|
755 while (parser.i != in.size() && !parser.failed) {
|
cannam@150
|
756 json_vec.push_back(parser.parse_json(0));
|
cannam@242
|
757 if (parser.failed)
|
cannam@242
|
758 break;
|
cannam@242
|
759
|
cannam@150
|
760 // Check for another object
|
cannam@150
|
761 parser.consume_garbage();
|
cannam@242
|
762 if (parser.failed)
|
cannam@242
|
763 break;
|
cannam@242
|
764 parser_stop_pos = parser.i;
|
cannam@150
|
765 }
|
cannam@150
|
766 return json_vec;
|
cannam@150
|
767 }
|
cannam@150
|
768
|
cannam@150
|
769 /* * * * * * * * * * * * * * * * * * * *
|
cannam@150
|
770 * Shape-checking
|
cannam@150
|
771 */
|
cannam@150
|
772
|
cannam@150
|
773 bool Json::has_shape(const shape & types, string & err) const {
|
cannam@150
|
774 if (!is_object()) {
|
cannam@150
|
775 err = "expected JSON object, got " + dump();
|
cannam@150
|
776 return false;
|
cannam@150
|
777 }
|
cannam@150
|
778
|
cannam@150
|
779 for (auto & item : types) {
|
cannam@150
|
780 if ((*this)[item.first].type() != item.second) {
|
cannam@150
|
781 err = "bad type for " + item.first + " in " + dump();
|
cannam@150
|
782 return false;
|
cannam@150
|
783 }
|
cannam@150
|
784 }
|
cannam@150
|
785
|
cannam@150
|
786 return true;
|
cannam@150
|
787 }
|
cannam@150
|
788
|
cannam@150
|
789 } // namespace json11
|