comparison third_party/json/json_reader.cpp @ 0:add35537fdbb tip

Initial import
author irh <ian.r.hobson@gmail.com>
date Thu, 25 Aug 2011 11:05:55 +0100
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:add35537fdbb
1 // Copyright 2007-2011 Baptiste Lepilleur
2 // Distributed under MIT license, or public domain if desired and
3 // recognized in your jurisdiction.
4 // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
5
6 #if !defined(JSON_IS_AMALGAMATION)
7 # include <json/assertions.h>
8 # include <json/reader.h>
9 # include <json/value.h>
10 # include "json_tool.h"
11 #endif // if !defined(JSON_IS_AMALGAMATION)
12 #include <utility>
13 #include <cstdio>
14 #include <cassert>
15 #include <cstring>
16 #include <iostream>
17 #include <stdexcept>
18
19 #if _MSC_VER >= 1400 // VC++ 8.0
20 #pragma warning( disable : 4996 ) // disable warning about strdup being deprecated.
21 #endif
22
23 namespace Json {
24
25 // Implementation of class Features
26 // ////////////////////////////////
27
28 Features::Features()
29 : allowComments_( true )
30 , strictRoot_( false )
31 {
32 }
33
34
35 Features
36 Features::all()
37 {
38 return Features();
39 }
40
41
42 Features
43 Features::strictMode()
44 {
45 Features features;
46 features.allowComments_ = false;
47 features.strictRoot_ = true;
48 return features;
49 }
50
51 // Implementation of class Reader
52 // ////////////////////////////////
53
54
55 static inline bool
56 in( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4 )
57 {
58 return c == c1 || c == c2 || c == c3 || c == c4;
59 }
60
61 static inline bool
62 in( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4, Reader::Char c5 )
63 {
64 return c == c1 || c == c2 || c == c3 || c == c4 || c == c5;
65 }
66
67
68 static bool
69 containsNewLine( Reader::Location begin,
70 Reader::Location end )
71 {
72 for ( ;begin < end; ++begin )
73 if ( *begin == '\n' || *begin == '\r' )
74 return true;
75 return false;
76 }
77
78
79 // Class Reader
80 // //////////////////////////////////////////////////////////////////
81
82 Reader::Reader()
83 : errors_(),
84 document_(),
85 begin_(),
86 end_(),
87 current_(),
88 lastValueEnd_(),
89 lastValue_(),
90 commentsBefore_(),
91 features_( Features::all() ),
92 collectComments_()
93 {
94 }
95
96
97 Reader::Reader( const Features &features )
98 : errors_(),
99 document_(),
100 begin_(),
101 end_(),
102 current_(),
103 lastValueEnd_(),
104 lastValue_(),
105 commentsBefore_(),
106 features_( features ),
107 collectComments_()
108 {
109 }
110
111
112 bool
113 Reader::parse( const std::string &document,
114 Value &root,
115 bool collectComments )
116 {
117 document_ = document;
118 const char *begin = document_.c_str();
119 const char *end = begin + document_.length();
120 return parse( begin, end, root, collectComments );
121 }
122
123
124 bool
125 Reader::parse( std::istream& sin,
126 Value &root,
127 bool collectComments )
128 {
129 //std::istream_iterator<char> begin(sin);
130 //std::istream_iterator<char> end;
131 // Those would allow streamed input from a file, if parse() were a
132 // template function.
133
134 // Since std::string is reference-counted, this at least does not
135 // create an extra copy.
136 std::string doc;
137 std::getline(sin, doc, (char)EOF);
138 return parse( doc, root, collectComments );
139 }
140
141 bool
142 Reader::parse( const char *beginDoc, const char *endDoc,
143 Value &root,
144 bool collectComments )
145 {
146 if ( !features_.allowComments_ )
147 {
148 collectComments = false;
149 }
150
151 begin_ = beginDoc;
152 end_ = endDoc;
153 collectComments_ = collectComments;
154 current_ = begin_;
155 lastValueEnd_ = 0;
156 lastValue_ = 0;
157 commentsBefore_ = "";
158 errors_.clear();
159 while ( !nodes_.empty() )
160 nodes_.pop();
161 nodes_.push( &root );
162
163 bool successful = readValue();
164 Token token;
165 skipCommentTokens( token );
166 if ( collectComments_ && !commentsBefore_.empty() )
167 root.setComment( commentsBefore_, commentAfter );
168 if ( features_.strictRoot_ )
169 {
170 if ( !root.isArray() && !root.isObject() )
171 {
172 // Set error location to start of doc, ideally should be first token found in doc
173 token.type_ = tokenError;
174 token.start_ = beginDoc;
175 token.end_ = endDoc;
176 addError( "A valid JSON document must be either an array or an object value.",
177 token );
178 return false;
179 }
180 }
181 return successful;
182 }
183
184
185 bool
186 Reader::readValue()
187 {
188 Token token;
189 skipCommentTokens( token );
190 bool successful = true;
191
192 if ( collectComments_ && !commentsBefore_.empty() )
193 {
194 currentValue().setComment( commentsBefore_, commentBefore );
195 commentsBefore_ = "";
196 }
197
198
199 switch ( token.type_ )
200 {
201 case tokenObjectBegin:
202 successful = readObject( token );
203 break;
204 case tokenArrayBegin:
205 successful = readArray( token );
206 break;
207 case tokenNumber:
208 successful = decodeNumber( token );
209 break;
210 case tokenString:
211 successful = decodeString( token );
212 break;
213 case tokenTrue:
214 currentValue() = true;
215 break;
216 case tokenFalse:
217 currentValue() = false;
218 break;
219 case tokenNull:
220 currentValue() = Value();
221 break;
222 default:
223 return addError( "Syntax error: value, object or array expected.", token );
224 }
225
226 if ( collectComments_ )
227 {
228 lastValueEnd_ = current_;
229 lastValue_ = &currentValue();
230 }
231
232 return successful;
233 }
234
235
236 void
237 Reader::skipCommentTokens( Token &token )
238 {
239 if ( features_.allowComments_ )
240 {
241 do
242 {
243 readToken( token );
244 }
245 while ( token.type_ == tokenComment );
246 }
247 else
248 {
249 readToken( token );
250 }
251 }
252
253
254 bool
255 Reader::expectToken( TokenType type, Token &token, const char *message )
256 {
257 readToken( token );
258 if ( token.type_ != type )
259 return addError( message, token );
260 return true;
261 }
262
263
264 bool
265 Reader::readToken( Token &token )
266 {
267 skipSpaces();
268 token.start_ = current_;
269 Char c = getNextChar();
270 bool ok = true;
271 switch ( c )
272 {
273 case '{':
274 token.type_ = tokenObjectBegin;
275 break;
276 case '}':
277 token.type_ = tokenObjectEnd;
278 break;
279 case '[':
280 token.type_ = tokenArrayBegin;
281 break;
282 case ']':
283 token.type_ = tokenArrayEnd;
284 break;
285 case '"':
286 token.type_ = tokenString;
287 ok = readString();
288 break;
289 case '/':
290 token.type_ = tokenComment;
291 ok = readComment();
292 break;
293 case '0':
294 case '1':
295 case '2':
296 case '3':
297 case '4':
298 case '5':
299 case '6':
300 case '7':
301 case '8':
302 case '9':
303 case '-':
304 token.type_ = tokenNumber;
305 readNumber();
306 break;
307 case 't':
308 token.type_ = tokenTrue;
309 ok = match( "rue", 3 );
310 break;
311 case 'f':
312 token.type_ = tokenFalse;
313 ok = match( "alse", 4 );
314 break;
315 case 'n':
316 token.type_ = tokenNull;
317 ok = match( "ull", 3 );
318 break;
319 case ',':
320 token.type_ = tokenArraySeparator;
321 break;
322 case ':':
323 token.type_ = tokenMemberSeparator;
324 break;
325 case 0:
326 token.type_ = tokenEndOfStream;
327 break;
328 default:
329 ok = false;
330 break;
331 }
332 if ( !ok )
333 token.type_ = tokenError;
334 token.end_ = current_;
335 return true;
336 }
337
338
339 void
340 Reader::skipSpaces()
341 {
342 while ( current_ != end_ )
343 {
344 Char c = *current_;
345 if ( c == ' ' || c == '\t' || c == '\r' || c == '\n' )
346 ++current_;
347 else
348 break;
349 }
350 }
351
352
353 bool
354 Reader::match( Location pattern,
355 int patternLength )
356 {
357 if ( end_ - current_ < patternLength )
358 return false;
359 int index = patternLength;
360 while ( index-- )
361 if ( current_[index] != pattern[index] )
362 return false;
363 current_ += patternLength;
364 return true;
365 }
366
367
368 bool
369 Reader::readComment()
370 {
371 Location commentBegin = current_ - 1;
372 Char c = getNextChar();
373 bool successful = false;
374 if ( c == '*' )
375 successful = readCStyleComment();
376 else if ( c == '/' )
377 successful = readCppStyleComment();
378 if ( !successful )
379 return false;
380
381 if ( collectComments_ )
382 {
383 CommentPlacement placement = commentBefore;
384 if ( lastValueEnd_ && !containsNewLine( lastValueEnd_, commentBegin ) )
385 {
386 if ( c != '*' || !containsNewLine( commentBegin, current_ ) )
387 placement = commentAfterOnSameLine;
388 }
389
390 addComment( commentBegin, current_, placement );
391 }
392 return true;
393 }
394
395
396 void
397 Reader::addComment( Location begin,
398 Location end,
399 CommentPlacement placement )
400 {
401 assert( collectComments_ );
402 if ( placement == commentAfterOnSameLine )
403 {
404 assert( lastValue_ != 0 );
405 lastValue_->setComment( std::string( begin, end ), placement );
406 }
407 else
408 {
409 if ( !commentsBefore_.empty() )
410 commentsBefore_ += "\n";
411 commentsBefore_ += std::string( begin, end );
412 }
413 }
414
415
416 bool
417 Reader::readCStyleComment()
418 {
419 while ( current_ != end_ )
420 {
421 Char c = getNextChar();
422 if ( c == '*' && *current_ == '/' )
423 break;
424 }
425 return getNextChar() == '/';
426 }
427
428
429 bool
430 Reader::readCppStyleComment()
431 {
432 while ( current_ != end_ )
433 {
434 Char c = getNextChar();
435 if ( c == '\r' || c == '\n' )
436 break;
437 }
438 return true;
439 }
440
441
442 void
443 Reader::readNumber()
444 {
445 while ( current_ != end_ )
446 {
447 if ( !(*current_ >= '0' && *current_ <= '9') &&
448 !in( *current_, '.', 'e', 'E', '+', '-' ) )
449 break;
450 ++current_;
451 }
452 }
453
454 bool
455 Reader::readString()
456 {
457 Char c = 0;
458 while ( current_ != end_ )
459 {
460 c = getNextChar();
461 if ( c == '\\' )
462 getNextChar();
463 else if ( c == '"' )
464 break;
465 }
466 return c == '"';
467 }
468
469
470 bool
471 Reader::readObject( Token &/*tokenStart*/ )
472 {
473 Token tokenName;
474 std::string name;
475 currentValue() = Value( objectValue );
476 while ( readToken( tokenName ) )
477 {
478 bool initialTokenOk = true;
479 while ( tokenName.type_ == tokenComment && initialTokenOk )
480 initialTokenOk = readToken( tokenName );
481 if ( !initialTokenOk )
482 break;
483 if ( tokenName.type_ == tokenObjectEnd && name.empty() ) // empty object
484 return true;
485 if ( tokenName.type_ != tokenString )
486 break;
487
488 name = "";
489 if ( !decodeString( tokenName, name ) )
490 return recoverFromError( tokenObjectEnd );
491
492 Token colon;
493 if ( !readToken( colon ) || colon.type_ != tokenMemberSeparator )
494 {
495 return addErrorAndRecover( "Missing ':' after object member name",
496 colon,
497 tokenObjectEnd );
498 }
499 Value &value = currentValue()[ name ];
500 nodes_.push( &value );
501 bool ok = readValue();
502 nodes_.pop();
503 if ( !ok ) // error already set
504 return recoverFromError( tokenObjectEnd );
505
506 Token comma;
507 if ( !readToken( comma )
508 || ( comma.type_ != tokenObjectEnd &&
509 comma.type_ != tokenArraySeparator &&
510 comma.type_ != tokenComment ) )
511 {
512 return addErrorAndRecover( "Missing ',' or '}' in object declaration",
513 comma,
514 tokenObjectEnd );
515 }
516 bool finalizeTokenOk = true;
517 while ( comma.type_ == tokenComment &&
518 finalizeTokenOk )
519 finalizeTokenOk = readToken( comma );
520 if ( comma.type_ == tokenObjectEnd )
521 return true;
522 }
523 return addErrorAndRecover( "Missing '}' or object member name",
524 tokenName,
525 tokenObjectEnd );
526 }
527
528
529 bool
530 Reader::readArray( Token &/*tokenStart*/ )
531 {
532 currentValue() = Value( arrayValue );
533 skipSpaces();
534 if ( *current_ == ']' ) // empty array
535 {
536 Token endArray;
537 readToken( endArray );
538 return true;
539 }
540 int index = 0;
541 for (;;)
542 {
543 Value &value = currentValue()[ index++ ];
544 nodes_.push( &value );
545 bool ok = readValue();
546 nodes_.pop();
547 if ( !ok ) // error already set
548 return recoverFromError( tokenArrayEnd );
549
550 Token token;
551 // Accept Comment after last item in the array.
552 ok = readToken( token );
553 while ( token.type_ == tokenComment && ok )
554 {
555 ok = readToken( token );
556 }
557 bool badTokenType = ( token.type_ != tokenArraySeparator &&
558 token.type_ != tokenArrayEnd );
559 if ( !ok || badTokenType )
560 {
561 return addErrorAndRecover( "Missing ',' or ']' in array declaration",
562 token,
563 tokenArrayEnd );
564 }
565 if ( token.type_ == tokenArrayEnd )
566 break;
567 }
568 return true;
569 }
570
571
572 bool
573 Reader::decodeNumber( Token &token )
574 {
575 bool isDouble = false;
576 for ( Location inspect = token.start_; inspect != token.end_; ++inspect )
577 {
578 isDouble = isDouble
579 || in( *inspect, '.', 'e', 'E', '+' )
580 || ( *inspect == '-' && inspect != token.start_ );
581 }
582 if ( isDouble )
583 return decodeDouble( token );
584 // Attempts to parse the number as an integer. If the number is
585 // larger than the maximum supported value of an integer then
586 // we decode the number as a double.
587 Location current = token.start_;
588 bool isNegative = *current == '-';
589 if ( isNegative )
590 ++current;
591 Value::LargestUInt maxIntegerValue = isNegative ? Value::LargestUInt(-Value::minLargestInt)
592 : Value::maxLargestUInt;
593 Value::LargestUInt threshold = maxIntegerValue / 10;
594 Value::LargestUInt value = 0;
595 while ( current < token.end_ )
596 {
597 Char c = *current++;
598 if ( c < '0' || c > '9' )
599 return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token );
600 Value::UInt digit(c - '0');
601 if ( value >= threshold )
602 {
603 // We've hit or exceeded the max value divided by 10 (rounded down). If
604 // a) we've only just touched the limit, b) this is the last digit, and
605 // c) it's small enough to fit in that rounding delta, we're okay.
606 // Otherwise treat this number as a double to avoid overflow.
607 if (value > threshold ||
608 current != token.end_ ||
609 digit > maxIntegerValue % 10)
610 {
611 return decodeDouble( token );
612 }
613 }
614 value = value * 10 + digit;
615 }
616 if ( isNegative )
617 currentValue() = -Value::LargestInt( value );
618 else if ( value <= Value::LargestUInt(Value::maxInt) )
619 currentValue() = Value::LargestInt( value );
620 else
621 currentValue() = value;
622 return true;
623 }
624
625
626 bool
627 Reader::decodeDouble( Token &token )
628 {
629 double value = 0;
630 const int bufferSize = 32;
631 int count;
632 int length = int(token.end_ - token.start_);
633
634 // Sanity check to avoid buffer overflow exploits.
635 if (length < 0) {
636 return addError( "Unable to parse token length", token );
637 }
638
639 // Avoid using a string constant for the format control string given to
640 // sscanf, as this can cause hard to debug crashes on OS X. See here for more
641 // info:
642 //
643 // http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html
644 char format[] = "%lf";
645
646 if ( length <= bufferSize )
647 {
648 Char buffer[bufferSize+1];
649 memcpy( buffer, token.start_, length );
650 buffer[length] = 0;
651 count = sscanf( buffer, format, &value );
652 }
653 else
654 {
655 std::string buffer( token.start_, token.end_ );
656 count = sscanf( buffer.c_str(), format, &value );
657 }
658
659 if ( count != 1 )
660 return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token );
661 currentValue() = value;
662 return true;
663 }
664
665
666 bool
667 Reader::decodeString( Token &token )
668 {
669 std::string decoded;
670 if ( !decodeString( token, decoded ) )
671 return false;
672 currentValue() = decoded;
673 return true;
674 }
675
676
677 bool
678 Reader::decodeString( Token &token, std::string &decoded )
679 {
680 decoded.reserve( token.end_ - token.start_ - 2 );
681 Location current = token.start_ + 1; // skip '"'
682 Location end = token.end_ - 1; // do not include '"'
683 while ( current != end )
684 {
685 Char c = *current++;
686 if ( c == '"' )
687 break;
688 else if ( c == '\\' )
689 {
690 if ( current == end )
691 return addError( "Empty escape sequence in string", token, current );
692 Char escape = *current++;
693 switch ( escape )
694 {
695 case '"': decoded += '"'; break;
696 case '/': decoded += '/'; break;
697 case '\\': decoded += '\\'; break;
698 case 'b': decoded += '\b'; break;
699 case 'f': decoded += '\f'; break;
700 case 'n': decoded += '\n'; break;
701 case 'r': decoded += '\r'; break;
702 case 't': decoded += '\t'; break;
703 case 'u':
704 {
705 unsigned int unicode;
706 if ( !decodeUnicodeCodePoint( token, current, end, unicode ) )
707 return false;
708 decoded += codePointToUTF8(unicode);
709 }
710 break;
711 default:
712 return addError( "Bad escape sequence in string", token, current );
713 }
714 }
715 else
716 {
717 decoded += c;
718 }
719 }
720 return true;
721 }
722
723 bool
724 Reader::decodeUnicodeCodePoint( Token &token,
725 Location &current,
726 Location end,
727 unsigned int &unicode )
728 {
729
730 if ( !decodeUnicodeEscapeSequence( token, current, end, unicode ) )
731 return false;
732 if (unicode >= 0xD800 && unicode <= 0xDBFF)
733 {
734 // surrogate pairs
735 if (end - current < 6)
736 return addError( "additional six characters expected to parse unicode surrogate pair.", token, current );
737 unsigned int surrogatePair;
738 if (*(current++) == '\\' && *(current++)== 'u')
739 {
740 if (decodeUnicodeEscapeSequence( token, current, end, surrogatePair ))
741 {
742 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
743 }
744 else
745 return false;
746 }
747 else
748 return addError( "expecting another \\u token to begin the second half of a unicode surrogate pair", token, current );
749 }
750 return true;
751 }
752
753 bool
754 Reader::decodeUnicodeEscapeSequence( Token &token,
755 Location &current,
756 Location end,
757 unsigned int &unicode )
758 {
759 if ( end - current < 4 )
760 return addError( "Bad unicode escape sequence in string: four digits expected.", token, current );
761 unicode = 0;
762 for ( int index =0; index < 4; ++index )
763 {
764 Char c = *current++;
765 unicode *= 16;
766 if ( c >= '0' && c <= '9' )
767 unicode += c - '0';
768 else if ( c >= 'a' && c <= 'f' )
769 unicode += c - 'a' + 10;
770 else if ( c >= 'A' && c <= 'F' )
771 unicode += c - 'A' + 10;
772 else
773 return addError( "Bad unicode escape sequence in string: hexadecimal digit expected.", token, current );
774 }
775 return true;
776 }
777
778
779 bool
780 Reader::addError( const std::string &message,
781 Token &token,
782 Location extra )
783 {
784 ErrorInfo info;
785 info.token_ = token;
786 info.message_ = message;
787 info.extra_ = extra;
788 errors_.push_back( info );
789 return false;
790 }
791
792
793 bool
794 Reader::recoverFromError( TokenType skipUntilToken )
795 {
796 int errorCount = int(errors_.size());
797 Token skip;
798 for (;;)
799 {
800 if ( !readToken(skip) )
801 errors_.resize( errorCount ); // discard errors caused by recovery
802 if ( skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream )
803 break;
804 }
805 errors_.resize( errorCount );
806 return false;
807 }
808
809
810 bool
811 Reader::addErrorAndRecover( const std::string &message,
812 Token &token,
813 TokenType skipUntilToken )
814 {
815 addError( message, token );
816 return recoverFromError( skipUntilToken );
817 }
818
819
820 Value &
821 Reader::currentValue()
822 {
823 return *(nodes_.top());
824 }
825
826
827 Reader::Char
828 Reader::getNextChar()
829 {
830 if ( current_ == end_ )
831 return 0;
832 return *current_++;
833 }
834
835
836 void
837 Reader::getLocationLineAndColumn( Location location,
838 int &line,
839 int &column ) const
840 {
841 Location current = begin_;
842 Location lastLineStart = current;
843 line = 0;
844 while ( current < location && current != end_ )
845 {
846 Char c = *current++;
847 if ( c == '\r' )
848 {
849 if ( *current == '\n' )
850 ++current;
851 lastLineStart = current;
852 ++line;
853 }
854 else if ( c == '\n' )
855 {
856 lastLineStart = current;
857 ++line;
858 }
859 }
860 // column & line start at 1
861 column = int(location - lastLineStart) + 1;
862 ++line;
863 }
864
865
866 std::string
867 Reader::getLocationLineAndColumn( Location location ) const
868 {
869 int line, column;
870 getLocationLineAndColumn( location, line, column );
871 char buffer[18+16+16+1];
872 sprintf( buffer, "Line %d, Column %d", line, column );
873 return buffer;
874 }
875
876
877 // Deprecated. Preserved for backward compatibility
878 std::string
879 Reader::getFormatedErrorMessages() const
880 {
881 return getFormattedErrorMessages();
882 }
883
884
885 std::string
886 Reader::getFormattedErrorMessages() const
887 {
888 std::string formattedMessage;
889 for ( Errors::const_iterator itError = errors_.begin();
890 itError != errors_.end();
891 ++itError )
892 {
893 const ErrorInfo &error = *itError;
894 formattedMessage += "* " + getLocationLineAndColumn( error.token_.start_ ) + "\n";
895 formattedMessage += " " + error.message_ + "\n";
896 if ( error.extra_ )
897 formattedMessage += "See " + getLocationLineAndColumn( error.extra_ ) + " for detail.\n";
898 }
899 return formattedMessage;
900 }
901
902
903 std::istream& operator>>( std::istream &sin, Value &root )
904 {
905 Json::Reader reader;
906 bool ok = reader.parse(sin, root, true);
907 if (!ok) JSON_FAIL_MESSAGE(reader.getFormattedErrorMessages());
908 return sin;
909 }
910
911
912 } // namespace Json