annotate third_party/json/json_reader.cpp @ 0:add35537fdbb tip

Initial import
author irh <ian.r.hobson@gmail.com>
date Thu, 25 Aug 2011 11:05:55 +0100
parents
children
rev   line source
ian@0 1 // Copyright 2007-2011 Baptiste Lepilleur
ian@0 2 // Distributed under MIT license, or public domain if desired and
ian@0 3 // recognized in your jurisdiction.
ian@0 4 // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
ian@0 5
ian@0 6 #if !defined(JSON_IS_AMALGAMATION)
ian@0 7 # include <json/assertions.h>
ian@0 8 # include <json/reader.h>
ian@0 9 # include <json/value.h>
ian@0 10 # include "json_tool.h"
ian@0 11 #endif // if !defined(JSON_IS_AMALGAMATION)
ian@0 12 #include <utility>
ian@0 13 #include <cstdio>
ian@0 14 #include <cassert>
ian@0 15 #include <cstring>
ian@0 16 #include <iostream>
ian@0 17 #include <stdexcept>
ian@0 18
ian@0 19 #if _MSC_VER >= 1400 // VC++ 8.0
ian@0 20 #pragma warning( disable : 4996 ) // disable warning about strdup being deprecated.
ian@0 21 #endif
ian@0 22
ian@0 23 namespace Json {
ian@0 24
ian@0 25 // Implementation of class Features
ian@0 26 // ////////////////////////////////
ian@0 27
ian@0 28 Features::Features()
ian@0 29 : allowComments_( true )
ian@0 30 , strictRoot_( false )
ian@0 31 {
ian@0 32 }
ian@0 33
ian@0 34
ian@0 35 Features
ian@0 36 Features::all()
ian@0 37 {
ian@0 38 return Features();
ian@0 39 }
ian@0 40
ian@0 41
ian@0 42 Features
ian@0 43 Features::strictMode()
ian@0 44 {
ian@0 45 Features features;
ian@0 46 features.allowComments_ = false;
ian@0 47 features.strictRoot_ = true;
ian@0 48 return features;
ian@0 49 }
ian@0 50
ian@0 51 // Implementation of class Reader
ian@0 52 // ////////////////////////////////
ian@0 53
ian@0 54
ian@0 55 static inline bool
ian@0 56 in( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4 )
ian@0 57 {
ian@0 58 return c == c1 || c == c2 || c == c3 || c == c4;
ian@0 59 }
ian@0 60
ian@0 61 static inline bool
ian@0 62 in( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4, Reader::Char c5 )
ian@0 63 {
ian@0 64 return c == c1 || c == c2 || c == c3 || c == c4 || c == c5;
ian@0 65 }
ian@0 66
ian@0 67
ian@0 68 static bool
ian@0 69 containsNewLine( Reader::Location begin,
ian@0 70 Reader::Location end )
ian@0 71 {
ian@0 72 for ( ;begin < end; ++begin )
ian@0 73 if ( *begin == '\n' || *begin == '\r' )
ian@0 74 return true;
ian@0 75 return false;
ian@0 76 }
ian@0 77
ian@0 78
ian@0 79 // Class Reader
ian@0 80 // //////////////////////////////////////////////////////////////////
ian@0 81
ian@0 82 Reader::Reader()
ian@0 83 : errors_(),
ian@0 84 document_(),
ian@0 85 begin_(),
ian@0 86 end_(),
ian@0 87 current_(),
ian@0 88 lastValueEnd_(),
ian@0 89 lastValue_(),
ian@0 90 commentsBefore_(),
ian@0 91 features_( Features::all() ),
ian@0 92 collectComments_()
ian@0 93 {
ian@0 94 }
ian@0 95
ian@0 96
ian@0 97 Reader::Reader( const Features &features )
ian@0 98 : errors_(),
ian@0 99 document_(),
ian@0 100 begin_(),
ian@0 101 end_(),
ian@0 102 current_(),
ian@0 103 lastValueEnd_(),
ian@0 104 lastValue_(),
ian@0 105 commentsBefore_(),
ian@0 106 features_( features ),
ian@0 107 collectComments_()
ian@0 108 {
ian@0 109 }
ian@0 110
ian@0 111
ian@0 112 bool
ian@0 113 Reader::parse( const std::string &document,
ian@0 114 Value &root,
ian@0 115 bool collectComments )
ian@0 116 {
ian@0 117 document_ = document;
ian@0 118 const char *begin = document_.c_str();
ian@0 119 const char *end = begin + document_.length();
ian@0 120 return parse( begin, end, root, collectComments );
ian@0 121 }
ian@0 122
ian@0 123
ian@0 124 bool
ian@0 125 Reader::parse( std::istream& sin,
ian@0 126 Value &root,
ian@0 127 bool collectComments )
ian@0 128 {
ian@0 129 //std::istream_iterator<char> begin(sin);
ian@0 130 //std::istream_iterator<char> end;
ian@0 131 // Those would allow streamed input from a file, if parse() were a
ian@0 132 // template function.
ian@0 133
ian@0 134 // Since std::string is reference-counted, this at least does not
ian@0 135 // create an extra copy.
ian@0 136 std::string doc;
ian@0 137 std::getline(sin, doc, (char)EOF);
ian@0 138 return parse( doc, root, collectComments );
ian@0 139 }
ian@0 140
ian@0 141 bool
ian@0 142 Reader::parse( const char *beginDoc, const char *endDoc,
ian@0 143 Value &root,
ian@0 144 bool collectComments )
ian@0 145 {
ian@0 146 if ( !features_.allowComments_ )
ian@0 147 {
ian@0 148 collectComments = false;
ian@0 149 }
ian@0 150
ian@0 151 begin_ = beginDoc;
ian@0 152 end_ = endDoc;
ian@0 153 collectComments_ = collectComments;
ian@0 154 current_ = begin_;
ian@0 155 lastValueEnd_ = 0;
ian@0 156 lastValue_ = 0;
ian@0 157 commentsBefore_ = "";
ian@0 158 errors_.clear();
ian@0 159 while ( !nodes_.empty() )
ian@0 160 nodes_.pop();
ian@0 161 nodes_.push( &root );
ian@0 162
ian@0 163 bool successful = readValue();
ian@0 164 Token token;
ian@0 165 skipCommentTokens( token );
ian@0 166 if ( collectComments_ && !commentsBefore_.empty() )
ian@0 167 root.setComment( commentsBefore_, commentAfter );
ian@0 168 if ( features_.strictRoot_ )
ian@0 169 {
ian@0 170 if ( !root.isArray() && !root.isObject() )
ian@0 171 {
ian@0 172 // Set error location to start of doc, ideally should be first token found in doc
ian@0 173 token.type_ = tokenError;
ian@0 174 token.start_ = beginDoc;
ian@0 175 token.end_ = endDoc;
ian@0 176 addError( "A valid JSON document must be either an array or an object value.",
ian@0 177 token );
ian@0 178 return false;
ian@0 179 }
ian@0 180 }
ian@0 181 return successful;
ian@0 182 }
ian@0 183
ian@0 184
ian@0 185 bool
ian@0 186 Reader::readValue()
ian@0 187 {
ian@0 188 Token token;
ian@0 189 skipCommentTokens( token );
ian@0 190 bool successful = true;
ian@0 191
ian@0 192 if ( collectComments_ && !commentsBefore_.empty() )
ian@0 193 {
ian@0 194 currentValue().setComment( commentsBefore_, commentBefore );
ian@0 195 commentsBefore_ = "";
ian@0 196 }
ian@0 197
ian@0 198
ian@0 199 switch ( token.type_ )
ian@0 200 {
ian@0 201 case tokenObjectBegin:
ian@0 202 successful = readObject( token );
ian@0 203 break;
ian@0 204 case tokenArrayBegin:
ian@0 205 successful = readArray( token );
ian@0 206 break;
ian@0 207 case tokenNumber:
ian@0 208 successful = decodeNumber( token );
ian@0 209 break;
ian@0 210 case tokenString:
ian@0 211 successful = decodeString( token );
ian@0 212 break;
ian@0 213 case tokenTrue:
ian@0 214 currentValue() = true;
ian@0 215 break;
ian@0 216 case tokenFalse:
ian@0 217 currentValue() = false;
ian@0 218 break;
ian@0 219 case tokenNull:
ian@0 220 currentValue() = Value();
ian@0 221 break;
ian@0 222 default:
ian@0 223 return addError( "Syntax error: value, object or array expected.", token );
ian@0 224 }
ian@0 225
ian@0 226 if ( collectComments_ )
ian@0 227 {
ian@0 228 lastValueEnd_ = current_;
ian@0 229 lastValue_ = &currentValue();
ian@0 230 }
ian@0 231
ian@0 232 return successful;
ian@0 233 }
ian@0 234
ian@0 235
ian@0 236 void
ian@0 237 Reader::skipCommentTokens( Token &token )
ian@0 238 {
ian@0 239 if ( features_.allowComments_ )
ian@0 240 {
ian@0 241 do
ian@0 242 {
ian@0 243 readToken( token );
ian@0 244 }
ian@0 245 while ( token.type_ == tokenComment );
ian@0 246 }
ian@0 247 else
ian@0 248 {
ian@0 249 readToken( token );
ian@0 250 }
ian@0 251 }
ian@0 252
ian@0 253
ian@0 254 bool
ian@0 255 Reader::expectToken( TokenType type, Token &token, const char *message )
ian@0 256 {
ian@0 257 readToken( token );
ian@0 258 if ( token.type_ != type )
ian@0 259 return addError( message, token );
ian@0 260 return true;
ian@0 261 }
ian@0 262
ian@0 263
ian@0 264 bool
ian@0 265 Reader::readToken( Token &token )
ian@0 266 {
ian@0 267 skipSpaces();
ian@0 268 token.start_ = current_;
ian@0 269 Char c = getNextChar();
ian@0 270 bool ok = true;
ian@0 271 switch ( c )
ian@0 272 {
ian@0 273 case '{':
ian@0 274 token.type_ = tokenObjectBegin;
ian@0 275 break;
ian@0 276 case '}':
ian@0 277 token.type_ = tokenObjectEnd;
ian@0 278 break;
ian@0 279 case '[':
ian@0 280 token.type_ = tokenArrayBegin;
ian@0 281 break;
ian@0 282 case ']':
ian@0 283 token.type_ = tokenArrayEnd;
ian@0 284 break;
ian@0 285 case '"':
ian@0 286 token.type_ = tokenString;
ian@0 287 ok = readString();
ian@0 288 break;
ian@0 289 case '/':
ian@0 290 token.type_ = tokenComment;
ian@0 291 ok = readComment();
ian@0 292 break;
ian@0 293 case '0':
ian@0 294 case '1':
ian@0 295 case '2':
ian@0 296 case '3':
ian@0 297 case '4':
ian@0 298 case '5':
ian@0 299 case '6':
ian@0 300 case '7':
ian@0 301 case '8':
ian@0 302 case '9':
ian@0 303 case '-':
ian@0 304 token.type_ = tokenNumber;
ian@0 305 readNumber();
ian@0 306 break;
ian@0 307 case 't':
ian@0 308 token.type_ = tokenTrue;
ian@0 309 ok = match( "rue", 3 );
ian@0 310 break;
ian@0 311 case 'f':
ian@0 312 token.type_ = tokenFalse;
ian@0 313 ok = match( "alse", 4 );
ian@0 314 break;
ian@0 315 case 'n':
ian@0 316 token.type_ = tokenNull;
ian@0 317 ok = match( "ull", 3 );
ian@0 318 break;
ian@0 319 case ',':
ian@0 320 token.type_ = tokenArraySeparator;
ian@0 321 break;
ian@0 322 case ':':
ian@0 323 token.type_ = tokenMemberSeparator;
ian@0 324 break;
ian@0 325 case 0:
ian@0 326 token.type_ = tokenEndOfStream;
ian@0 327 break;
ian@0 328 default:
ian@0 329 ok = false;
ian@0 330 break;
ian@0 331 }
ian@0 332 if ( !ok )
ian@0 333 token.type_ = tokenError;
ian@0 334 token.end_ = current_;
ian@0 335 return true;
ian@0 336 }
ian@0 337
ian@0 338
ian@0 339 void
ian@0 340 Reader::skipSpaces()
ian@0 341 {
ian@0 342 while ( current_ != end_ )
ian@0 343 {
ian@0 344 Char c = *current_;
ian@0 345 if ( c == ' ' || c == '\t' || c == '\r' || c == '\n' )
ian@0 346 ++current_;
ian@0 347 else
ian@0 348 break;
ian@0 349 }
ian@0 350 }
ian@0 351
ian@0 352
ian@0 353 bool
ian@0 354 Reader::match( Location pattern,
ian@0 355 int patternLength )
ian@0 356 {
ian@0 357 if ( end_ - current_ < patternLength )
ian@0 358 return false;
ian@0 359 int index = patternLength;
ian@0 360 while ( index-- )
ian@0 361 if ( current_[index] != pattern[index] )
ian@0 362 return false;
ian@0 363 current_ += patternLength;
ian@0 364 return true;
ian@0 365 }
ian@0 366
ian@0 367
ian@0 368 bool
ian@0 369 Reader::readComment()
ian@0 370 {
ian@0 371 Location commentBegin = current_ - 1;
ian@0 372 Char c = getNextChar();
ian@0 373 bool successful = false;
ian@0 374 if ( c == '*' )
ian@0 375 successful = readCStyleComment();
ian@0 376 else if ( c == '/' )
ian@0 377 successful = readCppStyleComment();
ian@0 378 if ( !successful )
ian@0 379 return false;
ian@0 380
ian@0 381 if ( collectComments_ )
ian@0 382 {
ian@0 383 CommentPlacement placement = commentBefore;
ian@0 384 if ( lastValueEnd_ && !containsNewLine( lastValueEnd_, commentBegin ) )
ian@0 385 {
ian@0 386 if ( c != '*' || !containsNewLine( commentBegin, current_ ) )
ian@0 387 placement = commentAfterOnSameLine;
ian@0 388 }
ian@0 389
ian@0 390 addComment( commentBegin, current_, placement );
ian@0 391 }
ian@0 392 return true;
ian@0 393 }
ian@0 394
ian@0 395
ian@0 396 void
ian@0 397 Reader::addComment( Location begin,
ian@0 398 Location end,
ian@0 399 CommentPlacement placement )
ian@0 400 {
ian@0 401 assert( collectComments_ );
ian@0 402 if ( placement == commentAfterOnSameLine )
ian@0 403 {
ian@0 404 assert( lastValue_ != 0 );
ian@0 405 lastValue_->setComment( std::string( begin, end ), placement );
ian@0 406 }
ian@0 407 else
ian@0 408 {
ian@0 409 if ( !commentsBefore_.empty() )
ian@0 410 commentsBefore_ += "\n";
ian@0 411 commentsBefore_ += std::string( begin, end );
ian@0 412 }
ian@0 413 }
ian@0 414
ian@0 415
ian@0 416 bool
ian@0 417 Reader::readCStyleComment()
ian@0 418 {
ian@0 419 while ( current_ != end_ )
ian@0 420 {
ian@0 421 Char c = getNextChar();
ian@0 422 if ( c == '*' && *current_ == '/' )
ian@0 423 break;
ian@0 424 }
ian@0 425 return getNextChar() == '/';
ian@0 426 }
ian@0 427
ian@0 428
ian@0 429 bool
ian@0 430 Reader::readCppStyleComment()
ian@0 431 {
ian@0 432 while ( current_ != end_ )
ian@0 433 {
ian@0 434 Char c = getNextChar();
ian@0 435 if ( c == '\r' || c == '\n' )
ian@0 436 break;
ian@0 437 }
ian@0 438 return true;
ian@0 439 }
ian@0 440
ian@0 441
ian@0 442 void
ian@0 443 Reader::readNumber()
ian@0 444 {
ian@0 445 while ( current_ != end_ )
ian@0 446 {
ian@0 447 if ( !(*current_ >= '0' && *current_ <= '9') &&
ian@0 448 !in( *current_, '.', 'e', 'E', '+', '-' ) )
ian@0 449 break;
ian@0 450 ++current_;
ian@0 451 }
ian@0 452 }
ian@0 453
ian@0 454 bool
ian@0 455 Reader::readString()
ian@0 456 {
ian@0 457 Char c = 0;
ian@0 458 while ( current_ != end_ )
ian@0 459 {
ian@0 460 c = getNextChar();
ian@0 461 if ( c == '\\' )
ian@0 462 getNextChar();
ian@0 463 else if ( c == '"' )
ian@0 464 break;
ian@0 465 }
ian@0 466 return c == '"';
ian@0 467 }
ian@0 468
ian@0 469
ian@0 470 bool
ian@0 471 Reader::readObject( Token &/*tokenStart*/ )
ian@0 472 {
ian@0 473 Token tokenName;
ian@0 474 std::string name;
ian@0 475 currentValue() = Value( objectValue );
ian@0 476 while ( readToken( tokenName ) )
ian@0 477 {
ian@0 478 bool initialTokenOk = true;
ian@0 479 while ( tokenName.type_ == tokenComment && initialTokenOk )
ian@0 480 initialTokenOk = readToken( tokenName );
ian@0 481 if ( !initialTokenOk )
ian@0 482 break;
ian@0 483 if ( tokenName.type_ == tokenObjectEnd && name.empty() ) // empty object
ian@0 484 return true;
ian@0 485 if ( tokenName.type_ != tokenString )
ian@0 486 break;
ian@0 487
ian@0 488 name = "";
ian@0 489 if ( !decodeString( tokenName, name ) )
ian@0 490 return recoverFromError( tokenObjectEnd );
ian@0 491
ian@0 492 Token colon;
ian@0 493 if ( !readToken( colon ) || colon.type_ != tokenMemberSeparator )
ian@0 494 {
ian@0 495 return addErrorAndRecover( "Missing ':' after object member name",
ian@0 496 colon,
ian@0 497 tokenObjectEnd );
ian@0 498 }
ian@0 499 Value &value = currentValue()[ name ];
ian@0 500 nodes_.push( &value );
ian@0 501 bool ok = readValue();
ian@0 502 nodes_.pop();
ian@0 503 if ( !ok ) // error already set
ian@0 504 return recoverFromError( tokenObjectEnd );
ian@0 505
ian@0 506 Token comma;
ian@0 507 if ( !readToken( comma )
ian@0 508 || ( comma.type_ != tokenObjectEnd &&
ian@0 509 comma.type_ != tokenArraySeparator &&
ian@0 510 comma.type_ != tokenComment ) )
ian@0 511 {
ian@0 512 return addErrorAndRecover( "Missing ',' or '}' in object declaration",
ian@0 513 comma,
ian@0 514 tokenObjectEnd );
ian@0 515 }
ian@0 516 bool finalizeTokenOk = true;
ian@0 517 while ( comma.type_ == tokenComment &&
ian@0 518 finalizeTokenOk )
ian@0 519 finalizeTokenOk = readToken( comma );
ian@0 520 if ( comma.type_ == tokenObjectEnd )
ian@0 521 return true;
ian@0 522 }
ian@0 523 return addErrorAndRecover( "Missing '}' or object member name",
ian@0 524 tokenName,
ian@0 525 tokenObjectEnd );
ian@0 526 }
ian@0 527
ian@0 528
ian@0 529 bool
ian@0 530 Reader::readArray( Token &/*tokenStart*/ )
ian@0 531 {
ian@0 532 currentValue() = Value( arrayValue );
ian@0 533 skipSpaces();
ian@0 534 if ( *current_ == ']' ) // empty array
ian@0 535 {
ian@0 536 Token endArray;
ian@0 537 readToken( endArray );
ian@0 538 return true;
ian@0 539 }
ian@0 540 int index = 0;
ian@0 541 for (;;)
ian@0 542 {
ian@0 543 Value &value = currentValue()[ index++ ];
ian@0 544 nodes_.push( &value );
ian@0 545 bool ok = readValue();
ian@0 546 nodes_.pop();
ian@0 547 if ( !ok ) // error already set
ian@0 548 return recoverFromError( tokenArrayEnd );
ian@0 549
ian@0 550 Token token;
ian@0 551 // Accept Comment after last item in the array.
ian@0 552 ok = readToken( token );
ian@0 553 while ( token.type_ == tokenComment && ok )
ian@0 554 {
ian@0 555 ok = readToken( token );
ian@0 556 }
ian@0 557 bool badTokenType = ( token.type_ != tokenArraySeparator &&
ian@0 558 token.type_ != tokenArrayEnd );
ian@0 559 if ( !ok || badTokenType )
ian@0 560 {
ian@0 561 return addErrorAndRecover( "Missing ',' or ']' in array declaration",
ian@0 562 token,
ian@0 563 tokenArrayEnd );
ian@0 564 }
ian@0 565 if ( token.type_ == tokenArrayEnd )
ian@0 566 break;
ian@0 567 }
ian@0 568 return true;
ian@0 569 }
ian@0 570
ian@0 571
ian@0 572 bool
ian@0 573 Reader::decodeNumber( Token &token )
ian@0 574 {
ian@0 575 bool isDouble = false;
ian@0 576 for ( Location inspect = token.start_; inspect != token.end_; ++inspect )
ian@0 577 {
ian@0 578 isDouble = isDouble
ian@0 579 || in( *inspect, '.', 'e', 'E', '+' )
ian@0 580 || ( *inspect == '-' && inspect != token.start_ );
ian@0 581 }
ian@0 582 if ( isDouble )
ian@0 583 return decodeDouble( token );
ian@0 584 // Attempts to parse the number as an integer. If the number is
ian@0 585 // larger than the maximum supported value of an integer then
ian@0 586 // we decode the number as a double.
ian@0 587 Location current = token.start_;
ian@0 588 bool isNegative = *current == '-';
ian@0 589 if ( isNegative )
ian@0 590 ++current;
ian@0 591 Value::LargestUInt maxIntegerValue = isNegative ? Value::LargestUInt(-Value::minLargestInt)
ian@0 592 : Value::maxLargestUInt;
ian@0 593 Value::LargestUInt threshold = maxIntegerValue / 10;
ian@0 594 Value::LargestUInt value = 0;
ian@0 595 while ( current < token.end_ )
ian@0 596 {
ian@0 597 Char c = *current++;
ian@0 598 if ( c < '0' || c > '9' )
ian@0 599 return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token );
ian@0 600 Value::UInt digit(c - '0');
ian@0 601 if ( value >= threshold )
ian@0 602 {
ian@0 603 // We've hit or exceeded the max value divided by 10 (rounded down). If
ian@0 604 // a) we've only just touched the limit, b) this is the last digit, and
ian@0 605 // c) it's small enough to fit in that rounding delta, we're okay.
ian@0 606 // Otherwise treat this number as a double to avoid overflow.
ian@0 607 if (value > threshold ||
ian@0 608 current != token.end_ ||
ian@0 609 digit > maxIntegerValue % 10)
ian@0 610 {
ian@0 611 return decodeDouble( token );
ian@0 612 }
ian@0 613 }
ian@0 614 value = value * 10 + digit;
ian@0 615 }
ian@0 616 if ( isNegative )
ian@0 617 currentValue() = -Value::LargestInt( value );
ian@0 618 else if ( value <= Value::LargestUInt(Value::maxInt) )
ian@0 619 currentValue() = Value::LargestInt( value );
ian@0 620 else
ian@0 621 currentValue() = value;
ian@0 622 return true;
ian@0 623 }
ian@0 624
ian@0 625
ian@0 626 bool
ian@0 627 Reader::decodeDouble( Token &token )
ian@0 628 {
ian@0 629 double value = 0;
ian@0 630 const int bufferSize = 32;
ian@0 631 int count;
ian@0 632 int length = int(token.end_ - token.start_);
ian@0 633
ian@0 634 // Sanity check to avoid buffer overflow exploits.
ian@0 635 if (length < 0) {
ian@0 636 return addError( "Unable to parse token length", token );
ian@0 637 }
ian@0 638
ian@0 639 // Avoid using a string constant for the format control string given to
ian@0 640 // sscanf, as this can cause hard to debug crashes on OS X. See here for more
ian@0 641 // info:
ian@0 642 //
ian@0 643 // http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html
ian@0 644 char format[] = "%lf";
ian@0 645
ian@0 646 if ( length <= bufferSize )
ian@0 647 {
ian@0 648 Char buffer[bufferSize+1];
ian@0 649 memcpy( buffer, token.start_, length );
ian@0 650 buffer[length] = 0;
ian@0 651 count = sscanf( buffer, format, &value );
ian@0 652 }
ian@0 653 else
ian@0 654 {
ian@0 655 std::string buffer( token.start_, token.end_ );
ian@0 656 count = sscanf( buffer.c_str(), format, &value );
ian@0 657 }
ian@0 658
ian@0 659 if ( count != 1 )
ian@0 660 return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token );
ian@0 661 currentValue() = value;
ian@0 662 return true;
ian@0 663 }
ian@0 664
ian@0 665
ian@0 666 bool
ian@0 667 Reader::decodeString( Token &token )
ian@0 668 {
ian@0 669 std::string decoded;
ian@0 670 if ( !decodeString( token, decoded ) )
ian@0 671 return false;
ian@0 672 currentValue() = decoded;
ian@0 673 return true;
ian@0 674 }
ian@0 675
ian@0 676
ian@0 677 bool
ian@0 678 Reader::decodeString( Token &token, std::string &decoded )
ian@0 679 {
ian@0 680 decoded.reserve( token.end_ - token.start_ - 2 );
ian@0 681 Location current = token.start_ + 1; // skip '"'
ian@0 682 Location end = token.end_ - 1; // do not include '"'
ian@0 683 while ( current != end )
ian@0 684 {
ian@0 685 Char c = *current++;
ian@0 686 if ( c == '"' )
ian@0 687 break;
ian@0 688 else if ( c == '\\' )
ian@0 689 {
ian@0 690 if ( current == end )
ian@0 691 return addError( "Empty escape sequence in string", token, current );
ian@0 692 Char escape = *current++;
ian@0 693 switch ( escape )
ian@0 694 {
ian@0 695 case '"': decoded += '"'; break;
ian@0 696 case '/': decoded += '/'; break;
ian@0 697 case '\\': decoded += '\\'; break;
ian@0 698 case 'b': decoded += '\b'; break;
ian@0 699 case 'f': decoded += '\f'; break;
ian@0 700 case 'n': decoded += '\n'; break;
ian@0 701 case 'r': decoded += '\r'; break;
ian@0 702 case 't': decoded += '\t'; break;
ian@0 703 case 'u':
ian@0 704 {
ian@0 705 unsigned int unicode;
ian@0 706 if ( !decodeUnicodeCodePoint( token, current, end, unicode ) )
ian@0 707 return false;
ian@0 708 decoded += codePointToUTF8(unicode);
ian@0 709 }
ian@0 710 break;
ian@0 711 default:
ian@0 712 return addError( "Bad escape sequence in string", token, current );
ian@0 713 }
ian@0 714 }
ian@0 715 else
ian@0 716 {
ian@0 717 decoded += c;
ian@0 718 }
ian@0 719 }
ian@0 720 return true;
ian@0 721 }
ian@0 722
ian@0 723 bool
ian@0 724 Reader::decodeUnicodeCodePoint( Token &token,
ian@0 725 Location &current,
ian@0 726 Location end,
ian@0 727 unsigned int &unicode )
ian@0 728 {
ian@0 729
ian@0 730 if ( !decodeUnicodeEscapeSequence( token, current, end, unicode ) )
ian@0 731 return false;
ian@0 732 if (unicode >= 0xD800 && unicode <= 0xDBFF)
ian@0 733 {
ian@0 734 // surrogate pairs
ian@0 735 if (end - current < 6)
ian@0 736 return addError( "additional six characters expected to parse unicode surrogate pair.", token, current );
ian@0 737 unsigned int surrogatePair;
ian@0 738 if (*(current++) == '\\' && *(current++)== 'u')
ian@0 739 {
ian@0 740 if (decodeUnicodeEscapeSequence( token, current, end, surrogatePair ))
ian@0 741 {
ian@0 742 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
ian@0 743 }
ian@0 744 else
ian@0 745 return false;
ian@0 746 }
ian@0 747 else
ian@0 748 return addError( "expecting another \\u token to begin the second half of a unicode surrogate pair", token, current );
ian@0 749 }
ian@0 750 return true;
ian@0 751 }
ian@0 752
ian@0 753 bool
ian@0 754 Reader::decodeUnicodeEscapeSequence( Token &token,
ian@0 755 Location &current,
ian@0 756 Location end,
ian@0 757 unsigned int &unicode )
ian@0 758 {
ian@0 759 if ( end - current < 4 )
ian@0 760 return addError( "Bad unicode escape sequence in string: four digits expected.", token, current );
ian@0 761 unicode = 0;
ian@0 762 for ( int index =0; index < 4; ++index )
ian@0 763 {
ian@0 764 Char c = *current++;
ian@0 765 unicode *= 16;
ian@0 766 if ( c >= '0' && c <= '9' )
ian@0 767 unicode += c - '0';
ian@0 768 else if ( c >= 'a' && c <= 'f' )
ian@0 769 unicode += c - 'a' + 10;
ian@0 770 else if ( c >= 'A' && c <= 'F' )
ian@0 771 unicode += c - 'A' + 10;
ian@0 772 else
ian@0 773 return addError( "Bad unicode escape sequence in string: hexadecimal digit expected.", token, current );
ian@0 774 }
ian@0 775 return true;
ian@0 776 }
ian@0 777
ian@0 778
ian@0 779 bool
ian@0 780 Reader::addError( const std::string &message,
ian@0 781 Token &token,
ian@0 782 Location extra )
ian@0 783 {
ian@0 784 ErrorInfo info;
ian@0 785 info.token_ = token;
ian@0 786 info.message_ = message;
ian@0 787 info.extra_ = extra;
ian@0 788 errors_.push_back( info );
ian@0 789 return false;
ian@0 790 }
ian@0 791
ian@0 792
ian@0 793 bool
ian@0 794 Reader::recoverFromError( TokenType skipUntilToken )
ian@0 795 {
ian@0 796 int errorCount = int(errors_.size());
ian@0 797 Token skip;
ian@0 798 for (;;)
ian@0 799 {
ian@0 800 if ( !readToken(skip) )
ian@0 801 errors_.resize( errorCount ); // discard errors caused by recovery
ian@0 802 if ( skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream )
ian@0 803 break;
ian@0 804 }
ian@0 805 errors_.resize( errorCount );
ian@0 806 return false;
ian@0 807 }
ian@0 808
ian@0 809
ian@0 810 bool
ian@0 811 Reader::addErrorAndRecover( const std::string &message,
ian@0 812 Token &token,
ian@0 813 TokenType skipUntilToken )
ian@0 814 {
ian@0 815 addError( message, token );
ian@0 816 return recoverFromError( skipUntilToken );
ian@0 817 }
ian@0 818
ian@0 819
ian@0 820 Value &
ian@0 821 Reader::currentValue()
ian@0 822 {
ian@0 823 return *(nodes_.top());
ian@0 824 }
ian@0 825
ian@0 826
ian@0 827 Reader::Char
ian@0 828 Reader::getNextChar()
ian@0 829 {
ian@0 830 if ( current_ == end_ )
ian@0 831 return 0;
ian@0 832 return *current_++;
ian@0 833 }
ian@0 834
ian@0 835
ian@0 836 void
ian@0 837 Reader::getLocationLineAndColumn( Location location,
ian@0 838 int &line,
ian@0 839 int &column ) const
ian@0 840 {
ian@0 841 Location current = begin_;
ian@0 842 Location lastLineStart = current;
ian@0 843 line = 0;
ian@0 844 while ( current < location && current != end_ )
ian@0 845 {
ian@0 846 Char c = *current++;
ian@0 847 if ( c == '\r' )
ian@0 848 {
ian@0 849 if ( *current == '\n' )
ian@0 850 ++current;
ian@0 851 lastLineStart = current;
ian@0 852 ++line;
ian@0 853 }
ian@0 854 else if ( c == '\n' )
ian@0 855 {
ian@0 856 lastLineStart = current;
ian@0 857 ++line;
ian@0 858 }
ian@0 859 }
ian@0 860 // column & line start at 1
ian@0 861 column = int(location - lastLineStart) + 1;
ian@0 862 ++line;
ian@0 863 }
ian@0 864
ian@0 865
ian@0 866 std::string
ian@0 867 Reader::getLocationLineAndColumn( Location location ) const
ian@0 868 {
ian@0 869 int line, column;
ian@0 870 getLocationLineAndColumn( location, line, column );
ian@0 871 char buffer[18+16+16+1];
ian@0 872 sprintf( buffer, "Line %d, Column %d", line, column );
ian@0 873 return buffer;
ian@0 874 }
ian@0 875
ian@0 876
ian@0 877 // Deprecated. Preserved for backward compatibility
ian@0 878 std::string
ian@0 879 Reader::getFormatedErrorMessages() const
ian@0 880 {
ian@0 881 return getFormattedErrorMessages();
ian@0 882 }
ian@0 883
ian@0 884
ian@0 885 std::string
ian@0 886 Reader::getFormattedErrorMessages() const
ian@0 887 {
ian@0 888 std::string formattedMessage;
ian@0 889 for ( Errors::const_iterator itError = errors_.begin();
ian@0 890 itError != errors_.end();
ian@0 891 ++itError )
ian@0 892 {
ian@0 893 const ErrorInfo &error = *itError;
ian@0 894 formattedMessage += "* " + getLocationLineAndColumn( error.token_.start_ ) + "\n";
ian@0 895 formattedMessage += " " + error.message_ + "\n";
ian@0 896 if ( error.extra_ )
ian@0 897 formattedMessage += "See " + getLocationLineAndColumn( error.extra_ ) + " for detail.\n";
ian@0 898 }
ian@0 899 return formattedMessage;
ian@0 900 }
ian@0 901
ian@0 902
ian@0 903 std::istream& operator>>( std::istream &sin, Value &root )
ian@0 904 {
ian@0 905 Json::Reader reader;
ian@0 906 bool ok = reader.parse(sin, root, true);
ian@0 907 if (!ok) JSON_FAIL_MESSAGE(reader.getFormattedErrorMessages());
ian@0 908 return sin;
ian@0 909 }
ian@0 910
ian@0 911
ian@0 912 } // namespace Json