JsonCpp project page JsonCpp home page

json_reader.cpp
Go to the documentation of this file.
1 // Copyright 2007-2011 Baptiste Lepilleur
2 // Distributed under MIT license, or public domain if desired and
3 // recognized in your jurisdiction.
4 // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
5 
6 #if !defined(JSON_IS_AMALGAMATION)
7 #include <json/assertions.h>
8 #include <json/reader.h>
9 #include <json/value.h>
10 #include "json_tool.h"
11 #endif // if !defined(JSON_IS_AMALGAMATION)
12 #include <utility>
13 #include <cstdio>
14 #include <cassert>
15 #include <cstring>
16 #include <istream>
17 #include <sstream>
18 #include <memory>
19 #include <set>
20 #include <limits>
21 
22 #if defined(_MSC_VER)
23 #if !defined(WINCE) && defined(__STDC_SECURE_LIB__) && _MSC_VER >= 1500 // VC++ 9.0 and above
24 #define snprintf sprintf_s
25 #elif _MSC_VER >= 1900 // VC++ 14.0 and above
26 #define snprintf std::snprintf
27 #else
28 #define snprintf _snprintf
29 #endif
30 #elif defined(__ANDROID__) || defined(__QNXNTO__)
31 #define snprintf snprintf
32 #elif __cplusplus >= 201103L
33 #define snprintf std::snprintf
34 #endif
35 
36 #if defined(__QNXNTO__)
37 #define sscanf std::sscanf
38 #endif
39 
40 #if defined(_MSC_VER) && _MSC_VER >= 1400 // VC++ 8.0
41 // Disable warning about strdup being deprecated.
42 #pragma warning(disable : 4996)
43 #endif
44 
45 static int const stackLimit_g = 1000;
46 static int stackDepth_g = 0; // see readValue()
47 
48 namespace Json {
49 
50 #if __cplusplus >= 201103L || (defined(_CPPLIB_VER) && _CPPLIB_VER >= 520)
51 typedef std::unique_ptr<CharReader> CharReaderPtr;
52 #else
53 typedef std::auto_ptr<CharReader> CharReaderPtr;
54 #endif
55 
56 // Implementation of class Features
57 // ////////////////////////////////
58 
60  : allowComments_(true), strictRoot_(false),
61  allowDroppedNullPlaceholders_(false), allowNumericKeys_(false) {}
62 
64 
66  Features features;
67  features.allowComments_ = false;
68  features.strictRoot_ = true;
69  features.allowDroppedNullPlaceholders_ = false;
70  features.allowNumericKeys_ = false;
71  return features;
72 }
73 
74 // Implementation of class Reader
75 // ////////////////////////////////
76 
78  for (; begin < end; ++begin)
79  if (*begin == '\n' || *begin == '\r')
80  return true;
81  return false;
82 }
83 
84 // Class Reader
85 // //////////////////////////////////////////////////////////////////
86 
88  : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
89  lastValue_(), commentsBefore_(), features_(Features::all()),
90  collectComments_() {}
91 
92 Reader::Reader(const Features& features)
93  : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
94  lastValue_(), commentsBefore_(), features_(features), collectComments_() {
95 }
96 
97 bool
98 Reader::parse(const std::string& document, Value& root, bool collectComments) {
99  document_ = document;
100  const char* begin = document_.c_str();
101  const char* end = begin + document_.length();
102  return parse(begin, end, root, collectComments);
103 }
104 
105 bool Reader::parse(std::istream& sin, Value& root, bool collectComments) {
106  // std::istream_iterator<char> begin(sin);
107  // std::istream_iterator<char> end;
108  // Those would allow streamed input from a file, if parse() were a
109  // template function.
110 
111  // Since std::string is reference-counted, this at least does not
112  // create an extra copy.
113  std::string doc;
114  std::getline(sin, doc, (char)EOF);
115  return parse(doc, root, collectComments);
116 }
117 
118 bool Reader::parse(const char* beginDoc,
119  const char* endDoc,
120  Value& root,
121  bool collectComments) {
122  if (!features_.allowComments_) {
123  collectComments = false;
124  }
125 
126  begin_ = beginDoc;
127  end_ = endDoc;
128  collectComments_ = collectComments;
129  current_ = begin_;
130  lastValueEnd_ = 0;
131  lastValue_ = 0;
132  commentsBefore_ = "";
133  errors_.clear();
134  while (!nodes_.empty())
135  nodes_.pop();
136  nodes_.push(&root);
137 
138  stackDepth_g = 0; // Yes, this is bad coding, but options are limited.
139  bool successful = readValue();
140  Token token;
141  skipCommentTokens(token);
142  if (collectComments_ && !commentsBefore_.empty())
143  root.setComment(commentsBefore_, commentAfter);
144  if (features_.strictRoot_) {
145  if (!root.isArray() && !root.isObject()) {
146  // Set error location to start of doc, ideally should be first token found
147  // in doc
148  token.type_ = tokenError;
149  token.start_ = beginDoc;
150  token.end_ = endDoc;
151  addError(
152  "A valid JSON document must be either an array or an object value.",
153  token);
154  return false;
155  }
156  }
157  return successful;
158 }
159 
// Reads the next value from the input and stores it in currentValue().
// Leading comment tokens are skipped; pending "before" comments are attached
// to the value when comment collection is on. Returns false when the token is
// not a valid value start (an error is recorded) or when a nested decode
// fails. Calls throwRuntimeError once the file-level depth counter reaches
// stackLimit_g.
160 bool Reader::readValue() {
161 // This is a non-reentrant way to support a stackLimit. Terrible!
162 // But this deprecated class has a security problem: Bad input can
163 // cause a seg-fault. This seems like a fair, binary-compatible way
164 // to prevent the problem.
165 if (stackDepth_g >= stackLimit_g) throwRuntimeError("Exceeded stackLimit in readValue().");
166 ++stackDepth_g;
167
168 Token token;
169 skipCommentTokens(token);
170 bool successful = true;
171
// Comments gathered before this value are attached to it and then cleared.
172 if (collectComments_ && !commentsBefore_.empty()) {
173 currentValue().setComment(commentsBefore_, commentBefore);
174 commentsBefore_ = "";
175 }
176
177 switch (token.type_) {
178 case tokenObjectBegin:
179 successful = readObject(token);
// The container's end offset is wherever the cursor stopped.
180 currentValue().setOffsetLimit(current_ - begin_);
181 break;
182 case tokenArrayBegin:
183 successful = readArray(token);
184 currentValue().setOffsetLimit(current_ - begin_);
185 break;
186 case tokenNumber:
187 successful = decodeNumber(token);
188 break;
189 case tokenString:
190 successful = decodeString(token);
191 break;
192 case tokenTrue:
193 {
194 Value v(true);
195 currentValue().swapPayload(v);
196 currentValue().setOffsetStart(token.start_ - begin_);
197 currentValue().setOffsetLimit(token.end_ - begin_);
198 }
199 break;
200 case tokenFalse:
201 {
202 Value v(false);
203 currentValue().swapPayload(v);
204 currentValue().setOffsetStart(token.start_ - begin_);
205 currentValue().setOffsetLimit(token.end_ - begin_);
206 }
207 break;
208 case tokenNull:
209 {
210 Value v;
211 currentValue().swapPayload(v);
212 currentValue().setOffsetStart(token.start_ - begin_);
213 currentValue().setOffsetLimit(token.end_ - begin_);
214 }
215 break;
216 case tokenArraySeparator:
217 case tokenObjectEnd:
218 case tokenArrayEnd:
219 if (features_.allowDroppedNullPlaceholders_) {
220 // "Un-read" the current token and mark the current value as a null
221 // token.
222 current_--;
223 Value v;
224 currentValue().swapPayload(v);
225 currentValue().setOffsetStart(current_ - begin_ - 1);
226 currentValue().setOffsetLimit(current_ - begin_);
227 break;
228 } // Else, fall through...
229 default:
230 currentValue().setOffsetStart(token.start_ - begin_);
231 currentValue().setOffsetLimit(token.end_ - begin_);
// NOTE: this early return leaves stackDepth_g incremented; parse() resets
// the counter to 0 at the start of each document.
232 return addError("Syntax error: value, object or array expected.", token);
233 }
234
// Remember where this value ended so readComment() can decide whether a
// following comment sits on the same line.
235 if (collectComments_) {
236 lastValueEnd_ = current_;
237 lastValue_ = &currentValue();
238 }
239
240 --stackDepth_g;
241 return successful;
242 }
243 
244 void Reader::skipCommentTokens(Token& token) {
245  if (features_.allowComments_) {
246  do {
247  readToken(token);
248  } while (token.type_ == tokenComment);
249  } else {
250  readToken(token);
251  }
252 }
253 
// Scans one token starting at the current position (after skipping
// whitespace) and fills in its type and [start_, end_) range.
// This function always returns true; a malformed token is reported by
// setting token.type_ to tokenError.
254 bool Reader::readToken(Token& token) {
255 skipSpaces();
256 token.start_ = current_;
// getNextChar() yields 0 at end of input, which maps to tokenEndOfStream.
257 Char c = getNextChar();
258 bool ok = true;
259 switch (c) {
260 case '{':
261 token.type_ = tokenObjectBegin;
262 break;
263 case '}':
264 token.type_ = tokenObjectEnd;
265 break;
266 case '[':
267 token.type_ = tokenArrayBegin;
268 break;
269 case ']':
270 token.type_ = tokenArrayEnd;
271 break;
272 case '"':
273 token.type_ = tokenString;
274 ok = readString();
275 break;
276 case '/':
277 token.type_ = tokenComment;
278 ok = readComment();
279 break;
280 case '0':
281 case '1':
282 case '2':
283 case '3':
284 case '4':
285 case '5':
286 case '6':
287 case '7':
288 case '8':
289 case '9':
290 case '-':
291 token.type_ = tokenNumber;
// The number's text is only delimited here; it is validated later by
// decodeNumber().
292 readNumber();
293 break;
294 case 't':
295 token.type_ = tokenTrue;
296 ok = match("rue", 3);
297 break;
298 case 'f':
299 token.type_ = tokenFalse;
300 ok = match("alse", 4);
301 break;
302 case 'n':
303 token.type_ = tokenNull;
304 ok = match("ull", 3);
305 break;
306 case ',':
307 token.type_ = tokenArraySeparator;
308 break;
309 case ':':
310 token.type_ = tokenMemberSeparator;
311 break;
312 case 0:
313 token.type_ = tokenEndOfStream;
314 break;
315 default:
316 ok = false;
317 break;
318 }
// Any failed sub-scan overrides the tentative type with tokenError.
319 if (!ok)
320 token.type_ = tokenError;
321 token.end_ = current_;
322 return true;
323 }
324 
325 void Reader::skipSpaces() {
326  while (current_ != end_) {
327  Char c = *current_;
328  if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
329  ++current_;
330  else
331  break;
332  }
333 }
334 
335 bool Reader::match(Location pattern, int patternLength) {
336  if (end_ - current_ < patternLength)
337  return false;
338  int index = patternLength;
339  while (index--)
340  if (current_[index] != pattern[index])
341  return false;
342  current_ += patternLength;
343  return true;
344 }
345 
346 bool Reader::readComment() {
347  Location commentBegin = current_ - 1;
348  Char c = getNextChar();
349  bool successful = false;
350  if (c == '*')
351  successful = readCStyleComment();
352  else if (c == '/')
353  successful = readCppStyleComment();
354  if (!successful)
355  return false;
356 
357  if (collectComments_) {
358  CommentPlacement placement = commentBefore;
359  if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
360  if (c != '*' || !containsNewLine(commentBegin, current_))
361  placement = commentAfterOnSameLine;
362  }
363 
364  addComment(commentBegin, current_, placement);
365  }
366  return true;
367 }
368 
369 static std::string normalizeEOL(Reader::Location begin, Reader::Location end) {
370  std::string normalized;
371  normalized.reserve(end - begin);
372  Reader::Location current = begin;
373  while (current != end) {
374  char c = *current++;
375  if (c == '\r') {
376  if (current != end && *current == '\n')
377  // convert dos EOL
378  ++current;
379  // convert Mac EOL
380  normalized += '\n';
381  } else {
382  normalized += c;
383  }
384  }
385  return normalized;
386 }
387 
388 void
389 Reader::addComment(Location begin, Location end, CommentPlacement placement) {
390  assert(collectComments_);
391  const std::string& normalized = normalizeEOL(begin, end);
392  if (placement == commentAfterOnSameLine) {
393  assert(lastValue_ != 0);
394  lastValue_->setComment(normalized, placement);
395  } else {
396  commentsBefore_ += normalized;
397  }
398 }
399 
400 bool Reader::readCStyleComment() {
401  while (current_ != end_) {
402  Char c = getNextChar();
403  if (c == '*' && *current_ == '/')
404  break;
405  }
406  return getNextChar() == '/';
407 }
408 
409 bool Reader::readCppStyleComment() {
410  while (current_ != end_) {
411  Char c = getNextChar();
412  if (c == '\n')
413  break;
414  if (c == '\r') {
415  // Consume DOS EOL. It will be normalized in addComment.
416  if (current_ != end_ && *current_ == '\n')
417  getNextChar();
418  // Break on Moc OS 9 EOL.
419  break;
420  }
421  }
422  return true;
423 }
424 
// Advances current_ past the text of a number whose first character has
// already been consumed by readToken(). Only delimits the token: optional
// integral digits, optional '.' fraction, optional exponent with sign.
// Each step stores p into current_ before reading, so current_ ends up at
// the first character that is not part of the number.
425 void Reader::readNumber() {
426 const char *p = current_;
427 char c = '0'; // stopgap for already consumed character
428 // integral part
429 while (c >= '0' && c <= '9')
// Reads the next character, or 0 once p has reached end_.
430 c = (current_ = p) < end_ ? *p++ : 0;
431 // fractional part
432 if (c == '.') {
433 c = (current_ = p) < end_ ? *p++ : 0;
434 while (c >= '0' && c <= '9')
435 c = (current_ = p) < end_ ? *p++ : 0;
436 }
437 // exponential part
438 if (c == 'e' || c == 'E') {
439 c = (current_ = p) < end_ ? *p++ : 0;
440 if (c == '+' || c == '-')
441 c = (current_ = p) < end_ ? *p++ : 0;
442 while (c >= '0' && c <= '9')
443 c = (current_ = p) < end_ ? *p++ : 0;
444 }
445 }
446 
447 bool Reader::readString() {
448  Char c = 0;
449  while (current_ != end_) {
450  c = getNextChar();
451  if (c == '\\')
452  getNextChar();
453  else if (c == '"')
454  break;
455  }
456  return c == '"';
457 }
458 
// Parses an object whose '{' token (tokenStart) has already been read.
// The current value is replaced by an empty object, then members are read
// as name ':' value, separated by ',' and terminated by '}'. Comment tokens
// are skipped before a member name and after a member value. On a syntax
// error an error is recorded, input is skipped up to the next '}' or end of
// stream, and false is returned.
459 bool Reader::readObject(Token& tokenStart) {
460 Token tokenName;
461 std::string name;
462 Value init(objectValue);
463 currentValue().swapPayload(init);
464 currentValue().setOffsetStart(tokenStart.start_ - begin_);
465 while (readToken(tokenName)) {
466 bool initialTokenOk = true;
467 while (tokenName.type_ == tokenComment && initialTokenOk)
468 initialTokenOk = readToken(tokenName);
469 if (!initialTokenOk)
470 break;
// `name` is still empty only before the first member has been read, so a
// '}' here means "{}" rather than a trailing comma.
471 if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
472 return true;
473 name = "";
474 if (tokenName.type_ == tokenString) {
475 if (!decodeString(tokenName, name))
476 return recoverFromError(tokenObjectEnd);
477 } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
// Numeric keys are decoded as numbers and converted to their string form.
478 Value numberName;
479 if (!decodeNumber(tokenName, numberName))
480 return recoverFromError(tokenObjectEnd);
481 name = numberName.asString();
482 } else {
483 break;
484 }
485
486 Token colon;
487 if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
488 return addErrorAndRecover(
489 "Missing ':' after object member name", colon, tokenObjectEnd);
490 }
// The member becomes the current value while it is being parsed.
491 Value& value = currentValue()[name];
492 nodes_.push(&value);
493 bool ok = readValue();
494 nodes_.pop();
495 if (!ok) // error already set
496 return recoverFromError(tokenObjectEnd);
497
498 Token comma;
499 if (!readToken(comma) ||
500 (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
501 comma.type_ != tokenComment)) {
502 return addErrorAndRecover(
503 "Missing ',' or '}' in object declaration", comma, tokenObjectEnd);
504 }
505 bool finalizeTokenOk = true;
506 while (comma.type_ == tokenComment && finalizeTokenOk)
507 finalizeTokenOk = readToken(comma);
508 if (comma.type_ == tokenObjectEnd)
509 return true;
510 }
// Reached when the token after '{' or ',' is neither a usable name nor '}'.
511 return addErrorAndRecover(
512 "Missing '}' or object member name", tokenName, tokenObjectEnd);
513 }
514 
515 bool Reader::readArray(Token& tokenStart) {
516  Value init(arrayValue);
517  currentValue().swapPayload(init);
518  currentValue().setOffsetStart(tokenStart.start_ - begin_);
519  skipSpaces();
520  if (*current_ == ']') // empty array
521  {
522  Token endArray;
523  readToken(endArray);
524  return true;
525  }
526  int index = 0;
527  for (;;) {
528  Value& value = currentValue()[index++];
529  nodes_.push(&value);
530  bool ok = readValue();
531  nodes_.pop();
532  if (!ok) // error already set
533  return recoverFromError(tokenArrayEnd);
534 
535  Token token;
536  // Accept Comment after last item in the array.
537  ok = readToken(token);
538  while (token.type_ == tokenComment && ok) {
539  ok = readToken(token);
540  }
541  bool badTokenType =
542  (token.type_ != tokenArraySeparator && token.type_ != tokenArrayEnd);
543  if (!ok || badTokenType) {
544  return addErrorAndRecover(
545  "Missing ',' or ']' in array declaration", token, tokenArrayEnd);
546  }
547  if (token.type_ == tokenArrayEnd)
548  break;
549  }
550  return true;
551 }
552 
553 bool Reader::decodeNumber(Token& token) {
554  Value decoded;
555  if (!decodeNumber(token, decoded))
556  return false;
557  currentValue().swapPayload(decoded);
558  currentValue().setOffsetStart(token.start_ - begin_);
559  currentValue().setOffsetLimit(token.end_ - begin_);
560  return true;
561 }
562 
563 bool Reader::decodeNumber(Token& token, Value& decoded) {
564  // Attempts to parse the number as an integer. If the number is
565  // larger than the maximum supported value of an integer then
566  // we decode the number as a double.
567  Location current = token.start_;
568  bool isNegative = *current == '-';
569  if (isNegative)
570  ++current;
571  // TODO: Help the compiler do the div and mod at compile time or get rid of them.
572  Value::LargestUInt maxIntegerValue =
573  isNegative ? Value::LargestUInt(Value::maxLargestInt) + 1
575  Value::LargestUInt threshold = maxIntegerValue / 10;
576  Value::LargestUInt value = 0;
577  while (current < token.end_) {
578  Char c = *current++;
579  if (c < '0' || c > '9')
580  return decodeDouble(token, decoded);
581  Value::UInt digit(c - '0');
582  if (value >= threshold) {
583  // We've hit or exceeded the max value divided by 10 (rounded down). If
584  // a) we've only just touched the limit, b) this is the last digit, and
585  // c) it's small enough to fit in that rounding delta, we're okay.
586  // Otherwise treat this number as a double to avoid overflow.
587  if (value > threshold || current != token.end_ ||
588  digit > maxIntegerValue % 10) {
589  return decodeDouble(token, decoded);
590  }
591  }
592  value = value * 10 + digit;
593  }
594  if (isNegative && value == maxIntegerValue)
595  decoded = Value::minLargestInt;
596  else if (isNegative)
597  decoded = -Value::LargestInt(value);
598  else if (value <= Value::LargestUInt(Value::maxInt))
599  decoded = Value::LargestInt(value);
600  else
601  decoded = value;
602  return true;
603 }
604 
605 bool Reader::decodeDouble(Token& token) {
606  Value decoded;
607  if (!decodeDouble(token, decoded))
608  return false;
609  currentValue().swapPayload(decoded);
610  currentValue().setOffsetStart(token.start_ - begin_);
611  currentValue().setOffsetLimit(token.end_ - begin_);
612  return true;
613 }
614 
615 bool Reader::decodeDouble(Token& token, Value& decoded) {
616  double value = 0;
617  std::string buffer(token.start_, token.end_);
618  std::istringstream is(buffer);
619  if (!(is >> value))
620  return addError("'" + std::string(token.start_, token.end_) +
621  "' is not a number.",
622  token);
623  decoded = value;
624  return true;
625 }
626 
627 bool Reader::decodeString(Token& token) {
628  std::string decoded_string;
629  if (!decodeString(token, decoded_string))
630  return false;
631  Value decoded(decoded_string);
632  currentValue().swapPayload(decoded);
633  currentValue().setOffsetStart(token.start_ - begin_);
634  currentValue().setOffsetLimit(token.end_ - begin_);
635  return true;
636 }
637 
// Appends the unescaped content of a string token to `decoded`.
// The token range is expected to include both enclosing quotes, which are
// skipped. Backslash escapes are translated; \u escapes are converted to
// UTF-8 through codePointToUTF8(). Returns false (with an error recorded)
// on an empty, unknown or malformed escape sequence.
638 bool Reader::decodeString(Token& token, std::string& decoded) {
// Token length minus the two quotes is an upper bound on the decoded size.
639 decoded.reserve(token.end_ - token.start_ - 2);
640 Location current = token.start_ + 1; // skip '"'
641 Location end = token.end_ - 1; // do not include '"'
642 while (current != end) {
643 Char c = *current++;
644 if (c == '"')
645 break;
646 else if (c == '\\') {
647 if (current == end)
648 return addError("Empty escape sequence in string", token, current);
649 Char escape = *current++;
650 switch (escape) {
651 case '"':
652 decoded += '"';
653 break;
654 case '/':
655 decoded += '/';
656 break;
657 case '\\':
658 decoded += '\\';
659 break;
660 case 'b':
661 decoded += '\b';
662 break;
663 case 'f':
664 decoded += '\f';
665 break;
666 case 'n':
667 decoded += '\n';
668 break;
669 case 'r':
670 decoded += '\r';
671 break;
672 case 't':
673 decoded += '\t';
674 break;
675 case 'u': {
// decodeUnicodeCodePoint() advances `current` past the hex digits (and past
// a second \uXXXX when the first is a high surrogate).
676 unsigned int unicode;
677 if (!decodeUnicodeCodePoint(token, current, end, unicode))
678 return false;
679 decoded += codePointToUTF8(unicode);
680 } break;
681 default:
682 return addError("Bad escape sequence in string", token, current);
683 }
684 } else {
685 decoded += c;
686 }
687 }
688 return true;
689 }
690 
691 bool Reader::decodeUnicodeCodePoint(Token& token,
692  Location& current,
693  Location end,
694  unsigned int& unicode) {
695 
696  if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
697  return false;
698  if (unicode >= 0xD800 && unicode <= 0xDBFF) {
699  // surrogate pairs
700  if (end - current < 6)
701  return addError(
702  "additional six characters expected to parse unicode surrogate pair.",
703  token,
704  current);
705  unsigned int surrogatePair;
706  if (*(current++) == '\\' && *(current++) == 'u') {
707  if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
708  unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
709  } else
710  return false;
711  } else
712  return addError("expecting another \\u token to begin the second half of "
713  "a unicode surrogate pair",
714  token,
715  current);
716  }
717  return true;
718 }
719 
720 bool Reader::decodeUnicodeEscapeSequence(Token& token,
721  Location& current,
722  Location end,
723  unsigned int& unicode) {
724  if (end - current < 4)
725  return addError(
726  "Bad unicode escape sequence in string: four digits expected.",
727  token,
728  current);
729  unicode = 0;
730  for (int index = 0; index < 4; ++index) {
731  Char c = *current++;
732  unicode *= 16;
733  if (c >= '0' && c <= '9')
734  unicode += c - '0';
735  else if (c >= 'a' && c <= 'f')
736  unicode += c - 'a' + 10;
737  else if (c >= 'A' && c <= 'F')
738  unicode += c - 'A' + 10;
739  else
740  return addError(
741  "Bad unicode escape sequence in string: hexadecimal digit expected.",
742  token,
743  current);
744  }
745  return true;
746 }
747 
748 bool
749 Reader::addError(const std::string& message, Token& token, Location extra) {
750  ErrorInfo info;
751  info.token_ = token;
752  info.message_ = message;
753  info.extra_ = extra;
754  errors_.push_back(info);
755  return false;
756 }
757 
758 bool Reader::recoverFromError(TokenType skipUntilToken) {
759  int errorCount = int(errors_.size());
760  Token skip;
761  for (;;) {
762  if (!readToken(skip))
763  errors_.resize(errorCount); // discard errors caused by recovery
764  if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
765  break;
766  }
767  errors_.resize(errorCount);
768  return false;
769 }
770 
771 bool Reader::addErrorAndRecover(const std::string& message,
772  Token& token,
773  TokenType skipUntilToken) {
774  addError(message, token);
775  return recoverFromError(skipUntilToken);
776 }
777 
778 Value& Reader::currentValue() { return *(nodes_.top()); }
779 
780 Reader::Char Reader::getNextChar() {
781  if (current_ == end_)
782  return 0;
783  return *current_++;
784 }
785 
786 void Reader::getLocationLineAndColumn(Location location,
787  int& line,
788  int& column) const {
789  Location current = begin_;
790  Location lastLineStart = current;
791  line = 0;
792  while (current < location && current != end_) {
793  Char c = *current++;
794  if (c == '\r') {
795  if (*current == '\n')
796  ++current;
797  lastLineStart = current;
798  ++line;
799  } else if (c == '\n') {
800  lastLineStart = current;
801  ++line;
802  }
803  }
804  // column & line start at 1
805  column = int(location - lastLineStart) + 1;
806  ++line;
807 }
808 
809 std::string Reader::getLocationLineAndColumn(Location location) const {
810  int line, column;
811  getLocationLineAndColumn(location, line, column);
812  char buffer[18 + 16 + 16 + 1];
813  snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
814  return buffer;
815 }
816 
817 // Deprecated. Preserved for backward compatibility
818 std::string Reader::getFormatedErrorMessages() const {
819  return getFormattedErrorMessages();
820 }
821 
823  std::string formattedMessage;
824  for (Errors::const_iterator itError = errors_.begin();
825  itError != errors_.end();
826  ++itError) {
827  const ErrorInfo& error = *itError;
828  formattedMessage +=
829  "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
830  formattedMessage += " " + error.message_ + "\n";
831  if (error.extra_)
832  formattedMessage +=
833  "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
834  }
835  return formattedMessage;
836 }
837 
838 std::vector<Reader::StructuredError> Reader::getStructuredErrors() const {
839  std::vector<Reader::StructuredError> allErrors;
840  for (Errors::const_iterator itError = errors_.begin();
841  itError != errors_.end();
842  ++itError) {
843  const ErrorInfo& error = *itError;
844  Reader::StructuredError structured;
845  structured.offset_start = error.token_.start_ - begin_;
846  structured.offset_limit = error.token_.end_ - begin_;
847  structured.message = error.message_;
848  allErrors.push_back(structured);
849  }
850  return allErrors;
851 }
852 
853 bool Reader::pushError(const Value& value, const std::string& message) {
854  size_t length = end_ - begin_;
855  if(value.getOffsetStart() > length
856  || value.getOffsetLimit() > length)
857  return false;
858  Token token;
859  token.type_ = tokenError;
860  token.start_ = begin_ + value.getOffsetStart();
861  token.end_ = end_ + value.getOffsetLimit();
862  ErrorInfo info;
863  info.token_ = token;
864  info.message_ = message;
865  info.extra_ = 0;
866  errors_.push_back(info);
867  return true;
868 }
869 
870 bool Reader::pushError(const Value& value, const std::string& message, const Value& extra) {
871  size_t length = end_ - begin_;
872  if(value.getOffsetStart() > length
873  || value.getOffsetLimit() > length
874  || extra.getOffsetLimit() > length)
875  return false;
876  Token token;
877  token.type_ = tokenError;
878  token.start_ = begin_ + value.getOffsetStart();
879  token.end_ = begin_ + value.getOffsetLimit();
880  ErrorInfo info;
881  info.token_ = token;
882  info.message_ = message;
883  info.extra_ = begin_ + extra.getOffsetStart();
884  errors_.push_back(info);
885  return true;
886 }
887 
888 bool Reader::good() const {
889  return !errors_.size();
890 }
891 
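892 // Modeled on Features, with additional options (allowSingleQuotes_,
// failIfExtra_, rejectDupKeys_, allowSpecialFloats_, stackLimit_).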
// Option set consumed by OurReader. The first four flags have the same names
// as the members initialized by Features; the remaining members exist only
// here. No constructor is declared, so the members have no defaults of their
// own.
893 class OurFeatures {
894 public:
// Returns a value-initialized OurFeatures (see the definition after the class).
895 static OurFeatures all();
// The flags below are read by the OurReader code visible in this file.
896 bool allowComments_;
897 bool strictRoot_;
898 bool allowDroppedNullPlaceholders_;
899 bool allowNumericKeys_;
// NOTE(review): the three flags below are not read in the visible part of
// this file; presumably used further down — confirm.
900 bool allowSingleQuotes_;
// When set, OurReader::parse() rejects non-whitespace after the root value.
901 bool failIfExtra_;
902 bool rejectDupKeys_;
903 bool allowSpecialFloats_;
// Nesting depth at which OurReader::readValue() calls throwRuntimeError.
904 int stackLimit_;
905 }; // OurFeatures
906 
907 // exact copy of Implementation of class Features
908 // ////////////////////////////////
909 
910 OurFeatures OurFeatures::all() { return OurFeatures(); }
911 
912 // Implementation of class Reader
913 // ////////////////////////////////
914 
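915 // Derived from Reader and renamed to OurReader; configured by OurFeatures
// and extended with extra token types and a per-instance stack depth.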
// JSON reader configured by an OurFeatures instance. Its interface mirrors
// the Reader methods defined earlier in this file, with additional token
// types (NaN / +Inf / -Inf) and a per-instance nesting-depth counter in
// place of the file-level one used by Reader.
916 class OurReader {
917 public:
918 typedef char Char;
// A Location is a pointer into the document being parsed.
919 typedef const Char* Location;
// Error description with offsets relative to the start of the document.
920 struct StructuredError {
921 size_t offset_start;
922 size_t offset_limit;
923 std::string message;
924 };
925
926 OurReader(OurFeatures const& features);
// Parses the range [beginDoc, endDoc) into `root`.
927 bool parse(const char* beginDoc,
928 const char* endDoc,
929 Value& root,
930 bool collectComments = true);
931 std::string getFormattedErrorMessages() const;
932 std::vector<StructuredError> getStructuredErrors() const;
933 bool pushError(const Value& value, const std::string& message);
934 bool pushError(const Value& value, const std::string& message, const Value& extra);
935 bool good() const;
936
937 private:
// Declared but not implemented: instances are not copyable.
938 OurReader(OurReader const&); // no impl
939 void operator=(OurReader const&); // no impl
940
941 enum TokenType {
942 tokenEndOfStream = 0,
943 tokenObjectBegin,
944 tokenObjectEnd,
945 tokenArrayBegin,
946 tokenArrayEnd,
947 tokenString,
948 tokenNumber,
949 tokenTrue,
950 tokenFalse,
951 tokenNull,
952 tokenNaN,
953 tokenPosInf,
954 tokenNegInf,
955 tokenArraySeparator,
956 tokenMemberSeparator,
957 tokenComment,
958 tokenError
959 };
960
// A lexical token: its type and the half-open source range [start_, end_).
961 class Token {
962 public:
963 TokenType type_;
964 Location start_;
965 Location end_;
966 };
967
// One recorded error: the offending token, the message and an optional
// extra location.
968 class ErrorInfo {
969 public:
970 Token token_;
971 std::string message_;
972 Location extra_;
973 };
974
975 typedef std::deque<ErrorInfo> Errors;
976
977 bool readToken(Token& token);
978 void skipSpaces();
979 bool match(Location pattern, int patternLength);
980 bool readComment();
981 bool readCStyleComment();
982 bool readCppStyleComment();
983 bool readString();
984 bool readStringSingleQuote();
985 bool readNumber(bool checkInf);
986 bool readValue();
987 bool readObject(Token& token);
988 bool readArray(Token& token);
989 bool decodeNumber(Token& token);
990 bool decodeNumber(Token& token, Value& decoded);
991 bool decodeString(Token& token);
992 bool decodeString(Token& token, std::string& decoded);
993 bool decodeDouble(Token& token);
994 bool decodeDouble(Token& token, Value& decoded);
995 bool decodeUnicodeCodePoint(Token& token,
996 Location& current,
997 Location end,
998 unsigned int& unicode);
999 bool decodeUnicodeEscapeSequence(Token& token,
1000 Location& current,
1001 Location end,
1002 unsigned int& unicode);
1003 bool addError(const std::string& message, Token& token, Location extra = 0);
1004 bool recoverFromError(TokenType skipUntilToken);
1005 bool addErrorAndRecover(const std::string& message,
1006 Token& token,
1007 TokenType skipUntilToken);
1008 void skipUntilSpace();
1009 Value& currentValue();
1010 Char getNextChar();
1011 void
1012 getLocationLineAndColumn(Location location, int& line, int& column) const;
1013 std::string getLocationLineAndColumn(Location location) const;
1014 void addComment(Location begin, Location end, CommentPlacement placement);
1015 void skipCommentTokens(Token& token);
1016
// Stack of values under construction; parse() pushes the root first.
1017 typedef std::stack<Value*> Nodes;
1018 Nodes nodes_;
1019 Errors errors_;
1020 std::string document_;
// [begin_, end_) is the document; current_ is the read cursor.
1021 Location begin_;
1022 Location end_;
1023 Location current_;
1024 Location lastValueEnd_;
1025 Value* lastValue_;
1026 std::string commentsBefore_;
// Nesting depth of readValue(), checked against features_.stackLimit_.
1027 int stackDepth_;
1028
1029 OurFeatures const features_;
1030 bool collectComments_;
1031 }; // OurReader
1032 
1033 // Complete copy of the Reader implementation, adapted for OurReader.
1034 
1035 OurReader::OurReader(OurFeatures const& features)
1036  : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
1037  lastValue_(), commentsBefore_(),
1038  stackDepth_(0),
1039  features_(features), collectComments_() {
1040 }
1041 
1042 bool OurReader::parse(const char* beginDoc,
1043  const char* endDoc,
1044  Value& root,
1045  bool collectComments) {
1046  if (!features_.allowComments_) {
1047  collectComments = false;
1048  }
1049 
1050  begin_ = beginDoc;
1051  end_ = endDoc;
1052  collectComments_ = collectComments;
1053  current_ = begin_;
1054  lastValueEnd_ = 0;
1055  lastValue_ = 0;
1056  commentsBefore_ = "";
1057  errors_.clear();
1058  while (!nodes_.empty())
1059  nodes_.pop();
1060  nodes_.push(&root);
1061 
1062  stackDepth_ = 0;
1063  bool successful = readValue();
1064  Token token;
1065  skipCommentTokens(token);
1066  if (features_.failIfExtra_) {
1067  if (token.type_ != tokenError && token.type_ != tokenEndOfStream) {
1068  addError("Extra non-whitespace after JSON value.", token);
1069  return false;
1070  }
1071  }
1072  if (collectComments_ && !commentsBefore_.empty())
1073  root.setComment(commentsBefore_, commentAfter);
1074  if (features_.strictRoot_) {
1075  if (!root.isArray() && !root.isObject()) {
1076  // Set error location to start of doc, ideally should be first token found
1077  // in doc
1078  token.type_ = tokenError;
1079  token.start_ = beginDoc;
1080  token.end_ = endDoc;
1081  addError(
1082  "A valid JSON document must be either an array or an object value.",
1083  token);
1084  return false;
1085  }
1086  }
1087  return successful;
1088 }
1089 
1090 bool OurReader::readValue() {
1091  if (stackDepth_ >= features_.stackLimit_) throwRuntimeError("Exceeded stackLimit in readValue().");
1092  ++stackDepth_;
1093  Token token;
1094  skipCommentTokens(token);
1095  bool successful = true;
1096 
1097  if (collectComments_ && !commentsBefore_.empty()) {
1098  currentValue().setComment(commentsBefore_, commentBefore);
1099  commentsBefore_ = "";
1100  }
1101 
1102  switch (token.type_) {
1103  case tokenObjectBegin:
1104  successful = readObject(token);
1105  currentValue().setOffsetLimit(current_ - begin_);
1106  break;
1107  case tokenArrayBegin:
1108  successful = readArray(token);
1109  currentValue().setOffsetLimit(current_ - begin_);
1110  break;
1111  case tokenNumber:
1112  successful = decodeNumber(token);
1113  break;
1114  case tokenString:
1115  successful = decodeString(token);
1116  break;
1117  case tokenTrue:
1118  {
1119  Value v(true);
1120  currentValue().swapPayload(v);
1121  currentValue().setOffsetStart(token.start_ - begin_);
1122  currentValue().setOffsetLimit(token.end_ - begin_);
1123  }
1124  break;
1125  case tokenFalse:
1126  {
1127  Value v(false);
1128  currentValue().swapPayload(v);
1129  currentValue().setOffsetStart(token.start_ - begin_);
1130  currentValue().setOffsetLimit(token.end_ - begin_);
1131  }
1132  break;
1133  case tokenNull:
1134  {
1135  Value v;
1136  currentValue().swapPayload(v);
1137  currentValue().setOffsetStart(token.start_ - begin_);
1138  currentValue().setOffsetLimit(token.end_ - begin_);
1139  }
1140  break;
1141  case tokenNaN:
1142  {
1143  Value v(std::numeric_limits<double>::quiet_NaN());
1144  currentValue().swapPayload(v);
1145  currentValue().setOffsetStart(token.start_ - begin_);
1146  currentValue().setOffsetLimit(token.end_ - begin_);
1147  }
1148  break;
1149  case tokenPosInf:
1150  {
1151  Value v(std::numeric_limits<double>::infinity());
1152  currentValue().swapPayload(v);
1153  currentValue().setOffsetStart(token.start_ - begin_);
1154  currentValue().setOffsetLimit(token.end_ - begin_);
1155  }
1156  break;
1157  case tokenNegInf:
1158  {
1159  Value v(-std::numeric_limits<double>::infinity());
1160  currentValue().swapPayload(v);
1161  currentValue().setOffsetStart(token.start_ - begin_);
1162  currentValue().setOffsetLimit(token.end_ - begin_);
1163  }
1164  break;
1165  case tokenArraySeparator:
1166  case tokenObjectEnd:
1167  case tokenArrayEnd:
1168  if (features_.allowDroppedNullPlaceholders_) {
1169  // "Un-read" the current token and mark the current value as a null
1170  // token.
1171  current_--;
1172  Value v;
1173  currentValue().swapPayload(v);
1174  currentValue().setOffsetStart(current_ - begin_ - 1);
1175  currentValue().setOffsetLimit(current_ - begin_);
1176  break;
1177  } // else, fall through ...
1178  default:
1179  currentValue().setOffsetStart(token.start_ - begin_);
1180  currentValue().setOffsetLimit(token.end_ - begin_);
1181  return addError("Syntax error: value, object or array expected.", token);
1182  }
1183 
1184  if (collectComments_) {
1185  lastValueEnd_ = current_;
1186  lastValue_ = &currentValue();
1187  }
1188 
1189  --stackDepth_;
1190  return successful;
1191 }
1192 
1193 void OurReader::skipCommentTokens(Token& token) {
1194  if (features_.allowComments_) {
1195  do {
1196  readToken(token);
1197  } while (token.type_ == tokenComment);
1198  } else {
1199  readToken(token);
1200  }
1201 }
1202 
1203 bool OurReader::readToken(Token& token) {
1204  skipSpaces();
1205  token.start_ = current_;
1206  Char c = getNextChar();
1207  bool ok = true;
1208  switch (c) {
1209  case '{':
1210  token.type_ = tokenObjectBegin;
1211  break;
1212  case '}':
1213  token.type_ = tokenObjectEnd;
1214  break;
1215  case '[':
1216  token.type_ = tokenArrayBegin;
1217  break;
1218  case ']':
1219  token.type_ = tokenArrayEnd;
1220  break;
1221  case '"':
1222  token.type_ = tokenString;
1223  ok = readString();
1224  break;
1225  case '\'':
1226  if (features_.allowSingleQuotes_) {
1227  token.type_ = tokenString;
1228  ok = readStringSingleQuote();
1229  break;
1230  } // else continue
1231  case '/':
1232  token.type_ = tokenComment;
1233  ok = readComment();
1234  break;
1235  case '0':
1236  case '1':
1237  case '2':
1238  case '3':
1239  case '4':
1240  case '5':
1241  case '6':
1242  case '7':
1243  case '8':
1244  case '9':
1245  token.type_ = tokenNumber;
1246  readNumber(false);
1247  break;
1248  case '-':
1249  if (readNumber(true)) {
1250  token.type_ = tokenNumber;
1251  } else {
1252  token.type_ = tokenNegInf;
1253  ok = features_.allowSpecialFloats_ && match("nfinity", 7);
1254  }
1255  break;
1256  case 't':
1257  token.type_ = tokenTrue;
1258  ok = match("rue", 3);
1259  break;
1260  case 'f':
1261  token.type_ = tokenFalse;
1262  ok = match("alse", 4);
1263  break;
1264  case 'n':
1265  token.type_ = tokenNull;
1266  ok = match("ull", 3);
1267  break;
1268  case 'N':
1269  if (features_.allowSpecialFloats_) {
1270  token.type_ = tokenNaN;
1271  ok = match("aN", 2);
1272  } else {
1273  ok = false;
1274  }
1275  break;
1276  case 'I':
1277  if (features_.allowSpecialFloats_) {
1278  token.type_ = tokenPosInf;
1279  ok = match("nfinity", 7);
1280  } else {
1281  ok = false;
1282  }
1283  break;
1284  case ',':
1285  token.type_ = tokenArraySeparator;
1286  break;
1287  case ':':
1288  token.type_ = tokenMemberSeparator;
1289  break;
1290  case 0:
1291  token.type_ = tokenEndOfStream;
1292  break;
1293  default:
1294  ok = false;
1295  break;
1296  }
1297  if (!ok)
1298  token.type_ = tokenError;
1299  token.end_ = current_;
1300  return true;
1301 }
1302 
1303 void OurReader::skipSpaces() {
1304  while (current_ != end_) {
1305  Char c = *current_;
1306  if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
1307  ++current_;
1308  else
1309  break;
1310  }
1311 }
1312 
1313 bool OurReader::match(Location pattern, int patternLength) {
1314  if (end_ - current_ < patternLength)
1315  return false;
1316  int index = patternLength;
1317  while (index--)
1318  if (current_[index] != pattern[index])
1319  return false;
1320  current_ += patternLength;
1321  return true;
1322 }
1323 
1324 bool OurReader::readComment() {
1325  Location commentBegin = current_ - 1;
1326  Char c = getNextChar();
1327  bool successful = false;
1328  if (c == '*')
1329  successful = readCStyleComment();
1330  else if (c == '/')
1331  successful = readCppStyleComment();
1332  if (!successful)
1333  return false;
1334 
1335  if (collectComments_) {
1336  CommentPlacement placement = commentBefore;
1337  if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
1338  if (c != '*' || !containsNewLine(commentBegin, current_))
1339  placement = commentAfterOnSameLine;
1340  }
1341 
1342  addComment(commentBegin, current_, placement);
1343  }
1344  return true;
1345 }
1346 
1347 void
1348 OurReader::addComment(Location begin, Location end, CommentPlacement placement) {
1349  assert(collectComments_);
1350  const std::string& normalized = normalizeEOL(begin, end);
1351  if (placement == commentAfterOnSameLine) {
1352  assert(lastValue_ != 0);
1353  lastValue_->setComment(normalized, placement);
1354  } else {
1355  commentsBefore_ += normalized;
1356  }
1357 }
1358 
1359 bool OurReader::readCStyleComment() {
1360  while (current_ != end_) {
1361  Char c = getNextChar();
1362  if (c == '*' && *current_ == '/')
1363  break;
1364  }
1365  return getNextChar() == '/';
1366 }
1367 
1368 bool OurReader::readCppStyleComment() {
1369  while (current_ != end_) {
1370  Char c = getNextChar();
1371  if (c == '\n')
1372  break;
1373  if (c == '\r') {
1374  // Consume DOS EOL. It will be normalized in addComment.
1375  if (current_ != end_ && *current_ == '\n')
1376  getNextChar();
1377  // Break on Moc OS 9 EOL.
1378  break;
1379  }
1380  }
1381  return true;
1382 }
1383 
1384 bool OurReader::readNumber(bool checkInf) {
1385  const char *p = current_;
1386  if (checkInf && p != end_ && *p == 'I') {
1387  current_ = ++p;
1388  return false;
1389  }
1390  char c = '0'; // stopgap for already consumed character
1391  // integral part
1392  while (c >= '0' && c <= '9')
1393  c = (current_ = p) < end_ ? *p++ : 0;
1394  // fractional part
1395  if (c == '.') {
1396  c = (current_ = p) < end_ ? *p++ : 0;
1397  while (c >= '0' && c <= '9')
1398  c = (current_ = p) < end_ ? *p++ : 0;
1399  }
1400  // exponential part
1401  if (c == 'e' || c == 'E') {
1402  c = (current_ = p) < end_ ? *p++ : 0;
1403  if (c == '+' || c == '-')
1404  c = (current_ = p) < end_ ? *p++ : 0;
1405  while (c >= '0' && c <= '9')
1406  c = (current_ = p) < end_ ? *p++ : 0;
1407  }
1408  return true;
1409 }
1410 bool OurReader::readString() {
1411  Char c = 0;
1412  while (current_ != end_) {
1413  c = getNextChar();
1414  if (c == '\\')
1415  getNextChar();
1416  else if (c == '"')
1417  break;
1418  }
1419  return c == '"';
1420 }
1421 
1422 
1423 bool OurReader::readStringSingleQuote() {
1424  Char c = 0;
1425  while (current_ != end_) {
1426  c = getNextChar();
1427  if (c == '\\')
1428  getNextChar();
1429  else if (c == '\'')
1430  break;
1431  }
1432  return c == '\'';
1433 }
1434 
1435 bool OurReader::readObject(Token& tokenStart) {
1436  Token tokenName;
1437  std::string name;
1438  Value init(objectValue);
1439  currentValue().swapPayload(init);
1440  currentValue().setOffsetStart(tokenStart.start_ - begin_);
1441  while (readToken(tokenName)) {
1442  bool initialTokenOk = true;
1443  while (tokenName.type_ == tokenComment && initialTokenOk)
1444  initialTokenOk = readToken(tokenName);
1445  if (!initialTokenOk)
1446  break;
1447  if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
1448  return true;
1449  name = "";
1450  if (tokenName.type_ == tokenString) {
1451  if (!decodeString(tokenName, name))
1452  return recoverFromError(tokenObjectEnd);
1453  } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
1454  Value numberName;
1455  if (!decodeNumber(tokenName, numberName))
1456  return recoverFromError(tokenObjectEnd);
1457  name = numberName.asString();
1458  } else {
1459  break;
1460  }
1461 
1462  Token colon;
1463  if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
1464  return addErrorAndRecover(
1465  "Missing ':' after object member name", colon, tokenObjectEnd);
1466  }
1467  if (name.length() >= (1U<<30)) throwRuntimeError("keylength >= 2^30");
1468  if (features_.rejectDupKeys_ && currentValue().isMember(name)) {
1469  std::string msg = "Duplicate key: '" + name + "'";
1470  return addErrorAndRecover(
1471  msg, tokenName, tokenObjectEnd);
1472  }
1473  Value& value = currentValue()[name];
1474  nodes_.push(&value);
1475  bool ok = readValue();
1476  nodes_.pop();
1477  if (!ok) // error already set
1478  return recoverFromError(tokenObjectEnd);
1479 
1480  Token comma;
1481  if (!readToken(comma) ||
1482  (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
1483  comma.type_ != tokenComment)) {
1484  return addErrorAndRecover(
1485  "Missing ',' or '}' in object declaration", comma, tokenObjectEnd);
1486  }
1487  bool finalizeTokenOk = true;
1488  while (comma.type_ == tokenComment && finalizeTokenOk)
1489  finalizeTokenOk = readToken(comma);
1490  if (comma.type_ == tokenObjectEnd)
1491  return true;
1492  }
1493  return addErrorAndRecover(
1494  "Missing '}' or object member name", tokenName, tokenObjectEnd);
1495 }
1496 
1497 bool OurReader::readArray(Token& tokenStart) {
1498  Value init(arrayValue);
1499  currentValue().swapPayload(init);
1500  currentValue().setOffsetStart(tokenStart.start_ - begin_);
1501  skipSpaces();
1502  if (*current_ == ']') // empty array
1503  {
1504  Token endArray;
1505  readToken(endArray);
1506  return true;
1507  }
1508  int index = 0;
1509  for (;;) {
1510  Value& value = currentValue()[index++];
1511  nodes_.push(&value);
1512  bool ok = readValue();
1513  nodes_.pop();
1514  if (!ok) // error already set
1515  return recoverFromError(tokenArrayEnd);
1516 
1517  Token token;
1518  // Accept Comment after last item in the array.
1519  ok = readToken(token);
1520  while (token.type_ == tokenComment && ok) {
1521  ok = readToken(token);
1522  }
1523  bool badTokenType =
1524  (token.type_ != tokenArraySeparator && token.type_ != tokenArrayEnd);
1525  if (!ok || badTokenType) {
1526  return addErrorAndRecover(
1527  "Missing ',' or ']' in array declaration", token, tokenArrayEnd);
1528  }
1529  if (token.type_ == tokenArrayEnd)
1530  break;
1531  }
1532  return true;
1533 }
1534 
1535 bool OurReader::decodeNumber(Token& token) {
1536  Value decoded;
1537  if (!decodeNumber(token, decoded))
1538  return false;
1539  currentValue().swapPayload(decoded);
1540  currentValue().setOffsetStart(token.start_ - begin_);
1541  currentValue().setOffsetLimit(token.end_ - begin_);
1542  return true;
1543 }
1544 
1545 bool OurReader::decodeNumber(Token& token, Value& decoded) {
1546  // Attempts to parse the number as an integer. If the number is
1547  // larger than the maximum supported value of an integer then
1548  // we decode the number as a double.
1549  Location current = token.start_;
1550  bool isNegative = *current == '-';
1551  if (isNegative)
1552  ++current;
1553  // TODO: Help the compiler do the div and mod at compile time or get rid of them.
1554  Value::LargestUInt maxIntegerValue =
1556  : Value::maxLargestUInt;
1557  Value::LargestUInt threshold = maxIntegerValue / 10;
1558  Value::LargestUInt value = 0;
1559  while (current < token.end_) {
1560  Char c = *current++;
1561  if (c < '0' || c > '9')
1562  return decodeDouble(token, decoded);
1563  Value::UInt digit(c - '0');
1564  if (value >= threshold) {
1565  // We've hit or exceeded the max value divided by 10 (rounded down). If
1566  // a) we've only just touched the limit, b) this is the last digit, and
1567  // c) it's small enough to fit in that rounding delta, we're okay.
1568  // Otherwise treat this number as a double to avoid overflow.
1569  if (value > threshold || current != token.end_ ||
1570  digit > maxIntegerValue % 10) {
1571  return decodeDouble(token, decoded);
1572  }
1573  }
1574  value = value * 10 + digit;
1575  }
1576  if (isNegative)
1577  decoded = -Value::LargestInt(value);
1578  else if (value <= Value::LargestUInt(Value::maxInt))
1579  decoded = Value::LargestInt(value);
1580  else
1581  decoded = value;
1582  return true;
1583 }
1584 
1585 bool OurReader::decodeDouble(Token& token) {
1586  Value decoded;
1587  if (!decodeDouble(token, decoded))
1588  return false;
1589  currentValue().swapPayload(decoded);
1590  currentValue().setOffsetStart(token.start_ - begin_);
1591  currentValue().setOffsetLimit(token.end_ - begin_);
1592  return true;
1593 }
1594 
1595 bool OurReader::decodeDouble(Token& token, Value& decoded) {
1596  double value = 0;
1597  const int bufferSize = 32;
1598  int count;
1599  int length = int(token.end_ - token.start_);
1600 
1601  // Sanity check to avoid buffer overflow exploits.
1602  if (length < 0) {
1603  return addError("Unable to parse token length", token);
1604  }
1605 
1606  // Avoid using a string constant for the format control string given to
1607  // sscanf, as this can cause hard to debug crashes on OS X. See here for more
1608  // info:
1609  //
1610  // http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html
1611  char format[] = "%lf";
1612 
1613  if (length <= bufferSize) {
1614  Char buffer[bufferSize + 1];
1615  memcpy(buffer, token.start_, length);
1616  buffer[length] = 0;
1617  count = sscanf(buffer, format, &value);
1618  } else {
1619  std::string buffer(token.start_, token.end_);
1620  count = sscanf(buffer.c_str(), format, &value);
1621  }
1622 
1623  if (count != 1)
1624  return addError("'" + std::string(token.start_, token.end_) +
1625  "' is not a number.",
1626  token);
1627  decoded = value;
1628  return true;
1629 }
1630 
1631 bool OurReader::decodeString(Token& token) {
1632  std::string decoded_string;
1633  if (!decodeString(token, decoded_string))
1634  return false;
1635  Value decoded(decoded_string);
1636  currentValue().swapPayload(decoded);
1637  currentValue().setOffsetStart(token.start_ - begin_);
1638  currentValue().setOffsetLimit(token.end_ - begin_);
1639  return true;
1640 }
1641 
1642 bool OurReader::decodeString(Token& token, std::string& decoded) {
1643  decoded.reserve(token.end_ - token.start_ - 2);
1644  Location current = token.start_ + 1; // skip '"'
1645  Location end = token.end_ - 1; // do not include '"'
1646  while (current != end) {
1647  Char c = *current++;
1648  if (c == '"')
1649  break;
1650  else if (c == '\\') {
1651  if (current == end)
1652  return addError("Empty escape sequence in string", token, current);
1653  Char escape = *current++;
1654  switch (escape) {
1655  case '"':
1656  decoded += '"';
1657  break;
1658  case '/':
1659  decoded += '/';
1660  break;
1661  case '\\':
1662  decoded += '\\';
1663  break;
1664  case 'b':
1665  decoded += '\b';
1666  break;
1667  case 'f':
1668  decoded += '\f';
1669  break;
1670  case 'n':
1671  decoded += '\n';
1672  break;
1673  case 'r':
1674  decoded += '\r';
1675  break;
1676  case 't':
1677  decoded += '\t';
1678  break;
1679  case 'u': {
1680  unsigned int unicode;
1681  if (!decodeUnicodeCodePoint(token, current, end, unicode))
1682  return false;
1683  decoded += codePointToUTF8(unicode);
1684  } break;
1685  default:
1686  return addError("Bad escape sequence in string", token, current);
1687  }
1688  } else {
1689  decoded += c;
1690  }
1691  }
1692  return true;
1693 }
1694 
1695 bool OurReader::decodeUnicodeCodePoint(Token& token,
1696  Location& current,
1697  Location end,
1698  unsigned int& unicode) {
1699 
1700  if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
1701  return false;
1702  if (unicode >= 0xD800 && unicode <= 0xDBFF) {
1703  // surrogate pairs
1704  if (end - current < 6)
1705  return addError(
1706  "additional six characters expected to parse unicode surrogate pair.",
1707  token,
1708  current);
1709  unsigned int surrogatePair;
1710  if (*(current++) == '\\' && *(current++) == 'u') {
1711  if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
1712  unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
1713  } else
1714  return false;
1715  } else
1716  return addError("expecting another \\u token to begin the second half of "
1717  "a unicode surrogate pair",
1718  token,
1719  current);
1720  }
1721  return true;
1722 }
1723 
1724 bool OurReader::decodeUnicodeEscapeSequence(Token& token,
1725  Location& current,
1726  Location end,
1727  unsigned int& unicode) {
1728  if (end - current < 4)
1729  return addError(
1730  "Bad unicode escape sequence in string: four digits expected.",
1731  token,
1732  current);
1733  unicode = 0;
1734  for (int index = 0; index < 4; ++index) {
1735  Char c = *current++;
1736  unicode *= 16;
1737  if (c >= '0' && c <= '9')
1738  unicode += c - '0';
1739  else if (c >= 'a' && c <= 'f')
1740  unicode += c - 'a' + 10;
1741  else if (c >= 'A' && c <= 'F')
1742  unicode += c - 'A' + 10;
1743  else
1744  return addError(
1745  "Bad unicode escape sequence in string: hexadecimal digit expected.",
1746  token,
1747  current);
1748  }
1749  return true;
1750 }
1751 
1752 bool
1753 OurReader::addError(const std::string& message, Token& token, Location extra) {
1754  ErrorInfo info;
1755  info.token_ = token;
1756  info.message_ = message;
1757  info.extra_ = extra;
1758  errors_.push_back(info);
1759  return false;
1760 }
1761 
1762 bool OurReader::recoverFromError(TokenType skipUntilToken) {
1763  int errorCount = int(errors_.size());
1764  Token skip;
1765  for (;;) {
1766  if (!readToken(skip))
1767  errors_.resize(errorCount); // discard errors caused by recovery
1768  if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
1769  break;
1770  }
1771  errors_.resize(errorCount);
1772  return false;
1773 }
1774 
1775 bool OurReader::addErrorAndRecover(const std::string& message,
1776  Token& token,
1777  TokenType skipUntilToken) {
1778  addError(message, token);
1779  return recoverFromError(skipUntilToken);
1780 }
1781 
1782 Value& OurReader::currentValue() { return *(nodes_.top()); }
1783 
1784 OurReader::Char OurReader::getNextChar() {
1785  if (current_ == end_)
1786  return 0;
1787  return *current_++;
1788 }
1789 
1790 void OurReader::getLocationLineAndColumn(Location location,
1791  int& line,
1792  int& column) const {
1793  Location current = begin_;
1794  Location lastLineStart = current;
1795  line = 0;
1796  while (current < location && current != end_) {
1797  Char c = *current++;
1798  if (c == '\r') {
1799  if (*current == '\n')
1800  ++current;
1801  lastLineStart = current;
1802  ++line;
1803  } else if (c == '\n') {
1804  lastLineStart = current;
1805  ++line;
1806  }
1807  }
1808  // column & line start at 1
1809  column = int(location - lastLineStart) + 1;
1810  ++line;
1811 }
1812 
1813 std::string OurReader::getLocationLineAndColumn(Location location) const {
1814  int line, column;
1815  getLocationLineAndColumn(location, line, column);
1816  char buffer[18 + 16 + 16 + 1];
1817  snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
1818  return buffer;
1819 }
1820 
1821 std::string OurReader::getFormattedErrorMessages() const {
1822  std::string formattedMessage;
1823  for (Errors::const_iterator itError = errors_.begin();
1824  itError != errors_.end();
1825  ++itError) {
1826  const ErrorInfo& error = *itError;
1827  formattedMessage +=
1828  "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
1829  formattedMessage += " " + error.message_ + "\n";
1830  if (error.extra_)
1831  formattedMessage +=
1832  "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
1833  }
1834  return formattedMessage;
1835 }
1836 
1837 std::vector<OurReader::StructuredError> OurReader::getStructuredErrors() const {
1838  std::vector<OurReader::StructuredError> allErrors;
1839  for (Errors::const_iterator itError = errors_.begin();
1840  itError != errors_.end();
1841  ++itError) {
1842  const ErrorInfo& error = *itError;
1843  OurReader::StructuredError structured;
1844  structured.offset_start = error.token_.start_ - begin_;
1845  structured.offset_limit = error.token_.end_ - begin_;
1846  structured.message = error.message_;
1847  allErrors.push_back(structured);
1848  }
1849  return allErrors;
1850 }
1851 
1852 bool OurReader::pushError(const Value& value, const std::string& message) {
1853  size_t length = end_ - begin_;
1854  if(value.getOffsetStart() > length
1855  || value.getOffsetLimit() > length)
1856  return false;
1857  Token token;
1858  token.type_ = tokenError;
1859  token.start_ = begin_ + value.getOffsetStart();
1860  token.end_ = end_ + value.getOffsetLimit();
1861  ErrorInfo info;
1862  info.token_ = token;
1863  info.message_ = message;
1864  info.extra_ = 0;
1865  errors_.push_back(info);
1866  return true;
1867 }
1868 
1869 bool OurReader::pushError(const Value& value, const std::string& message, const Value& extra) {
1870  size_t length = end_ - begin_;
1871  if(value.getOffsetStart() > length
1872  || value.getOffsetLimit() > length
1873  || extra.getOffsetLimit() > length)
1874  return false;
1875  Token token;
1876  token.type_ = tokenError;
1877  token.start_ = begin_ + value.getOffsetStart();
1878  token.end_ = begin_ + value.getOffsetLimit();
1879  ErrorInfo info;
1880  info.token_ = token;
1881  info.message_ = message;
1882  info.extra_ = begin_ + extra.getOffsetStart();
1883  errors_.push_back(info);
1884  return true;
1885 }
1886 
1887 bool OurReader::good() const {
1888  return !errors_.size();
1889 }
1890 
1891 
1892 class OurCharReader : public CharReader {
1893  bool const collectComments_;
1894  OurReader reader_;
1895 public:
1896  OurCharReader(
1897  bool collectComments,
1898  OurFeatures const& features)
1899  : collectComments_(collectComments)
1900  , reader_(features)
1901  {}
1902  bool parse(
1903  char const* beginDoc, char const* endDoc,
1904  Value* root, std::string* errs) override {
1905  bool ok = reader_.parse(beginDoc, endDoc, *root, collectComments_);
1906  if (errs) {
1907  *errs = reader_.getFormattedErrorMessages();
1908  }
1909  return ok;
1910  }
1911 };
1912 
1914 {
1916 }
1918 {}
1920 {
1921  bool collectComments = settings_["collectComments"].asBool();
1922  OurFeatures features = OurFeatures::all();
1923  features.allowComments_ = settings_["allowComments"].asBool();
1924  features.strictRoot_ = settings_["strictRoot"].asBool();
1925  features.allowDroppedNullPlaceholders_ = settings_["allowDroppedNullPlaceholders"].asBool();
1926  features.allowNumericKeys_ = settings_["allowNumericKeys"].asBool();
1927  features.allowSingleQuotes_ = settings_["allowSingleQuotes"].asBool();
1928  features.stackLimit_ = settings_["stackLimit"].asInt();
1929  features.failIfExtra_ = settings_["failIfExtra"].asBool();
1930  features.rejectDupKeys_ = settings_["rejectDupKeys"].asBool();
1931  features.allowSpecialFloats_ = settings_["allowSpecialFloats"].asBool();
1932  return new OurCharReader(collectComments, features);
1933 }
// Replaces the contents of *valid_keys with the setting names this file
// treats as valid reader settings.
static void getValidReaderKeys(std::set<std::string>* valid_keys)
{
  static const char* const kReaderKeys[] = {
      "collectComments",
      "allowComments",
      "strictRoot",
      "allowDroppedNullPlaceholders",
      "allowNumericKeys",
      "allowSingleQuotes",
      "stackLimit",
      "failIfExtra",
      "rejectDupKeys",
      "allowSpecialFloats",
  };
  valid_keys->clear();
  for (const char* key : kReaderKeys)
    valid_keys->insert(key);
}
1949 {
1950  Json::Value my_invalid;
1951  if (!invalid) invalid = &my_invalid; // so we do not need to test for NULL
1952  Json::Value& inv = *invalid;
1953  std::set<std::string> valid_keys;
1954  getValidReaderKeys(&valid_keys);
1956  size_t n = keys.size();
1957  for (size_t i = 0; i < n; ++i) {
1958  std::string const& key = keys[i];
1959  if (valid_keys.find(key) == valid_keys.end()) {
1960  inv[key] = settings_[key];
1961  }
1962  }
1963  return 0u == inv.size();
1964 }
1966 {
1967  return settings_[key];
1968 }
1969 // static
1971 {
1973  (*settings)["allowComments"] = false;
1974  (*settings)["strictRoot"] = true;
1975  (*settings)["allowDroppedNullPlaceholders"] = false;
1976  (*settings)["allowNumericKeys"] = false;
1977  (*settings)["allowSingleQuotes"] = false;
1978  (*settings)["stackLimit"] = 1000;
1979  (*settings)["failIfExtra"] = true;
1980  (*settings)["rejectDupKeys"] = true;
1981  (*settings)["allowSpecialFloats"] = false;
1983 }
1984 // static
1986 {
1988  (*settings)["collectComments"] = true;
1989  (*settings)["allowComments"] = true;
1990  (*settings)["strictRoot"] = false;
1991  (*settings)["allowDroppedNullPlaceholders"] = false;
1992  (*settings)["allowNumericKeys"] = false;
1993  (*settings)["allowSingleQuotes"] = false;
1994  (*settings)["stackLimit"] = 1000;
1995  (*settings)["failIfExtra"] = false;
1996  (*settings)["rejectDupKeys"] = false;
1997  (*settings)["allowSpecialFloats"] = false;
1999 }
2000 
2002 // global functions
2003 
2005  CharReader::Factory const& fact, std::istream& sin,
2006  Value* root, std::string* errs)
2007 {
2008  std::ostringstream ssin;
2009  ssin << sin.rdbuf();
2010  std::string doc = ssin.str();
2011  char const* begin = doc.data();
2012  char const* end = begin + doc.size();
2013  // Note that we do not actually need a null-terminator.
2014  CharReaderPtr const reader(fact.newCharReader());
2015  return reader->parse(begin, end, root, errs);
2016 }
2017 
2018 std::istream& operator>>(std::istream& sin, Value& root) {
2020  std::string errs;
2021  bool ok = parseFromStream(b, sin, &root, &errs);
2022  if (!ok) {
2023  fprintf(stderr,
2024  "Error from reader: %s",
2025  errs.c_str());
2026 
2027  throwRuntimeError(errs);
2028  }
2029  return sin;
2030 }
2031 
2032 } // namespace Json