rippled
Loading...
Searching...
No Matches
json_reader.cpp
//------------------------------------------------------------------------------
/*
    This file is part of rippled: https://github.com/ripple/rippled
    Copyright (c) 2012, 2013 Ripple Labs Inc.

    Permission to use, copy, modify, and/or distribute this software for any
    purpose with or without fee is hereby granted, provided that the above
    copyright notice and this permission notice appear in all copies.

    THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
    WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
    MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
    ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
    WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
    ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
    OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//==============================================================================
19
#include <xrpl/basics/contract.h>
#include <xrpl/json/json_reader.h>

#include <algorithm>
#include <cctype>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <istream>
#include <stdexcept>
#include <string>
27
28namespace Json {
// Implementation of class Reader
// ////////////////////////////////
31
// Encode a code point as a UTF-8 byte sequence.
//
// Returns 1 to 4 bytes for cp <= 0x10FFFF and an empty string for any larger
// value. No other validation is performed on cp.
static std::string
codePointToUTF8(unsigned int cp)
{
    std::string result;

    // based on description from http://en.wikipedia.org/wiki/UTF-8

    if (cp <= 0x7f)
    {
        // 0xxxxxxx
        result.resize(1);
        result[0] = static_cast<char>(cp);
    }
    else if (cp <= 0x7FF)
    {
        // 110xxxxx 10xxxxxx
        result.resize(2);
        result[1] = static_cast<char>(0x80 | (0x3f & cp));
        result[0] = static_cast<char>(0xC0 | (0x1f & (cp >> 6)));
    }
    else if (cp <= 0xFFFF)
    {
        // 1110xxxx 10xxxxxx 10xxxxxx
        // The whole byte is cast at once so that no int value >= 0x80 is
        // implicitly narrowed to char.
        result.resize(3);
        result[2] = static_cast<char>(0x80 | (0x3f & cp));
        result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
        result[0] = static_cast<char>(0xE0 | (0xf & (cp >> 12)));
    }
    else if (cp <= 0x10FFFF)
    {
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        result.resize(4);
        result[3] = static_cast<char>(0x80 | (0x3f & cp));
        result[2] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
        result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 12)));
        result[0] = static_cast<char>(0xF0 | (0x7 & (cp >> 18)));
    }

    return result;
}
68
// Class Reader
// //////////////////////////////////////////////////////////////////
71
72bool
73Reader::parse(std::string const& document, Value& root)
74{
75 document_ = document;
76 const char* begin = document_.c_str();
77 const char* end = begin + document_.length();
78 return parse(begin, end, root);
79}
80
81bool
83{
84 // std::istream_iterator<char> begin(sin);
85 // std::istream_iterator<char> end;
86 // Those would allow streamed input from a file, if parse() were a
87 // template function.
88
89 // Since std::string is reference-counted, this at least does not
90 // create an extra copy.
91 std::string doc;
92 std::getline(sin, doc, (char)EOF);
93 return parse(doc, root);
94}
95
96bool
97Reader::parse(const char* beginDoc, const char* endDoc, Value& root)
98{
99 begin_ = beginDoc;
100 end_ = endDoc;
102 lastValueEnd_ = 0;
103 lastValue_ = 0;
104 errors_.clear();
105
106 while (!nodes_.empty())
107 nodes_.pop();
108
109 nodes_.push(&root);
110 bool successful = readValue(0);
111 Token token;
112 skipCommentTokens(token);
113
114 if (!root.isNull() && !root.isArray() && !root.isObject())
115 {
116 // Set error location to start of doc, ideally should be first token
117 // found in doc
118 token.type_ = tokenError;
119 token.start_ = beginDoc;
120 token.end_ = endDoc;
121 addError(
122 "A valid JSON document must be either an array or an object value.",
123 token);
124 return false;
125 }
126
127 return successful;
128}
129
130bool
131Reader::readValue(unsigned depth)
132{
133 Token token;
134 skipCommentTokens(token);
135 if (depth > nest_limit)
136 return addError("Syntax error: maximum nesting depth exceeded", token);
137 bool successful = true;
138
139 switch (token.type_)
140 {
141 case tokenObjectBegin:
142 successful = readObject(token, depth);
143 break;
144
145 case tokenArrayBegin:
146 successful = readArray(token, depth);
147 break;
148
149 case tokenInteger:
150 successful = decodeNumber(token);
151 break;
152
153 case tokenDouble:
154 successful = decodeDouble(token);
155 break;
156
157 case tokenString:
158 successful = decodeString(token);
159 break;
160
161 case tokenTrue:
162 currentValue() = true;
163 break;
164
165 case tokenFalse:
166 currentValue() = false;
167 break;
168
169 case tokenNull:
170 currentValue() = Value();
171 break;
172
173 default:
174 return addError(
175 "Syntax error: value, object or array expected.", token);
176 }
177
178 return successful;
179}
180
181void
183{
184 do
185 {
186 readToken(token);
187 } while (token.type_ == tokenComment);
188}
189
190bool
191Reader::expectToken(TokenType type, Token& token, const char* message)
192{
193 readToken(token);
194
195 if (token.type_ != type)
196 return addError(message, token);
197
198 return true;
199}
200
201bool
203{
204 skipSpaces();
205 token.start_ = current_;
206 Char c = getNextChar();
207 bool ok = true;
208
209 switch (c)
210 {
211 case '{':
212 token.type_ = tokenObjectBegin;
213 break;
214
215 case '}':
216 token.type_ = tokenObjectEnd;
217 break;
218
219 case '[':
220 token.type_ = tokenArrayBegin;
221 break;
222
223 case ']':
224 token.type_ = tokenArrayEnd;
225 break;
226
227 case '"':
228 token.type_ = tokenString;
229 ok = readString();
230 break;
231
232 case '/':
233 token.type_ = tokenComment;
234 ok = readComment();
235 break;
236
237 case '0':
238 case '1':
239 case '2':
240 case '3':
241 case '4':
242 case '5':
243 case '6':
244 case '7':
245 case '8':
246 case '9':
247 case '-':
248 token.type_ = readNumber();
249 break;
250
251 case 't':
252 token.type_ = tokenTrue;
253 ok = match("rue", 3);
254 break;
255
256 case 'f':
257 token.type_ = tokenFalse;
258 ok = match("alse", 4);
259 break;
260
261 case 'n':
262 token.type_ = tokenNull;
263 ok = match("ull", 3);
264 break;
265
266 case ',':
268 break;
269
270 case ':':
272 break;
273
274 case 0:
275 token.type_ = tokenEndOfStream;
276 break;
277
278 default:
279 ok = false;
280 break;
281 }
282
283 if (!ok)
284 token.type_ = tokenError;
285
286 token.end_ = current_;
287 return true;
288}
289
290void
292{
293 while (current_ != end_)
294 {
295 Char c = *current_;
296
297 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
298 ++current_;
299 else
300 break;
301 }
302}
303
304bool
305Reader::match(Location pattern, int patternLength)
306{
307 if (end_ - current_ < patternLength)
308 return false;
309
310 int index = patternLength;
311
312 while (index--)
313 if (current_[index] != pattern[index])
314 return false;
315
316 current_ += patternLength;
317 return true;
318}
319
320bool
322{
323 Char c = getNextChar();
324
325 if (c == '*')
326 return readCStyleComment();
327
328 if (c == '/')
329 return readCppStyleComment();
330
331 return false;
332}
333
334bool
336{
337 while (current_ != end_)
338 {
339 Char c = getNextChar();
340
341 if (c == '*' && *current_ == '/')
342 break;
343 }
344
345 return getNextChar() == '/';
346}
347
348bool
350{
351 while (current_ != end_)
352 {
353 Char c = getNextChar();
354
355 if (c == '\r' || c == '\n')
356 break;
357 }
358
359 return true;
360}
361
364{
365 static char const extended_tokens[] = {'.', 'e', 'E', '+', '-'};
366
367 TokenType type = tokenInteger;
368
369 if (current_ != end_)
370 {
371 if (*current_ == '-')
372 ++current_;
373
374 while (current_ != end_)
375 {
376 if (!std::isdigit(static_cast<unsigned char>(*current_)))
377 {
378 auto ret = std::find(
379 std::begin(extended_tokens),
380 std::end(extended_tokens),
381 *current_);
382
383 if (ret == std::end(extended_tokens))
384 break;
385
386 type = tokenDouble;
387 }
388
389 ++current_;
390 }
391 }
392
393 return type;
394}
395
396bool
398{
399 Char c = 0;
400
401 while (current_ != end_)
402 {
403 c = getNextChar();
404
405 if (c == '\\')
406 getNextChar();
407 else if (c == '"')
408 break;
409 }
410
411 return c == '"';
412}
413
414bool
415Reader::readObject(Token& tokenStart, unsigned depth)
416{
417 Token tokenName;
418 std::string name;
420
421 while (readToken(tokenName))
422 {
423 bool initialTokenOk = true;
424
425 while (tokenName.type_ == tokenComment && initialTokenOk)
426 initialTokenOk = readToken(tokenName);
427
428 if (!initialTokenOk)
429 break;
430
431 if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
432 return true;
433
434 if (tokenName.type_ != tokenString)
435 break;
436
437 name = "";
438
439 if (!decodeString(tokenName, name))
441
442 Token colon;
443
444 if (!readToken(colon) || colon.type_ != tokenMemberSeparator)
445 {
446 return addErrorAndRecover(
447 "Missing ':' after object member name", colon, tokenObjectEnd);
448 }
449
450 // Reject duplicate names
451 if (currentValue().isMember(name))
452 return addError("Key '" + name + "' appears twice.", tokenName);
453
454 Value& value = currentValue()[name];
455 nodes_.push(&value);
456 bool ok = readValue(depth + 1);
457 nodes_.pop();
458
459 if (!ok) // error already set
461
462 Token comma;
463
464 if (!readToken(comma) ||
465 (comma.type_ != tokenObjectEnd &&
466 comma.type_ != tokenArraySeparator && comma.type_ != tokenComment))
467 {
468 return addErrorAndRecover(
469 "Missing ',' or '}' in object declaration",
470 comma,
472 }
473
474 bool finalizeTokenOk = true;
475
476 while (comma.type_ == tokenComment && finalizeTokenOk)
477 finalizeTokenOk = readToken(comma);
478
479 if (comma.type_ == tokenObjectEnd)
480 return true;
481 }
482
483 return addErrorAndRecover(
484 "Missing '}' or object member name", tokenName, tokenObjectEnd);
485}
486
487bool
488Reader::readArray(Token& tokenStart, unsigned depth)
489{
491 skipSpaces();
492
493 if (*current_ == ']') // empty array
494 {
495 Token endArray;
496 readToken(endArray);
497 return true;
498 }
499
500 int index = 0;
501
502 while (true)
503 {
504 Value& value = currentValue()[index++];
505 nodes_.push(&value);
506 bool ok = readValue(depth + 1);
507 nodes_.pop();
508
509 if (!ok) // error already set
511
512 Token token;
513 // Accept Comment after last item in the array.
514 ok = readToken(token);
515
516 while (token.type_ == tokenComment && ok)
517 {
518 ok = readToken(token);
519 }
520
521 bool badTokenType =
522 (token.type_ != tokenArraySeparator &&
523 token.type_ != tokenArrayEnd);
524
525 if (!ok || badTokenType)
526 {
527 return addErrorAndRecover(
528 "Missing ',' or ']' in array declaration",
529 token,
531 }
532
533 if (token.type_ == tokenArrayEnd)
534 break;
535 }
536
537 return true;
538}
539
540bool
542{
543 Location current = token.start_;
544 bool isNegative = *current == '-';
545
546 if (isNegative)
547 ++current;
548
549 if (current == token.end_)
550 {
551 return addError(
552 "'" + std::string(token.start_, token.end_) +
553 "' is not a valid number.",
554 token);
555 }
556
557 // The existing Json integers are 32-bit so using a 64-bit value here avoids
558 // overflows in the conversion code below.
559 std::int64_t value = 0;
560
561 static_assert(
562 sizeof(value) > sizeof(Value::maxUInt),
563 "The JSON integer overflow logic will need to be reworked.");
564
565 while (current < token.end_ && (value <= Value::maxUInt))
566 {
567 Char c = *current++;
568
569 if (c < '0' || c > '9')
570 {
571 return addError(
572 "'" + std::string(token.start_, token.end_) +
573 "' is not a number.",
574 token);
575 }
576
577 value = (value * 10) + (c - '0');
578 }
579
580 // More tokens left -> input is larger than largest possible return value
581 if (current != token.end_)
582 {
583 return addError(
584 "'" + std::string(token.start_, token.end_) +
585 "' exceeds the allowable range.",
586 token);
587 }
588
589 if (isNegative)
590 {
591 value = -value;
592
593 if (value < Value::minInt || value > Value::maxInt)
594 {
595 return addError(
596 "'" + std::string(token.start_, token.end_) +
597 "' exceeds the allowable range.",
598 token);
599 }
600
601 currentValue() = static_cast<Value::Int>(value);
602 }
603 else
604 {
605 if (value > Value::maxUInt)
606 {
607 return addError(
608 "'" + std::string(token.start_, token.end_) +
609 "' exceeds the allowable range.",
610 token);
611 }
612
613 // If it's representable as a signed integer, construct it as one.
614 if (value <= Value::maxInt)
615 currentValue() = static_cast<Value::Int>(value);
616 else
617 currentValue() = static_cast<Value::UInt>(value);
618 }
619
620 return true;
621}
622
623bool
625{
626 double value = 0;
627 const int bufferSize = 32;
628 int count;
629 int length = int(token.end_ - token.start_);
630 // Sanity check to avoid buffer overflow exploits.
631 if (length < 0)
632 {
633 return addError("Unable to parse token length", token);
634 }
635 // Avoid using a string constant for the format control string given to
636 // sscanf, as this can cause hard to debug crashes on OS X. See here for
637 // more info:
638 //
639 // http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html
640 char format[] = "%lf";
641 if (length <= bufferSize)
642 {
643 Char buffer[bufferSize + 1];
644 memcpy(buffer, token.start_, length);
645 buffer[length] = 0;
646 count = sscanf(buffer, format, &value);
647 }
648 else
649 {
650 std::string buffer(token.start_, token.end_);
651 count = sscanf(buffer.c_str(), format, &value);
652 }
653 if (count != 1)
654 return addError(
655 "'" + std::string(token.start_, token.end_) + "' is not a number.",
656 token);
657 currentValue() = value;
658 return true;
659}
660
661bool
663{
664 std::string decoded;
665
666 if (!decodeString(token, decoded))
667 return false;
668
669 currentValue() = decoded;
670 return true;
671}
672
673bool
675{
676 decoded.reserve(token.end_ - token.start_ - 2);
677 Location current = token.start_ + 1; // skip '"'
678 Location end = token.end_ - 1; // do not include '"'
679
680 while (current != end)
681 {
682 Char c = *current++;
683
684 if (c == '"')
685 break;
686 else if (c == '\\')
687 {
688 if (current == end)
689 return addError(
690 "Empty escape sequence in string", token, current);
691
692 Char escape = *current++;
693
694 switch (escape)
695 {
696 case '"':
697 decoded += '"';
698 break;
699
700 case '/':
701 decoded += '/';
702 break;
703
704 case '\\':
705 decoded += '\\';
706 break;
707
708 case 'b':
709 decoded += '\b';
710 break;
711
712 case 'f':
713 decoded += '\f';
714 break;
715
716 case 'n':
717 decoded += '\n';
718 break;
719
720 case 'r':
721 decoded += '\r';
722 break;
723
724 case 't':
725 decoded += '\t';
726 break;
727
728 case 'u': {
729 unsigned int unicode;
730
731 if (!decodeUnicodeCodePoint(token, current, end, unicode))
732 return false;
733
734 decoded += codePointToUTF8(unicode);
735 }
736 break;
737
738 default:
739 return addError(
740 "Bad escape sequence in string", token, current);
741 }
742 }
743 else
744 {
745 decoded += c;
746 }
747 }
748
749 return true;
750}
751
752bool
754 Token& token,
755 Location& current,
756 Location end,
757 unsigned int& unicode)
758{
759 if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
760 return false;
761
762 if (unicode >= 0xD800 && unicode <= 0xDBFF)
763 {
764 // surrogate pairs
765 if (end - current < 6)
766 return addError(
767 "additional six characters expected to parse unicode surrogate "
768 "pair.",
769 token,
770 current);
771
772 unsigned int surrogatePair;
773
774 if (*(current++) == '\\' && *(current++) == 'u')
775 {
776 if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair))
777 {
778 unicode = 0x10000 + ((unicode & 0x3FF) << 10) +
779 (surrogatePair & 0x3FF);
780 }
781 else
782 return false;
783 }
784 else
785 return addError(
786 "expecting another \\u token to begin the second half of a "
787 "unicode surrogate pair",
788 token,
789 current);
790 }
791
792 return true;
793}
794
795bool
797 Token& token,
798 Location& current,
799 Location end,
800 unsigned int& unicode)
801{
802 if (end - current < 4)
803 return addError(
804 "Bad unicode escape sequence in string: four digits expected.",
805 token,
806 current);
807
808 unicode = 0;
809
810 for (int index = 0; index < 4; ++index)
811 {
812 Char c = *current++;
813 unicode *= 16;
814
815 if (c >= '0' && c <= '9')
816 unicode += c - '0';
817 else if (c >= 'a' && c <= 'f')
818 unicode += c - 'a' + 10;
819 else if (c >= 'A' && c <= 'F')
820 unicode += c - 'A' + 10;
821 else
822 return addError(
823 "Bad unicode escape sequence in string: hexadecimal digit "
824 "expected.",
825 token,
826 current);
827 }
828
829 return true;
830}
831
832bool
833Reader::addError(std::string const& message, Token& token, Location extra)
834{
835 ErrorInfo info;
836 info.token_ = token;
837 info.message_ = message;
838 info.extra_ = extra;
839 errors_.push_back(info);
840 return false;
841}
842
843bool
845{
846 int errorCount = int(errors_.size());
847 Token skip;
848
849 while (true)
850 {
851 if (!readToken(skip))
852 errors_.resize(errorCount); // discard errors caused by recovery
853
854 if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
855 break;
856 }
857
858 errors_.resize(errorCount);
859 return false;
860}
861
862bool
864 std::string const& message,
865 Token& token,
866 TokenType skipUntilToken)
867{
868 addError(message, token);
869 return recoverFromError(skipUntilToken);
870}
871
872Value&
874{
875 return *(nodes_.top());
876}
877
880{
881 if (current_ == end_)
882 return 0;
883
884 return *current_++;
885}
886
887void
888Reader::getLocationLineAndColumn(Location location, int& line, int& column)
889 const
890{
891 Location current = begin_;
892 Location lastLineStart = current;
893 line = 0;
894
895 while (current < location && current != end_)
896 {
897 Char c = *current++;
898
899 if (c == '\r')
900 {
901 if (*current == '\n')
902 ++current;
903
904 lastLineStart = current;
905 ++line;
906 }
907 else if (c == '\n')
908 {
909 lastLineStart = current;
910 ++line;
911 }
912 }
913
914 // column & line start at 1
915 column = int(location - lastLineStart) + 1;
916 ++line;
917}
918
921{
922 int line, column;
923 getLocationLineAndColumn(location, line, column);
924 return "Line " + std::to_string(line) + ", Column " +
925 std::to_string(column);
926}
927
930{
931 std::string formattedMessage;
932
933 for (Errors::const_iterator itError = errors_.begin();
934 itError != errors_.end();
935 ++itError)
936 {
937 const ErrorInfo& error = *itError;
938 formattedMessage +=
939 "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
940 formattedMessage += " " + error.message_ + "\n";
941
942 if (error.extra_)
943 formattedMessage += "See " +
944 getLocationLineAndColumn(error.extra_) + " for detail.\n";
945 }
946
947 return formattedMessage;
948}
949
952{
953 Json::Reader reader;
954 bool ok = reader.parse(sin, root);
955
956 // XRPL_ASSERT(ok, "Json::operator>>() : parse succeeded");
957 if (!ok)
958 ripple::Throw<std::runtime_error>(reader.getFormatedErrorMessages());
959
960 return sin;
961}
962
963} // namespace Json
T begin(T... args)
T c_str(T... args)
Unserialize a JSON document into a Value.
Definition: json_reader.h:37
Errors errors_
Definition: json_reader.h:201
void skipCommentTokens(Token &token)
bool addErrorAndRecover(std::string const &message, Token &token, TokenType skipUntilToken)
bool decodeDouble(Token &token)
std::string getFormatedErrorMessages() const
Returns a user friendly string that list errors in the parsed document.
Location current_
Definition: json_reader.h:205
Location begin_
Definition: json_reader.h:203
bool match(Location pattern, int patternLength)
void skipSpaces()
bool decodeNumber(Token &token)
bool expectToken(TokenType type, Token &token, const char *message)
bool decodeUnicodeEscapeSequence(Token &token, Location &current, Location end, unsigned int &unicode)
Location lastValueEnd_
Definition: json_reader.h:206
bool readString()
bool readCppStyleComment()
Location end_
Definition: json_reader.h:204
static constexpr unsigned nest_limit
Definition: json_reader.h:90
bool readToken(Token &token)
bool readValue(unsigned depth)
Value & currentValue()
Value * lastValue_
Definition: json_reader.h:207
void getLocationLineAndColumn(Location location, int &line, int &column) const
bool recoverFromError(TokenType skipUntilToken)
bool parse(std::string const &document, Value &root)
Read a Value from a JSON document.
Definition: json_reader.cpp:73
bool decodeUnicodeCodePoint(Token &token, Location &current, Location end, unsigned int &unicode)
@ tokenMemberSeparator
Definition: json_reader.h:106
bool decodeString(Token &token)
bool readArray(Token &token, unsigned depth)
Char getNextChar()
const Char * Location
Definition: json_reader.h:40
bool addError(std::string const &message, Token &token, Location extra=0)
bool readComment()
bool readCStyleComment()
Reader::TokenType readNumber()
bool readObject(Token &token, unsigned depth)
std::string document_
Definition: json_reader.h:202
Represents a JSON value.
Definition: json_value.h:147
Json::UInt UInt
Definition: json_value.h:154
static const Int maxInt
Definition: json_value.h:160
Json::Int Int
Definition: json_value.h:155
static const UInt maxUInt
Definition: json_value.h:161
T clear(T... args)
T empty(T... args)
T end(T... args)
T find(T... args)
T getline(T... args)
JSON (JavaScript Object Notation).
Definition: json_errors.h:25
std::istream & operator>>(std::istream &, Value &)
Read from 'sin' into 'root'.
@ arrayValue
array value (ordered list)
Definition: json_value.h:42
@ objectValue
object value (collection of name/value pairs).
Definition: json_value.h:43
static std::string codePointToUTF8(unsigned int cp)
Definition: json_reader.cpp:33
T pop(T... args)
T push_back(T... args)
T push(T... args)
T reserve(T... args)
T resize(T... args)
T length(T... args)
T to_string(T... args)
T top(T... args)