rippled
Loading...
Searching...
No Matches
json_reader.cpp
1#include <xrpl/basics/contract.h>
2#include <xrpl/json/json_reader.h>
3#include <xrpl/json/json_value.h>
4
5#include <algorithm>
6#include <cctype>
7#include <cstdint>
8#include <cstdio>
9#include <cstring>
10#include <istream>
11#include <stdexcept>
12#include <string>
13
14namespace Json {
15// Implementation of class Reader
16// ////////////////////////////////
17
// Encode a single Unicode code point as a UTF-8 byte sequence.
//
// Based on the description from http://en.wikipedia.org/wiki/UTF-8
//
// @param cp  The code point to encode.
// @return    One to four bytes of UTF-8. Code points above 0x10FFFF
//            produce an empty string. Values in the surrogate range are
//            not special-cased; they are encoded as three bytes.
static std::string
codePointToUTF8(unsigned int cp)
{
    // Continuation byte (10xxxxxx) holding the six bits of `cp` that start
    // at bit `shift`. The cast is applied to the fully assembled byte so no
    // implicit int -> char narrowing is left to the assignment.
    auto const continuation = [cp](unsigned int shift) {
        return static_cast<char>(0x80 | (0x3f & (cp >> shift)));
    };

    std::string result;

    if (cp <= 0x7f)
    {
        result.push_back(static_cast<char>(cp));
    }
    else if (cp <= 0x7FF)
    {
        result.push_back(static_cast<char>(0xC0 | (0x1f & (cp >> 6))));
        result.push_back(continuation(0));
    }
    else if (cp <= 0xFFFF)
    {
        result.push_back(static_cast<char>(0xE0 | (0xf & (cp >> 12))));
        result.push_back(continuation(6));
        result.push_back(continuation(0));
    }
    else if (cp <= 0x10FFFF)
    {
        result.push_back(static_cast<char>(0xF0 | (0x7 & (cp >> 18))));
        result.push_back(continuation(12));
        result.push_back(continuation(6));
        result.push_back(continuation(0));
    }

    return result;
}
54
55// Class Reader
56// //////////////////////////////////////////////////////////////////
57
58bool
59Reader::parse(std::string const& document, Value& root)
60{
61 document_ = document;
62 char const* begin = document_.c_str();
63 char const* end = begin + document_.length();
64 return parse(begin, end, root);
65}
66
67bool
69{
70 // std::istream_iterator<char> begin(sin);
71 // std::istream_iterator<char> end;
72 // Those would allow streamed input from a file, if parse() were a
73 // template function.
74
75 // Since std::string is reference-counted, this at least does not
76 // create an extra copy.
77 std::string doc;
78 std::getline(sin, doc, (char)EOF);
79 return parse(doc, root);
80}
81
82bool
83Reader::parse(char const* beginDoc, char const* endDoc, Value& root)
84{
85 begin_ = beginDoc;
86 end_ = endDoc;
88 lastValueEnd_ = 0;
89 lastValue_ = 0;
90 errors_.clear();
91
92 while (!nodes_.empty())
93 nodes_.pop();
94
95 nodes_.push(&root);
96 bool successful = readValue(0);
97 Token token;
98 skipCommentTokens(token);
99
100 if (!root.isNull() && !root.isArray() && !root.isObject())
101 {
102 // Set error location to start of doc, ideally should be first token
103 // found in doc
104 token.type_ = tokenError;
105 token.start_ = beginDoc;
106 token.end_ = endDoc;
107 addError("A valid JSON document must be either an array or an object value.", token);
108 return false;
109 }
110
111 return successful;
112}
113
114bool
115Reader::readValue(unsigned depth)
116{
117 Token token;
118 skipCommentTokens(token);
119 if (depth > nest_limit)
120 return addError("Syntax error: maximum nesting depth exceeded", token);
121 bool successful = true;
122
123 switch (token.type_)
124 {
125 case tokenObjectBegin:
126 successful = readObject(token, depth);
127 break;
128
129 case tokenArrayBegin:
130 successful = readArray(token, depth);
131 break;
132
133 case tokenInteger:
134 successful = decodeNumber(token);
135 break;
136
137 case tokenDouble:
138 successful = decodeDouble(token);
139 break;
140
141 case tokenString:
142 successful = decodeString(token);
143 break;
144
145 case tokenTrue:
146 currentValue() = true;
147 break;
148
149 case tokenFalse:
150 currentValue() = false;
151 break;
152
153 case tokenNull:
154 currentValue() = Value();
155 break;
156
157 default:
158 return addError("Syntax error: value, object or array expected.", token);
159 }
160
161 return successful;
162}
163
164void
166{
167 do
168 {
169 readToken(token);
170 } while (token.type_ == tokenComment);
171}
172
173bool
174Reader::expectToken(TokenType type, Token& token, char const* message)
175{
176 readToken(token);
177
178 if (token.type_ != type)
179 return addError(message, token);
180
181 return true;
182}
183
184bool
186{
187 skipSpaces();
188 token.start_ = current_;
189 Char c = getNextChar();
190 bool ok = true;
191
192 switch (c)
193 {
194 case '{':
195 token.type_ = tokenObjectBegin;
196 break;
197
198 case '}':
199 token.type_ = tokenObjectEnd;
200 break;
201
202 case '[':
203 token.type_ = tokenArrayBegin;
204 break;
205
206 case ']':
207 token.type_ = tokenArrayEnd;
208 break;
209
210 case '"':
211 token.type_ = tokenString;
212 ok = readString();
213 break;
214
215 case '/':
216 token.type_ = tokenComment;
217 ok = readComment();
218 break;
219
220 case '0':
221 case '1':
222 case '2':
223 case '3':
224 case '4':
225 case '5':
226 case '6':
227 case '7':
228 case '8':
229 case '9':
230 case '-':
231 token.type_ = readNumber();
232 break;
233
234 case 't':
235 token.type_ = tokenTrue;
236 ok = match("rue", 3);
237 break;
238
239 case 'f':
240 token.type_ = tokenFalse;
241 ok = match("alse", 4); // cspell:disable-line
242 break;
243
244 case 'n':
245 token.type_ = tokenNull;
246 ok = match("ull", 3);
247 break;
248
249 case ',':
251 break;
252
253 case ':':
255 break;
256
257 case 0:
258 token.type_ = tokenEndOfStream;
259 break;
260
261 default:
262 ok = false;
263 break;
264 }
265
266 if (!ok)
267 token.type_ = tokenError;
268
269 token.end_ = current_;
270 return true;
271}
272
273void
275{
276 while (current_ != end_)
277 {
278 Char c = *current_;
279
280 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
281 ++current_;
282 else
283 break;
284 }
285}
286
287bool
288Reader::match(Location pattern, int patternLength)
289{
290 if (end_ - current_ < patternLength)
291 return false;
292
293 int index = patternLength;
294
295 while (index--)
296 if (current_[index] != pattern[index])
297 return false;
298
299 current_ += patternLength;
300 return true;
301}
302
303bool
305{
306 Char c = getNextChar();
307
308 if (c == '*')
309 return readCStyleComment();
310
311 if (c == '/')
312 return readCppStyleComment();
313
314 return false;
315}
316
317bool
319{
320 while (current_ != end_)
321 {
322 Char c = getNextChar();
323
324 if (c == '*' && *current_ == '/')
325 break;
326 }
327
328 return getNextChar() == '/';
329}
330
331bool
333{
334 while (current_ != end_)
335 {
336 Char c = getNextChar();
337
338 if (c == '\r' || c == '\n')
339 break;
340 }
341
342 return true;
343}
344
347{
348 static char const extended_tokens[] = {'.', 'e', 'E', '+', '-'};
349
350 TokenType type = tokenInteger;
351
352 if (current_ != end_)
353 {
354 if (*current_ == '-')
355 ++current_;
356
357 while (current_ != end_)
358 {
359 if (!std::isdigit(static_cast<unsigned char>(*current_)))
360 {
361 auto ret =
362 std::find(std::begin(extended_tokens), std::end(extended_tokens), *current_);
363
364 if (ret == std::end(extended_tokens))
365 break;
366
367 type = tokenDouble;
368 }
369
370 ++current_;
371 }
372 }
373
374 return type;
375}
376
377bool
379{
380 Char c = 0;
381
382 while (current_ != end_)
383 {
384 c = getNextChar();
385
386 if (c == '\\')
387 getNextChar();
388 else if (c == '"')
389 break;
390 }
391
392 return c == '"';
393}
394
395bool
396Reader::readObject(Token& tokenStart, unsigned depth)
397{
398 Token tokenName;
399 std::string name;
401
402 while (readToken(tokenName))
403 {
404 bool initialTokenOk = true;
405
406 while (tokenName.type_ == tokenComment && initialTokenOk)
407 initialTokenOk = readToken(tokenName);
408
409 if (!initialTokenOk)
410 break;
411
412 if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
413 return true;
414
415 if (tokenName.type_ != tokenString)
416 break;
417
418 name = "";
419
420 if (!decodeString(tokenName, name))
422
423 Token colon;
424
425 if (!readToken(colon) || colon.type_ != tokenMemberSeparator)
426 {
427 return addErrorAndRecover(
428 "Missing ':' after object member name", colon, tokenObjectEnd);
429 }
430
431 // Reject duplicate names
432 if (currentValue().isMember(name))
433 return addError("Key '" + name + "' appears twice.", tokenName);
434
435 Value& value = currentValue()[name];
436 nodes_.push(&value);
437 bool ok = readValue(depth + 1);
438 nodes_.pop();
439
440 if (!ok) // error already set
442
443 Token comma;
444
445 if (!readToken(comma) ||
446 (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
447 comma.type_ != tokenComment))
448 {
449 return addErrorAndRecover(
450 "Missing ',' or '}' in object declaration", comma, tokenObjectEnd);
451 }
452
453 bool finalizeTokenOk = true;
454
455 while (comma.type_ == tokenComment && finalizeTokenOk)
456 finalizeTokenOk = readToken(comma);
457
458 if (comma.type_ == tokenObjectEnd)
459 return true;
460 }
461
462 return addErrorAndRecover("Missing '}' or object member name", tokenName, tokenObjectEnd);
463}
464
465bool
466Reader::readArray(Token& tokenStart, unsigned depth)
467{
469 skipSpaces();
470
471 if (*current_ == ']') // empty array
472 {
473 Token endArray;
474 readToken(endArray);
475 return true;
476 }
477
478 int index = 0;
479
480 while (true)
481 {
482 Value& value = currentValue()[index++];
483 nodes_.push(&value);
484 bool ok = readValue(depth + 1);
485 nodes_.pop();
486
487 if (!ok) // error already set
489
490 Token token;
491 // Accept Comment after last item in the array.
492 ok = readToken(token);
493
494 while (token.type_ == tokenComment && ok)
495 {
496 ok = readToken(token);
497 }
498
499 bool badTokenType = (token.type_ != tokenArraySeparator && token.type_ != tokenArrayEnd);
500
501 if (!ok || badTokenType)
502 {
503 return addErrorAndRecover(
504 "Missing ',' or ']' in array declaration", token, tokenArrayEnd);
505 }
506
507 if (token.type_ == tokenArrayEnd)
508 break;
509 }
510
511 return true;
512}
513
514bool
516{
517 Location current = token.start_;
518 bool isNegative = *current == '-';
519
520 if (isNegative)
521 ++current;
522
523 if (current == token.end_)
524 {
525 return addError(
526 "'" + std::string(token.start_, token.end_) + "' is not a valid number.", token);
527 }
528
529 // The existing Json integers are 32-bit so using a 64-bit value here avoids
530 // overflows in the conversion code below.
531 std::int64_t value = 0;
532
533 static_assert(
534 sizeof(value) > sizeof(Value::maxUInt),
535 "The JSON integer overflow logic will need to be reworked.");
536
537 while (current < token.end_ && (value <= Value::maxUInt))
538 {
539 Char c = *current++;
540
541 if (c < '0' || c > '9')
542 {
543 return addError(
544 "'" + std::string(token.start_, token.end_) + "' is not a number.", token);
545 }
546
547 value = (value * 10) + (c - '0');
548 }
549
550 // More tokens left -> input is larger than largest possible return value
551 if (current != token.end_)
552 {
553 return addError(
554 "'" + std::string(token.start_, token.end_) + "' exceeds the allowable range.", token);
555 }
556
557 if (isNegative)
558 {
559 value = -value;
560
561 if (value < Value::minInt || value > Value::maxInt)
562 {
563 return addError(
564 "'" + std::string(token.start_, token.end_) + "' exceeds the allowable range.",
565 token);
566 }
567
568 currentValue() = static_cast<Value::Int>(value);
569 }
570 else
571 {
572 if (value > Value::maxUInt)
573 {
574 return addError(
575 "'" + std::string(token.start_, token.end_) + "' exceeds the allowable range.",
576 token);
577 }
578
579 // If it's representable as a signed integer, construct it as one.
580 if (value <= Value::maxInt)
581 currentValue() = static_cast<Value::Int>(value);
582 else
583 currentValue() = static_cast<Value::UInt>(value);
584 }
585
586 return true;
587}
588
589bool
591{
592 double value = 0;
593 int const bufferSize = 32;
594 int count;
595 int length = int(token.end_ - token.start_);
596 // Sanity check to avoid buffer overflow exploits.
597 if (length < 0)
598 {
599 return addError("Unable to parse token length", token);
600 }
601 // Avoid using a string constant for the format control string given to
602 // sscanf, as this can cause hard to debug crashes on OS X. See here for
603 // more info:
604 //
605 // http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html
606 char format[] = "%lf";
607 if (length <= bufferSize)
608 {
609 Char buffer[bufferSize + 1];
610 memcpy(buffer, token.start_, length);
611 buffer[length] = 0;
612 count = sscanf(buffer, format, &value);
613 }
614 else
615 {
616 std::string buffer(token.start_, token.end_);
617 count = sscanf(buffer.c_str(), format, &value);
618 }
619 if (count != 1)
620 return addError("'" + std::string(token.start_, token.end_) + "' is not a number.", token);
621 currentValue() = value;
622 return true;
623}
624
625bool
627{
628 std::string decoded;
629
630 if (!decodeString(token, decoded))
631 return false;
632
633 currentValue() = decoded;
634 return true;
635}
636
637bool
639{
640 decoded.reserve(token.end_ - token.start_ - 2);
641 Location current = token.start_ + 1; // skip '"'
642 Location end = token.end_ - 1; // do not include '"'
643
644 while (current != end)
645 {
646 Char c = *current++;
647
648 if (c == '"')
649 break;
650 else if (c == '\\')
651 {
652 if (current == end)
653 return addError("Empty escape sequence in string", token, current);
654
655 Char escape = *current++;
656
657 switch (escape)
658 {
659 case '"':
660 decoded += '"';
661 break;
662
663 case '/':
664 decoded += '/';
665 break;
666
667 case '\\':
668 decoded += '\\';
669 break;
670
671 case 'b':
672 decoded += '\b';
673 break;
674
675 case 'f':
676 decoded += '\f';
677 break;
678
679 case 'n':
680 decoded += '\n';
681 break;
682
683 case 'r':
684 decoded += '\r';
685 break;
686
687 case 't':
688 decoded += '\t';
689 break;
690
691 case 'u': {
692 unsigned int unicode;
693
694 if (!decodeUnicodeCodePoint(token, current, end, unicode))
695 return false;
696
697 decoded += codePointToUTF8(unicode);
698 }
699 break;
700
701 default:
702 return addError("Bad escape sequence in string", token, current);
703 }
704 }
705 else
706 {
707 decoded += c;
708 }
709 }
710
711 return true;
712}
713
714bool
715Reader::decodeUnicodeCodePoint(Token& token, Location& current, Location end, unsigned int& unicode)
716{
717 if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
718 return false;
719
720 if (unicode >= 0xD800 && unicode <= 0xDBFF)
721 {
722 // surrogate pairs
723 if (end - current < 6)
724 return addError(
725 "additional six characters expected to parse unicode surrogate "
726 "pair.",
727 token,
728 current);
729
730 unsigned int surrogatePair;
731
732 if (*(current++) == '\\' && *(current++) == 'u')
733 {
734 if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair))
735 {
736 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
737 }
738 else
739 return false;
740 }
741 else
742 return addError(
743 "expecting another \\u token to begin the second half of a "
744 "unicode surrogate pair",
745 token,
746 current);
747 }
748
749 return true;
750}
751
752bool
754 Token& token,
755 Location& current,
756 Location end,
757 unsigned int& unicode)
758{
759 if (end - current < 4)
760 return addError(
761 "Bad unicode escape sequence in string: four digits expected.", token, current);
762
763 unicode = 0;
764
765 for (int index = 0; index < 4; ++index)
766 {
767 Char c = *current++;
768 unicode *= 16;
769
770 if (c >= '0' && c <= '9')
771 unicode += c - '0';
772 else if (c >= 'a' && c <= 'f')
773 unicode += c - 'a' + 10;
774 else if (c >= 'A' && c <= 'F')
775 unicode += c - 'A' + 10;
776 else
777 return addError(
778 "Bad unicode escape sequence in string: hexadecimal digit "
779 "expected.",
780 token,
781 current);
782 }
783
784 return true;
785}
786
787bool
788Reader::addError(std::string const& message, Token& token, Location extra)
789{
790 ErrorInfo info;
791 info.token_ = token;
792 info.message_ = message;
793 info.extra_ = extra;
794 errors_.push_back(info);
795 return false;
796}
797
798bool
800{
801 int errorCount = int(errors_.size());
802 Token skip;
803
804 while (true)
805 {
806 if (!readToken(skip))
807 errors_.resize(errorCount); // discard errors caused by recovery
808
809 if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
810 break;
811 }
812
813 errors_.resize(errorCount);
814 return false;
815}
816
817bool
818Reader::addErrorAndRecover(std::string const& message, Token& token, TokenType skipUntilToken)
819{
820 addError(message, token);
821 return recoverFromError(skipUntilToken);
822}
823
824Value&
826{
827 return *(nodes_.top());
828}
829
832{
833 if (current_ == end_)
834 return 0;
835
836 return *current_++;
837}
838
839void
840Reader::getLocationLineAndColumn(Location location, int& line, int& column) const
841{
842 Location current = begin_;
843 Location lastLineStart = current;
844 line = 0;
845
846 while (current < location && current != end_)
847 {
848 Char c = *current++;
849
850 if (c == '\r')
851 {
852 if (*current == '\n')
853 ++current;
854
855 lastLineStart = current;
856 ++line;
857 }
858 else if (c == '\n')
859 {
860 lastLineStart = current;
861 ++line;
862 }
863 }
864
865 // column & line start at 1
866 column = int(location - lastLineStart) + 1;
867 ++line;
868}
869
872{
873 int line, column;
874 getLocationLineAndColumn(location, line, column);
875 return "Line " + std::to_string(line) + ", Column " + std::to_string(column);
876}
877
880{
881 std::string formattedMessage;
882
883 for (Errors::const_iterator itError = errors_.begin(); itError != errors_.end(); ++itError)
884 {
885 ErrorInfo const& error = *itError;
886 formattedMessage += "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
887 formattedMessage += " " + error.message_ + "\n";
888
889 if (error.extra_)
890 formattedMessage += "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
891 }
892
893 return formattedMessage;
894}
895
898{
899 Json::Reader reader;
900 bool ok = reader.parse(sin, root);
901
902 // XRPL_ASSERT(ok, "Json::operator>>() : parse succeeded");
903 if (!ok)
904 xrpl::Throw<std::runtime_error>(reader.getFormattedErrorMessages());
905
906 return sin;
907}
908
909} // namespace Json
T begin(T... args)
T c_str(T... args)
Unserialize a JSON document into a Value.
Definition json_reader.h:17
void skipCommentTokens(Token &token)
bool addErrorAndRecover(std::string const &message, Token &token, TokenType skipUntilToken)
bool decodeDouble(Token &token)
Location current_
Location begin_
bool match(Location pattern, int patternLength)
bool expectToken(TokenType type, Token &token, char const *message)
bool decodeNumber(Token &token)
Char const * Location
Definition json_reader.h:20
bool decodeUnicodeEscapeSequence(Token &token, Location &current, Location end, unsigned int &unicode)
Location lastValueEnd_
std::string getFormattedErrorMessages() const
Returns a user friendly string that list errors in the parsed document.
bool readCppStyleComment()
Location end_
static constexpr unsigned nest_limit
Definition json_reader.h:70
bool readToken(Token &token)
bool readValue(unsigned depth)
Value & currentValue()
Value * lastValue_
void getLocationLineAndColumn(Location location, int &line, int &column) const
bool recoverFromError(TokenType skipUntilToken)
bool parse(std::string const &document, Value &root)
Read a Value from a JSON document.
bool decodeUnicodeCodePoint(Token &token, Location &current, Location end, unsigned int &unicode)
bool decodeString(Token &token)
bool readArray(Token &token, unsigned depth)
bool addError(std::string const &message, Token &token, Location extra=0)
bool readCStyleComment()
Reader::TokenType readNumber()
bool readObject(Token &token, unsigned depth)
std::string document_
Represents a JSON value.
Definition json_value.h:130
Json::UInt UInt
Definition json_value.h:137
Json::Int Int
Definition json_value.h:138
static constexpr Int maxInt
Definition json_value.h:143
static constexpr UInt maxUInt
Definition json_value.h:144
T clear(T... args)
T empty(T... args)
T end(T... args)
T find(T... args)
T getline(T... args)
T is_same_v
JSON (JavaScript Object Notation).
Definition json_errors.h:5
std::istream & operator>>(std::istream &, Value &)
Read from 'sin' into 'root'.
@ arrayValue
array value (ordered list)
Definition json_value.h:25
@ objectValue
object value (collection of name/value pairs).
Definition json_value.h:26
static std::string codePointToUTF8(unsigned int cp)
T pop(T... args)
T push_back(T... args)
T push(T... args)
T reserve(T... args)
T resize(T... args)
T length(T... args)
T to_string(T... args)
T top(T... args)