rippled
Loading...
Searching...
No Matches
json_reader.cpp
1#include <xrpl/basics/contract.h>
2#include <xrpl/json/json_reader.h>
3#include <xrpl/json/json_value.h>
4
5#include <algorithm>
6#include <cctype>
7#include <cstdint>
8#include <cstdio>
9#include <cstring>
10#include <istream>
11#include <stdexcept>
12#include <string>
13
14namespace Json {
15// Implementation of class Reader
16// ////////////////////////////////
17
// Encode a Unicode code point as a UTF-8 byte sequence.
//
// cp      Code point to encode.
// returns 1 to 4 bytes depending on the magnitude of cp, or an empty string
//         when cp is greater than 0x10FFFF.
//
// Values in the surrogate range (0xD800-0xDFFF) are not rejected here; they
// are encoded with the ordinary three-byte pattern.
static std::string
codePointToUTF8(unsigned int cp)
{
    // based on description from http://en.wikipedia.org/wiki/UTF-8

    // Builds one continuation byte (10xxxxxx) from the six bits of cp that
    // start at bit position `shift`. The byte is fully composed as an
    // unsigned value before the single, explicit conversion to char.
    auto const continuation = [cp](unsigned int shift) {
        return static_cast<char>(0x80 | (0x3f & (cp >> shift)));
    };

    std::string result;

    if (cp <= 0x7f)
    {
        result.push_back(static_cast<char>(cp));
    }
    else if (cp <= 0x7FF)
    {
        result.push_back(static_cast<char>(0xC0 | (0x1f & (cp >> 6))));
        result.push_back(continuation(0));
    }
    else if (cp <= 0xFFFF)
    {
        result.push_back(static_cast<char>(0xE0 | (0xf & (cp >> 12))));
        result.push_back(continuation(6));
        result.push_back(continuation(0));
    }
    else if (cp <= 0x10FFFF)
    {
        result.push_back(static_cast<char>(0xF0 | (0x7 & (cp >> 18))));
        result.push_back(continuation(12));
        result.push_back(continuation(6));
        result.push_back(continuation(0));
    }

    return result;
}
54
55// Class Reader
56// //////////////////////////////////////////////////////////////////
57
58bool
59Reader::parse(std::string const& document, Value& root)
60{
61 document_ = document;
62 char const* begin = document_.c_str();
63 char const* end = begin + document_.length();
64 return parse(begin, end, root);
65}
66
67bool
69{
70 // std::istream_iterator<char> begin(sin);
71 // std::istream_iterator<char> end;
72 // Those would allow streamed input from a file, if parse() were a
73 // template function.
74
75 // Since std::string is reference-counted, this at least does not
76 // create an extra copy.
77 std::string doc;
78 std::getline(sin, doc, (char)EOF);
79 return parse(doc, root);
80}
81
82bool
83Reader::parse(char const* beginDoc, char const* endDoc, Value& root)
84{
85 begin_ = beginDoc;
86 end_ = endDoc;
88 lastValueEnd_ = 0;
89 lastValue_ = 0;
90 errors_.clear();
91
92 while (!nodes_.empty())
93 nodes_.pop();
94
95 nodes_.push(&root);
96 bool successful = readValue(0);
97 Token token;
98 skipCommentTokens(token);
99
100 if (!root.isNull() && !root.isArray() && !root.isObject())
101 {
102 // Set error location to start of doc, ideally should be first token
103 // found in doc
104 token.type_ = tokenError;
105 token.start_ = beginDoc;
106 token.end_ = endDoc;
107 addError("A valid JSON document must be either an array or an object value.", token);
108 return false;
109 }
110
111 return successful;
112}
113
114bool
115Reader::readValue(unsigned depth)
116{
117 Token token;
118 skipCommentTokens(token);
119 if (depth > nest_limit)
120 return addError("Syntax error: maximum nesting depth exceeded", token);
121 bool successful = true;
122
123 switch (token.type_)
124 {
125 case tokenObjectBegin:
126 successful = readObject(token, depth);
127 break;
128
129 case tokenArrayBegin:
130 successful = readArray(token, depth);
131 break;
132
133 case tokenInteger:
134 successful = decodeNumber(token);
135 break;
136
137 case tokenDouble:
138 successful = decodeDouble(token);
139 break;
140
141 case tokenString:
142 successful = decodeString(token);
143 break;
144
145 case tokenTrue:
146 currentValue() = true;
147 break;
148
149 case tokenFalse:
150 currentValue() = false;
151 break;
152
153 case tokenNull:
154 currentValue() = Value();
155 break;
156
157 default:
158 return addError("Syntax error: value, object or array expected.", token);
159 }
160
161 return successful;
162}
163
164void
166{
167 do
168 {
169 readToken(token);
170 } while (token.type_ == tokenComment);
171}
172
173bool
174Reader::expectToken(TokenType type, Token& token, char const* message)
175{
176 readToken(token);
177
178 if (token.type_ != type)
179 return addError(message, token);
180
181 return true;
182}
183
184bool
186{
187 skipSpaces();
188 token.start_ = current_;
189 Char c = getNextChar();
190 bool ok = true;
191
192 switch (c)
193 {
194 case '{':
195 token.type_ = tokenObjectBegin;
196 break;
197
198 case '}':
199 token.type_ = tokenObjectEnd;
200 break;
201
202 case '[':
203 token.type_ = tokenArrayBegin;
204 break;
205
206 case ']':
207 token.type_ = tokenArrayEnd;
208 break;
209
210 case '"':
211 token.type_ = tokenString;
212 ok = readString();
213 break;
214
215 case '/':
216 token.type_ = tokenComment;
217 ok = readComment();
218 break;
219
220 case '0':
221 case '1':
222 case '2':
223 case '3':
224 case '4':
225 case '5':
226 case '6':
227 case '7':
228 case '8':
229 case '9':
230 case '-':
231 token.type_ = readNumber();
232 break;
233
234 case 't':
235 token.type_ = tokenTrue;
236 ok = match("rue", 3);
237 break;
238
239 case 'f':
240 token.type_ = tokenFalse;
241 ok = match("alse", 4); // cspell:disable-line
242 break;
243
244 case 'n':
245 token.type_ = tokenNull;
246 ok = match("ull", 3);
247 break;
248
249 case ',':
251 break;
252
253 case ':':
255 break;
256
257 case 0:
258 token.type_ = tokenEndOfStream;
259 break;
260
261 default:
262 ok = false;
263 break;
264 }
265
266 if (!ok)
267 token.type_ = tokenError;
268
269 token.end_ = current_;
270 return true;
271}
272
273void
275{
276 while (current_ != end_)
277 {
278 Char c = *current_;
279
280 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
281 ++current_;
282 else
283 break;
284 }
285}
286
287bool
288Reader::match(Location pattern, int patternLength)
289{
290 if (end_ - current_ < patternLength)
291 return false;
292
293 int index = patternLength;
294
295 while (index--)
296 if (current_[index] != pattern[index])
297 return false;
298
299 current_ += patternLength;
300 return true;
301}
302
303bool
305{
306 Char c = getNextChar();
307
308 if (c == '*')
309 return readCStyleComment();
310
311 if (c == '/')
312 return readCppStyleComment();
313
314 return false;
315}
316
317bool
319{
320 while (current_ != end_)
321 {
322 Char c = getNextChar();
323
324 if (c == '*' && *current_ == '/')
325 break;
326 }
327
328 return getNextChar() == '/';
329}
330
331bool
333{
334 while (current_ != end_)
335 {
336 Char c = getNextChar();
337
338 if (c == '\r' || c == '\n')
339 break;
340 }
341
342 return true;
343}
344
347{
348 static char const extended_tokens[] = {'.', 'e', 'E', '+', '-'};
349
350 TokenType type = tokenInteger;
351
352 if (current_ != end_)
353 {
354 if (*current_ == '-')
355 ++current_;
356
357 while (current_ != end_)
358 {
359 if (!std::isdigit(static_cast<unsigned char>(*current_)))
360 {
361 auto ret = std::find(std::begin(extended_tokens), std::end(extended_tokens), *current_);
362
363 if (ret == std::end(extended_tokens))
364 break;
365
366 type = tokenDouble;
367 }
368
369 ++current_;
370 }
371 }
372
373 return type;
374}
375
376bool
378{
379 Char c = 0;
380
381 while (current_ != end_)
382 {
383 c = getNextChar();
384
385 if (c == '\\')
386 getNextChar();
387 else if (c == '"')
388 break;
389 }
390
391 return c == '"';
392}
393
394bool
395Reader::readObject(Token& tokenStart, unsigned depth)
396{
397 Token tokenName;
398 std::string name;
400
401 while (readToken(tokenName))
402 {
403 bool initialTokenOk = true;
404
405 while (tokenName.type_ == tokenComment && initialTokenOk)
406 initialTokenOk = readToken(tokenName);
407
408 if (!initialTokenOk)
409 break;
410
411 if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
412 return true;
413
414 if (tokenName.type_ != tokenString)
415 break;
416
417 name = "";
418
419 if (!decodeString(tokenName, name))
421
422 Token colon;
423
424 if (!readToken(colon) || colon.type_ != tokenMemberSeparator)
425 {
426 return addErrorAndRecover("Missing ':' after object member name", colon, tokenObjectEnd);
427 }
428
429 // Reject duplicate names
430 if (currentValue().isMember(name))
431 return addError("Key '" + name + "' appears twice.", tokenName);
432
433 Value& value = currentValue()[name];
434 nodes_.push(&value);
435 bool ok = readValue(depth + 1);
436 nodes_.pop();
437
438 if (!ok) // error already set
440
441 Token comma;
442
443 if (!readToken(comma) ||
444 (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator && comma.type_ != tokenComment))
445 {
446 return addErrorAndRecover("Missing ',' or '}' in object declaration", comma, tokenObjectEnd);
447 }
448
449 bool finalizeTokenOk = true;
450
451 while (comma.type_ == tokenComment && finalizeTokenOk)
452 finalizeTokenOk = readToken(comma);
453
454 if (comma.type_ == tokenObjectEnd)
455 return true;
456 }
457
458 return addErrorAndRecover("Missing '}' or object member name", tokenName, tokenObjectEnd);
459}
460
461bool
462Reader::readArray(Token& tokenStart, unsigned depth)
463{
465 skipSpaces();
466
467 if (*current_ == ']') // empty array
468 {
469 Token endArray;
470 readToken(endArray);
471 return true;
472 }
473
474 int index = 0;
475
476 while (true)
477 {
478 Value& value = currentValue()[index++];
479 nodes_.push(&value);
480 bool ok = readValue(depth + 1);
481 nodes_.pop();
482
483 if (!ok) // error already set
485
486 Token token;
487 // Accept Comment after last item in the array.
488 ok = readToken(token);
489
490 while (token.type_ == tokenComment && ok)
491 {
492 ok = readToken(token);
493 }
494
495 bool badTokenType = (token.type_ != tokenArraySeparator && token.type_ != tokenArrayEnd);
496
497 if (!ok || badTokenType)
498 {
499 return addErrorAndRecover("Missing ',' or ']' in array declaration", token, tokenArrayEnd);
500 }
501
502 if (token.type_ == tokenArrayEnd)
503 break;
504 }
505
506 return true;
507}
508
509bool
511{
512 Location current = token.start_;
513 bool isNegative = *current == '-';
514
515 if (isNegative)
516 ++current;
517
518 if (current == token.end_)
519 {
520 return addError("'" + std::string(token.start_, token.end_) + "' is not a valid number.", token);
521 }
522
523 // The existing Json integers are 32-bit so using a 64-bit value here avoids
524 // overflows in the conversion code below.
525 std::int64_t value = 0;
526
527 static_assert(sizeof(value) > sizeof(Value::maxUInt), "The JSON integer overflow logic will need to be reworked.");
528
529 while (current < token.end_ && (value <= Value::maxUInt))
530 {
531 Char c = *current++;
532
533 if (c < '0' || c > '9')
534 {
535 return addError("'" + std::string(token.start_, token.end_) + "' is not a number.", token);
536 }
537
538 value = (value * 10) + (c - '0');
539 }
540
541 // More tokens left -> input is larger than largest possible return value
542 if (current != token.end_)
543 {
544 return addError("'" + std::string(token.start_, token.end_) + "' exceeds the allowable range.", token);
545 }
546
547 if (isNegative)
548 {
549 value = -value;
550
551 if (value < Value::minInt || value > Value::maxInt)
552 {
553 return addError("'" + std::string(token.start_, token.end_) + "' exceeds the allowable range.", token);
554 }
555
556 currentValue() = static_cast<Value::Int>(value);
557 }
558 else
559 {
560 if (value > Value::maxUInt)
561 {
562 return addError("'" + std::string(token.start_, token.end_) + "' exceeds the allowable range.", token);
563 }
564
565 // If it's representable as a signed integer, construct it as one.
566 if (value <= Value::maxInt)
567 currentValue() = static_cast<Value::Int>(value);
568 else
569 currentValue() = static_cast<Value::UInt>(value);
570 }
571
572 return true;
573}
574
575bool
577{
578 double value = 0;
579 int const bufferSize = 32;
580 int count;
581 int length = int(token.end_ - token.start_);
582 // Sanity check to avoid buffer overflow exploits.
583 if (length < 0)
584 {
585 return addError("Unable to parse token length", token);
586 }
587 // Avoid using a string constant for the format control string given to
588 // sscanf, as this can cause hard to debug crashes on OS X. See here for
589 // more info:
590 //
591 // http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html
592 char format[] = "%lf";
593 if (length <= bufferSize)
594 {
595 Char buffer[bufferSize + 1];
596 memcpy(buffer, token.start_, length);
597 buffer[length] = 0;
598 count = sscanf(buffer, format, &value);
599 }
600 else
601 {
602 std::string buffer(token.start_, token.end_);
603 count = sscanf(buffer.c_str(), format, &value);
604 }
605 if (count != 1)
606 return addError("'" + std::string(token.start_, token.end_) + "' is not a number.", token);
607 currentValue() = value;
608 return true;
609}
610
611bool
613{
614 std::string decoded;
615
616 if (!decodeString(token, decoded))
617 return false;
618
619 currentValue() = decoded;
620 return true;
621}
622
623bool
625{
626 decoded.reserve(token.end_ - token.start_ - 2);
627 Location current = token.start_ + 1; // skip '"'
628 Location end = token.end_ - 1; // do not include '"'
629
630 while (current != end)
631 {
632 Char c = *current++;
633
634 if (c == '"')
635 break;
636 else if (c == '\\')
637 {
638 if (current == end)
639 return addError("Empty escape sequence in string", token, current);
640
641 Char escape = *current++;
642
643 switch (escape)
644 {
645 case '"':
646 decoded += '"';
647 break;
648
649 case '/':
650 decoded += '/';
651 break;
652
653 case '\\':
654 decoded += '\\';
655 break;
656
657 case 'b':
658 decoded += '\b';
659 break;
660
661 case 'f':
662 decoded += '\f';
663 break;
664
665 case 'n':
666 decoded += '\n';
667 break;
668
669 case 'r':
670 decoded += '\r';
671 break;
672
673 case 't':
674 decoded += '\t';
675 break;
676
677 case 'u': {
678 unsigned int unicode;
679
680 if (!decodeUnicodeCodePoint(token, current, end, unicode))
681 return false;
682
683 decoded += codePointToUTF8(unicode);
684 }
685 break;
686
687 default:
688 return addError("Bad escape sequence in string", token, current);
689 }
690 }
691 else
692 {
693 decoded += c;
694 }
695 }
696
697 return true;
698}
699
700bool
701Reader::decodeUnicodeCodePoint(Token& token, Location& current, Location end, unsigned int& unicode)
702{
703 if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
704 return false;
705
706 if (unicode >= 0xD800 && unicode <= 0xDBFF)
707 {
708 // surrogate pairs
709 if (end - current < 6)
710 return addError(
711 "additional six characters expected to parse unicode surrogate "
712 "pair.",
713 token,
714 current);
715
716 unsigned int surrogatePair;
717
718 if (*(current++) == '\\' && *(current++) == 'u')
719 {
720 if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair))
721 {
722 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
723 }
724 else
725 return false;
726 }
727 else
728 return addError(
729 "expecting another \\u token to begin the second half of a "
730 "unicode surrogate pair",
731 token,
732 current);
733 }
734
735 return true;
736}
737
738bool
739Reader::decodeUnicodeEscapeSequence(Token& token, Location& current, Location end, unsigned int& unicode)
740{
741 if (end - current < 4)
742 return addError("Bad unicode escape sequence in string: four digits expected.", token, current);
743
744 unicode = 0;
745
746 for (int index = 0; index < 4; ++index)
747 {
748 Char c = *current++;
749 unicode *= 16;
750
751 if (c >= '0' && c <= '9')
752 unicode += c - '0';
753 else if (c >= 'a' && c <= 'f')
754 unicode += c - 'a' + 10;
755 else if (c >= 'A' && c <= 'F')
756 unicode += c - 'A' + 10;
757 else
758 return addError(
759 "Bad unicode escape sequence in string: hexadecimal digit "
760 "expected.",
761 token,
762 current);
763 }
764
765 return true;
766}
767
768bool
769Reader::addError(std::string const& message, Token& token, Location extra)
770{
771 ErrorInfo info;
772 info.token_ = token;
773 info.message_ = message;
774 info.extra_ = extra;
775 errors_.push_back(info);
776 return false;
777}
778
779bool
781{
782 int errorCount = int(errors_.size());
783 Token skip;
784
785 while (true)
786 {
787 if (!readToken(skip))
788 errors_.resize(errorCount); // discard errors caused by recovery
789
790 if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
791 break;
792 }
793
794 errors_.resize(errorCount);
795 return false;
796}
797
798bool
799Reader::addErrorAndRecover(std::string const& message, Token& token, TokenType skipUntilToken)
800{
801 addError(message, token);
802 return recoverFromError(skipUntilToken);
803}
804
805Value&
807{
808 return *(nodes_.top());
809}
810
813{
814 if (current_ == end_)
815 return 0;
816
817 return *current_++;
818}
819
820void
821Reader::getLocationLineAndColumn(Location location, int& line, int& column) const
822{
823 Location current = begin_;
824 Location lastLineStart = current;
825 line = 0;
826
827 while (current < location && current != end_)
828 {
829 Char c = *current++;
830
831 if (c == '\r')
832 {
833 if (*current == '\n')
834 ++current;
835
836 lastLineStart = current;
837 ++line;
838 }
839 else if (c == '\n')
840 {
841 lastLineStart = current;
842 ++line;
843 }
844 }
845
846 // column & line start at 1
847 column = int(location - lastLineStart) + 1;
848 ++line;
849}
850
853{
854 int line, column;
855 getLocationLineAndColumn(location, line, column);
856 return "Line " + std::to_string(line) + ", Column " + std::to_string(column);
857}
858
861{
862 std::string formattedMessage;
863
864 for (Errors::const_iterator itError = errors_.begin(); itError != errors_.end(); ++itError)
865 {
866 ErrorInfo const& error = *itError;
867 formattedMessage += "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
868 formattedMessage += " " + error.message_ + "\n";
869
870 if (error.extra_)
871 formattedMessage += "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
872 }
873
874 return formattedMessage;
875}
876
879{
880 Json::Reader reader;
881 bool ok = reader.parse(sin, root);
882
883 // XRPL_ASSERT(ok, "Json::operator>>() : parse succeeded");
884 if (!ok)
885 xrpl::Throw<std::runtime_error>(reader.getFormattedErrorMessages());
886
887 return sin;
888}
889
890} // namespace Json
T begin(T... args)
T c_str(T... args)
Unserialize a JSON document into a Value.
Definition json_reader.h:18
void skipCommentTokens(Token &token)
bool addErrorAndRecover(std::string const &message, Token &token, TokenType skipUntilToken)
bool decodeDouble(Token &token)
Location current_
Location begin_
bool match(Location pattern, int patternLength)
bool expectToken(TokenType type, Token &token, char const *message)
bool decodeNumber(Token &token)
Char const * Location
Definition json_reader.h:21
bool decodeUnicodeEscapeSequence(Token &token, Location &current, Location end, unsigned int &unicode)
Location lastValueEnd_
std::string getFormattedErrorMessages() const
Returns a user friendly string that list errors in the parsed document.
bool readCppStyleComment()
Location end_
static constexpr unsigned nest_limit
Definition json_reader.h:71
bool readToken(Token &token)
bool readValue(unsigned depth)
Value & currentValue()
Value * lastValue_
void getLocationLineAndColumn(Location location, int &line, int &column) const
bool recoverFromError(TokenType skipUntilToken)
bool parse(std::string const &document, Value &root)
Read a Value from a JSON document.
bool decodeUnicodeCodePoint(Token &token, Location &current, Location end, unsigned int &unicode)
bool decodeString(Token &token)
bool readArray(Token &token, unsigned depth)
bool addError(std::string const &message, Token &token, Location extra=0)
bool readCStyleComment()
Reader::TokenType readNumber()
bool readObject(Token &token, unsigned depth)
std::string document_
Represents a JSON value.
Definition json_value.h:131
Json::UInt UInt
Definition json_value.h:138
Json::Int Int
Definition json_value.h:139
static constexpr Int maxInt
Definition json_value.h:144
static constexpr UInt maxUInt
Definition json_value.h:145
T clear(T... args)
T empty(T... args)
T end(T... args)
T find(T... args)
T getline(T... args)
T is_same_v
JSON (JavaScript Object Notation).
Definition json_errors.h:6
std::istream & operator>>(std::istream &, Value &)
Read from 'sin' into 'root'.
@ arrayValue
array value (ordered list)
Definition json_value.h:26
@ objectValue
object value (collection of name/value pairs).
Definition json_value.h:27
static std::string codePointToUTF8(unsigned int cp)
T pop(T... args)
T push_back(T... args)
T push(T... args)
T reserve(T... args)
T resize(T... args)
T length(T... args)
T to_string(T... args)
T top(T... args)