rippled
Loading...
Searching...
No Matches
json_reader.cpp
1//------------------------------------------------------------------------------
2/*
3 This file is part of rippled: https://github.com/ripple/rippled
4 Copyright (c) 2012, 2013 Ripple Labs Inc.
5
6 Permission to use, copy, modify, and/or distribute this software for any
7 purpose with or without fee is hereby granted, provided that the above
8 copyright notice and this permission notice appear in all copies.
9
10 THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17*/
18//==============================================================================
19
20#include <xrpl/basics/contract.h>
21#include <xrpl/json/json_reader.h>
22#include <xrpl/json/json_value.h>
23
24#include <algorithm>
25#include <cctype>
26#include <cstdint>
27#include <cstdio>
28#include <cstring>
29#include <istream>
30#include <stdexcept>
31#include <string>
32
33namespace Json {
34// Implementation of class Reader
35// ////////////////////////////////
36
/**
 * Encode a Unicode code point as a UTF-8 byte sequence.
 *
 * Layout follows the description at http://en.wikipedia.org/wiki/UTF-8
 *
 * @param cp Code point to encode.
 * @return The 1- to 4-byte encoding of cp, or an empty string when cp is
 *         greater than 0x10FFFF.
 */
static std::string
codePointToUTF8(unsigned int cp)
{
    std::string result;

    if (cp <= 0x7f)
    {
        result.push_back(static_cast<char>(cp));
    }
    else if (cp <= 0x7FF)
    {
        result.push_back(static_cast<char>(0xC0 | (0x1f & (cp >> 6))));
        result.push_back(static_cast<char>(0x80 | (0x3f & cp)));
    }
    else if (cp <= 0xFFFF)
    {
        // Cast the fully assembled byte, as the other branches do, instead
        // of or-ing an int lead marker into an already narrowed char.
        result.push_back(static_cast<char>(0xE0 | (0xf & (cp >> 12))));
        result.push_back(static_cast<char>(0x80 | (0x3f & (cp >> 6))));
        result.push_back(static_cast<char>(0x80 | (0x3f & cp)));
    }
    else if (cp <= 0x10FFFF)
    {
        result.push_back(static_cast<char>(0xF0 | (0x7 & (cp >> 18))));
        result.push_back(static_cast<char>(0x80 | (0x3f & (cp >> 12))));
        result.push_back(static_cast<char>(0x80 | (0x3f & (cp >> 6))));
        result.push_back(static_cast<char>(0x80 | (0x3f & cp)));
    }

    return result;
}
73
74// Class Reader
75// //////////////////////////////////////////////////////////////////
76
77bool
78Reader::parse(std::string const& document, Value& root)
79{
80 document_ = document;
81 char const* begin = document_.c_str();
82 char const* end = begin + document_.length();
83 return parse(begin, end, root);
84}
85
// Stream overload of parse: pulls text out of `sin` into a local string with
// std::getline, using (char)EOF as the delimiter, then forwards that string
// and `root` to parse(doc, root) and returns its result.
//
// NOTE(review): the listing numbers jump from 86 to 88, so the line that
// carried this definition's name and parameter list is not present here.
// The body only shows that it uses `sin` and `root`; presumably this is the
// Reader::parse(std::istream&, Value&) overload -- confirm against the header.
// NOTE(review): because the delimiter is (char)EOF, extraction presumably
// stops early if the input contains a byte equal to that value -- verify
// whether that is intended.
86bool
88{
89 // std::istream_iterator<char> begin(sin);
90 // std::istream_iterator<char> end;
91 // Those would allow streamed input from a file, if parse() were a
92 // template function.
93
94 // Since std::string is reference-counted, this at least does not
95 // create an extra copy.
96 std::string doc;
97 std::getline(sin, doc, (char)EOF);
98 return parse(doc, root);
99}
100
// Parse the character range [beginDoc, endDoc) into `root`.
//
// Resets the reader state (begin_, end_, lastValueEnd_, lastValue_, errors_
// and the nodes_ stack), pushes &root as the current node, reads one value
// at depth 0 and then skips any comment tokens that follow it.
// A root that is neither null, an array, nor an object is rejected: an error
// spanning the whole document is recorded and false is returned. Otherwise
// the result of readValue(0) is returned.
101bool
102Reader::parse(char const* beginDoc, char const* endDoc, Value& root)
103{
104 begin_ = beginDoc;
105 end_ = endDoc;
// NOTE(review): listing line 106 is absent here. None of the visible lines
// assigns current_, which skipSpaces()/getNextChar() read; presumably the
// missing line initialises it -- confirm against the original file.
107 lastValueEnd_ = 0;
108 lastValue_ = 0;
109 errors_.clear();
110
111 while (!nodes_.empty())
112 nodes_.pop();
113
114 nodes_.push(&root);
115 bool successful = readValue(0);
116 Token token;
117 skipCommentTokens(token);
118
119 if (!root.isNull() && !root.isArray() && !root.isObject())
120 {
121 // Set error location to start of doc, ideally should be first token
122 // found in doc
123 token.type_ = tokenError;
124 token.start_ = beginDoc;
125 token.end_ = endDoc;
126 addError(
127 "A valid JSON document must be either an array or an object value.",
128 token);
129 return false;
130 }
131
132 return successful;
133}
134
135bool
136Reader::readValue(unsigned depth)
137{
138 Token token;
139 skipCommentTokens(token);
140 if (depth > nest_limit)
141 return addError("Syntax error: maximum nesting depth exceeded", token);
142 bool successful = true;
143
144 switch (token.type_)
145 {
146 case tokenObjectBegin:
147 successful = readObject(token, depth);
148 break;
149
150 case tokenArrayBegin:
151 successful = readArray(token, depth);
152 break;
153
154 case tokenInteger:
155 successful = decodeNumber(token);
156 break;
157
158 case tokenDouble:
159 successful = decodeDouble(token);
160 break;
161
162 case tokenString:
163 successful = decodeString(token);
164 break;
165
166 case tokenTrue:
167 currentValue() = true;
168 break;
169
170 case tokenFalse:
171 currentValue() = false;
172 break;
173
174 case tokenNull:
175 currentValue() = Value();
176 break;
177
178 default:
179 return addError(
180 "Syntax error: value, object or array expected.", token);
181 }
182
183 return successful;
184}
185
186void
188{
189 do
190 {
191 readToken(token);
192 } while (token.type_ == tokenComment);
193}
194
195bool
196Reader::expectToken(TokenType type, Token& token, char const* message)
197{
198 readToken(token);
199
200 if (token.type_ != type)
201 return addError(message, token);
202
203 return true;
204}
205
// Tokenizer step: skips leading whitespace, records the token start, reads
// one character and classifies the token from it. Strings, comments,
// numbers and the literals true/false/null consume further characters via
// readString / readComment / readNumber / match. A character value of 0
// yields tokenEndOfStream. Any failed sub-read, or an unrecognised
// character, turns the token into tokenError. token.end_ is set to the
// position reached, and the function returns true on every visible path.
//
// NOTE(review): the listing numbers jump from 206 to 208, so the line with
// this definition's name and parameter list is absent; the body uses a
// parameter named `token`.
206bool
208{
209 skipSpaces();
210 token.start_ = current_;
211 Char c = getNextChar();
212 bool ok = true;
213
214 switch (c)
215 {
216 case '{':
217 token.type_ = tokenObjectBegin;
218 break;
219
220 case '}':
221 token.type_ = tokenObjectEnd;
222 break;
223
224 case '[':
225 token.type_ = tokenArrayBegin;
226 break;
227
228 case ']':
229 token.type_ = tokenArrayEnd;
230 break;
231
232 case '"':
233 token.type_ = tokenString;
234 ok = readString();
235 break;
236
237 case '/':
238 token.type_ = tokenComment;
239 ok = readComment();
240 break;
241
242 case '0':
243 case '1':
244 case '2':
245 case '3':
246 case '4':
247 case '5':
248 case '6':
249 case '7':
250 case '8':
251 case '9':
252 case '-':
253 token.type_ = readNumber();
254 break;
255
256 case 't':
257 token.type_ = tokenTrue;
258 ok = match("rue", 3);
259 break;
260
261 case 'f':
262 token.type_ = tokenFalse;
263 ok = match("alse", 4);
264 break;
265
266 case 'n':
267 token.type_ = tokenNull;
268 ok = match("ull", 3);
269 break;
270
271 case ',':
// NOTE(review): listing line 272 is absent. Presumably it assigns
// token.type_ the separator kind that readObject/readArray compare against
// (tokenArraySeparator) -- confirm against the original file.
273 break;
274
275 case ':':
// NOTE(review): listing line 276 is absent. Presumably it assigns
// token.type_ = tokenMemberSeparator, which readObject tests for after a
// member name -- confirm against the original file.
277 break;
278
279 case 0:
280 token.type_ = tokenEndOfStream;
281 break;
282
283 default:
284 ok = false;
285 break;
286 }
287
288 if (!ok)
289 token.type_ = tokenError;
290
291 token.end_ = current_;
292 return true;
293}
294
295void
297{
298 while (current_ != end_)
299 {
300 Char c = *current_;
301
302 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
303 ++current_;
304 else
305 break;
306 }
307}
308
309bool
310Reader::match(Location pattern, int patternLength)
311{
312 if (end_ - current_ < patternLength)
313 return false;
314
315 int index = patternLength;
316
317 while (index--)
318 if (current_[index] != pattern[index])
319 return false;
320
321 current_ += patternLength;
322 return true;
323}
324
325bool
327{
328 Char c = getNextChar();
329
330 if (c == '*')
331 return readCStyleComment();
332
333 if (c == '/')
334 return readCppStyleComment();
335
336 return false;
337}
338
339bool
341{
342 while (current_ != end_)
343 {
344 Char c = getNextChar();
345
346 if (c == '*' && *current_ == '/')
347 break;
348 }
349
350 return getNextChar() == '/';
351}
352
353bool
355{
356 while (current_ != end_)
357 {
358 Char c = getNextChar();
359
360 if (c == '\r' || c == '\n')
361 break;
362 }
363
364 return true;
365}
366
369{
370 static char const extended_tokens[] = {'.', 'e', 'E', '+', '-'};
371
372 TokenType type = tokenInteger;
373
374 if (current_ != end_)
375 {
376 if (*current_ == '-')
377 ++current_;
378
379 while (current_ != end_)
380 {
381 if (!std::isdigit(static_cast<unsigned char>(*current_)))
382 {
383 auto ret = std::find(
384 std::begin(extended_tokens),
385 std::end(extended_tokens),
386 *current_);
387
388 if (ret == std::end(extended_tokens))
389 break;
390
391 type = tokenDouble;
392 }
393
394 ++current_;
395 }
396 }
397
398 return type;
399}
400
401bool
403{
404 Char c = 0;
405
406 while (current_ != end_)
407 {
408 c = getNextChar();
409
410 if (c == '\\')
411 getNextChar();
412 else if (c == '"')
413 break;
414 }
415
416 return c == '"';
417}
418
// Parse the members of a JSON object into currentValue().
//
// Each iteration reads a member name token (skipping comments), decodes it,
// expects a ':' token, rejects names already present in the current value,
// then reads the member value one nesting level deeper with the member's
// Value pushed on nodes_. After the value, a ',' continues the loop and a
// '}' ends the object; comments are tolerated in between.
// A '}' in name position is accepted only while `name` is still empty
// (i.e. before any member was read). Anything else falls out of the loop
// and is reported as "Missing '}' or object member name".
//
// NOTE(review): listing lines 424, 445, 465 and 476 are absent from this
// view; see the notes at each gap.
419bool
420Reader::readObject(Token& tokenStart, unsigned depth)
421{
422 Token tokenName;
423 std::string name;
// NOTE(review): listing line 424 is absent. Presumably it resets
// currentValue() to an empty object value -- confirm against the original.
425
426 while (readToken(tokenName))
427 {
428 bool initialTokenOk = true;
429
430 while (tokenName.type_ == tokenComment && initialTokenOk)
431 initialTokenOk = readToken(tokenName);
432
433 if (!initialTokenOk)
434 break;
435
436 if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
437 return true;
438
439 if (tokenName.type_ != tokenString)
440 break;
441
442 name = "";
443
444 if (!decodeString(tokenName, name))
// NOTE(review): listing line 445 (the statement controlled by the `if`
// above) is absent. Presumably it returns after error recovery -- confirm.
446
447 Token colon;
448
449 if (!readToken(colon) || colon.type_ != tokenMemberSeparator)
450 {
451 return addErrorAndRecover(
452 "Missing ':' after object member name", colon, tokenObjectEnd);
453 }
454
455 // Reject duplicate names
456 if (currentValue().isMember(name))
457 return addError("Key '" + name + "' appears twice.", tokenName);
458
459 Value& value = currentValue()[name];
460 nodes_.push(&value);
461 bool ok = readValue(depth + 1);
462 nodes_.pop();
463
464 if (!ok) // error already set
// NOTE(review): listing line 465 (the statement controlled by `if (!ok)`)
// is absent. Presumably it returns after error recovery -- confirm.
466
467 Token comma;
468
469 if (!readToken(comma) ||
470 (comma.type_ != tokenObjectEnd &&
471 comma.type_ != tokenArraySeparator && comma.type_ != tokenComment))
472 {
473 return addErrorAndRecover(
474 "Missing ',' or '}' in object declaration",
475 comma,
// NOTE(review): listing line 476 (the last argument and closing of this
// call) is absent. Presumably the skip-until token is tokenObjectEnd, as in
// the other addErrorAndRecover calls of this function -- confirm.
477 }
478
479 bool finalizeTokenOk = true;
480
481 while (comma.type_ == tokenComment && finalizeTokenOk)
482 finalizeTokenOk = readToken(comma);
483
484 if (comma.type_ == tokenObjectEnd)
485 return true;
486 }
487
488 return addErrorAndRecover(
489 "Missing '}' or object member name", tokenName, tokenObjectEnd);
490}
491
// Parse the elements of a JSON array into currentValue().
//
// After skipping whitespace, an immediate ']' is consumed and the function
// returns true (empty array). Otherwise elements are read in order: each
// element's Value is obtained by indexing currentValue(), pushed on nodes_,
// and filled by readValue one nesting level deeper. After every element the
// next non-comment token must be ',' (continue) or ']' (done); anything else
// is reported as "Missing ',' or ']' in array declaration".
//
// NOTE(review): listing lines 495, 515 and 535 are absent from this view;
// see the notes at each gap.
// NOTE(review): `*current_ == ']'` below reads current_ right after
// skipSpaces() without comparing it to end_ first -- verify this is safe for
// every way the buffer can be supplied.
492bool
493Reader::readArray(Token& tokenStart, unsigned depth)
494{
// NOTE(review): listing line 495 is absent. Presumably it resets
// currentValue() to an empty array value -- confirm against the original.
496 skipSpaces();
497
498 if (*current_ == ']') // empty array
499 {
500 Token endArray;
501 readToken(endArray);
502 return true;
503 }
504
505 int index = 0;
506
507 while (true)
508 {
509 Value& value = currentValue()[index++];
510 nodes_.push(&value);
511 bool ok = readValue(depth + 1);
512 nodes_.pop();
513
514 if (!ok) // error already set
// NOTE(review): listing line 515 (the statement controlled by `if (!ok)`)
// is absent. Presumably it returns after error recovery -- confirm.
516
517 Token token;
518 // Accept Comment after last item in the array.
519 ok = readToken(token);
520
521 while (token.type_ == tokenComment && ok)
522 {
523 ok = readToken(token);
524 }
525
526 bool badTokenType =
527 (token.type_ != tokenArraySeparator &&
528 token.type_ != tokenArrayEnd);
529
530 if (!ok || badTokenType)
531 {
532 return addErrorAndRecover(
533 "Missing ',' or ']' in array declaration",
534 token,
// NOTE(review): listing line 535 (the last argument and closing of this
// call) is absent. Presumably the skip-until token is tokenArrayEnd --
// confirm against the original file.
536 }
537
538 if (token.type_ == tokenArrayEnd)
539 break;
540 }
541
542 return true;
543}
544
545bool
547{
548 Location current = token.start_;
549 bool isNegative = *current == '-';
550
551 if (isNegative)
552 ++current;
553
554 if (current == token.end_)
555 {
556 return addError(
557 "'" + std::string(token.start_, token.end_) +
558 "' is not a valid number.",
559 token);
560 }
561
562 // The existing Json integers are 32-bit so using a 64-bit value here avoids
563 // overflows in the conversion code below.
564 std::int64_t value = 0;
565
566 static_assert(
567 sizeof(value) > sizeof(Value::maxUInt),
568 "The JSON integer overflow logic will need to be reworked.");
569
570 while (current < token.end_ && (value <= Value::maxUInt))
571 {
572 Char c = *current++;
573
574 if (c < '0' || c > '9')
575 {
576 return addError(
577 "'" + std::string(token.start_, token.end_) +
578 "' is not a number.",
579 token);
580 }
581
582 value = (value * 10) + (c - '0');
583 }
584
585 // More tokens left -> input is larger than largest possible return value
586 if (current != token.end_)
587 {
588 return addError(
589 "'" + std::string(token.start_, token.end_) +
590 "' exceeds the allowable range.",
591 token);
592 }
593
594 if (isNegative)
595 {
596 value = -value;
597
598 if (value < Value::minInt || value > Value::maxInt)
599 {
600 return addError(
601 "'" + std::string(token.start_, token.end_) +
602 "' exceeds the allowable range.",
603 token);
604 }
605
606 currentValue() = static_cast<Value::Int>(value);
607 }
608 else
609 {
610 if (value > Value::maxUInt)
611 {
612 return addError(
613 "'" + std::string(token.start_, token.end_) +
614 "' exceeds the allowable range.",
615 token);
616 }
617
618 // If it's representable as a signed integer, construct it as one.
619 if (value <= Value::maxInt)
620 currentValue() = static_cast<Value::Int>(value);
621 else
622 currentValue() = static_cast<Value::UInt>(value);
623 }
624
625 return true;
626}
627
628bool
630{
631 double value = 0;
632 int const bufferSize = 32;
633 int count;
634 int length = int(token.end_ - token.start_);
635 // Sanity check to avoid buffer overflow exploits.
636 if (length < 0)
637 {
638 return addError("Unable to parse token length", token);
639 }
640 // Avoid using a string constant for the format control string given to
641 // sscanf, as this can cause hard to debug crashes on OS X. See here for
642 // more info:
643 //
644 // http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html
645 char format[] = "%lf";
646 if (length <= bufferSize)
647 {
648 Char buffer[bufferSize + 1];
649 memcpy(buffer, token.start_, length);
650 buffer[length] = 0;
651 count = sscanf(buffer, format, &value);
652 }
653 else
654 {
655 std::string buffer(token.start_, token.end_);
656 count = sscanf(buffer.c_str(), format, &value);
657 }
658 if (count != 1)
659 return addError(
660 "'" + std::string(token.start_, token.end_) + "' is not a number.",
661 token);
662 currentValue() = value;
663 return true;
664}
665
666bool
668{
669 std::string decoded;
670
671 if (!decodeString(token, decoded))
672 return false;
673
674 currentValue() = decoded;
675 return true;
676}
677
678bool
680{
681 decoded.reserve(token.end_ - token.start_ - 2);
682 Location current = token.start_ + 1; // skip '"'
683 Location end = token.end_ - 1; // do not include '"'
684
685 while (current != end)
686 {
687 Char c = *current++;
688
689 if (c == '"')
690 break;
691 else if (c == '\\')
692 {
693 if (current == end)
694 return addError(
695 "Empty escape sequence in string", token, current);
696
697 Char escape = *current++;
698
699 switch (escape)
700 {
701 case '"':
702 decoded += '"';
703 break;
704
705 case '/':
706 decoded += '/';
707 break;
708
709 case '\\':
710 decoded += '\\';
711 break;
712
713 case 'b':
714 decoded += '\b';
715 break;
716
717 case 'f':
718 decoded += '\f';
719 break;
720
721 case 'n':
722 decoded += '\n';
723 break;
724
725 case 'r':
726 decoded += '\r';
727 break;
728
729 case 't':
730 decoded += '\t';
731 break;
732
733 case 'u': {
734 unsigned int unicode;
735
736 if (!decodeUnicodeCodePoint(token, current, end, unicode))
737 return false;
738
739 decoded += codePointToUTF8(unicode);
740 }
741 break;
742
743 default:
744 return addError(
745 "Bad escape sequence in string", token, current);
746 }
747 }
748 else
749 {
750 decoded += c;
751 }
752 }
753
754 return true;
755}
756
757bool
759 Token& token,
760 Location& current,
761 Location end,
762 unsigned int& unicode)
763{
764 if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
765 return false;
766
767 if (unicode >= 0xD800 && unicode <= 0xDBFF)
768 {
769 // surrogate pairs
770 if (end - current < 6)
771 return addError(
772 "additional six characters expected to parse unicode surrogate "
773 "pair.",
774 token,
775 current);
776
777 unsigned int surrogatePair;
778
779 if (*(current++) == '\\' && *(current++) == 'u')
780 {
781 if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair))
782 {
783 unicode = 0x10000 + ((unicode & 0x3FF) << 10) +
784 (surrogatePair & 0x3FF);
785 }
786 else
787 return false;
788 }
789 else
790 return addError(
791 "expecting another \\u token to begin the second half of a "
792 "unicode surrogate pair",
793 token,
794 current);
795 }
796
797 return true;
798}
799
800bool
802 Token& token,
803 Location& current,
804 Location end,
805 unsigned int& unicode)
806{
807 if (end - current < 4)
808 return addError(
809 "Bad unicode escape sequence in string: four digits expected.",
810 token,
811 current);
812
813 unicode = 0;
814
815 for (int index = 0; index < 4; ++index)
816 {
817 Char c = *current++;
818 unicode *= 16;
819
820 if (c >= '0' && c <= '9')
821 unicode += c - '0';
822 else if (c >= 'a' && c <= 'f')
823 unicode += c - 'a' + 10;
824 else if (c >= 'A' && c <= 'F')
825 unicode += c - 'A' + 10;
826 else
827 return addError(
828 "Bad unicode escape sequence in string: hexadecimal digit "
829 "expected.",
830 token,
831 current);
832 }
833
834 return true;
835}
836
837bool
838Reader::addError(std::string const& message, Token& token, Location extra)
839{
840 ErrorInfo info;
841 info.token_ = token;
842 info.message_ = message;
843 info.extra_ = extra;
844 errors_.push_back(info);
845 return false;
846}
847
848bool
850{
851 int errorCount = int(errors_.size());
852 Token skip;
853
854 while (true)
855 {
856 if (!readToken(skip))
857 errors_.resize(errorCount); // discard errors caused by recovery
858
859 if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
860 break;
861 }
862
863 errors_.resize(errorCount);
864 return false;
865}
866
867bool
869 std::string const& message,
870 Token& token,
871 TokenType skipUntilToken)
872{
873 addError(message, token);
874 return recoverFromError(skipUntilToken);
875}
876
877Value&
879{
880 return *(nodes_.top());
881}
882
885{
886 if (current_ == end_)
887 return 0;
888
889 return *current_++;
890}
891
892void
893Reader::getLocationLineAndColumn(Location location, int& line, int& column)
894 const
895{
896 Location current = begin_;
897 Location lastLineStart = current;
898 line = 0;
899
900 while (current < location && current != end_)
901 {
902 Char c = *current++;
903
904 if (c == '\r')
905 {
906 if (*current == '\n')
907 ++current;
908
909 lastLineStart = current;
910 ++line;
911 }
912 else if (c == '\n')
913 {
914 lastLineStart = current;
915 ++line;
916 }
917 }
918
919 // column & line start at 1
920 column = int(location - lastLineStart) + 1;
921 ++line;
922}
923
926{
927 int line, column;
928 getLocationLineAndColumn(location, line, column);
929 return "Line " + std::to_string(line) + ", Column " +
930 std::to_string(column);
931}
932
935{
936 std::string formattedMessage;
937
938 for (Errors::const_iterator itError = errors_.begin();
939 itError != errors_.end();
940 ++itError)
941 {
942 ErrorInfo const& error = *itError;
943 formattedMessage +=
944 "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
945 formattedMessage += " " + error.message_ + "\n";
946
947 if (error.extra_)
948 formattedMessage += "See " +
949 getLocationLineAndColumn(error.extra_) + " for detail.\n";
950 }
951
952 return formattedMessage;
953}
954
957{
958 Json::Reader reader;
959 bool ok = reader.parse(sin, root);
960
961 // XRPL_ASSERT(ok, "Json::operator>>() : parse succeeded");
962 if (!ok)
963 ripple::Throw<std::runtime_error>(reader.getFormatedErrorMessages());
964
965 return sin;
966}
967
968} // namespace Json
T begin(T... args)
T c_str(T... args)
Unserialize a JSON document into a Value.
Definition: json_reader.h:39
Errors errors_
Definition: json_reader.h:203
void skipCommentTokens(Token &token)
bool addErrorAndRecover(std::string const &message, Token &token, TokenType skipUntilToken)
bool decodeDouble(Token &token)
std::string getFormatedErrorMessages() const
Returns a user friendly string that list errors in the parsed document.
Location current_
Definition: json_reader.h:207
Location begin_
Definition: json_reader.h:205
bool match(Location pattern, int patternLength)
void skipSpaces()
bool expectToken(TokenType type, Token &token, char const *message)
bool decodeNumber(Token &token)
Char const * Location
Definition: json_reader.h:42
bool decodeUnicodeEscapeSequence(Token &token, Location &current, Location end, unsigned int &unicode)
Location lastValueEnd_
Definition: json_reader.h:208
bool readString()
bool readCppStyleComment()
Location end_
Definition: json_reader.h:206
static constexpr unsigned nest_limit
Definition: json_reader.h:92
bool readToken(Token &token)
bool readValue(unsigned depth)
Value & currentValue()
Value * lastValue_
Definition: json_reader.h:209
void getLocationLineAndColumn(Location location, int &line, int &column) const
bool recoverFromError(TokenType skipUntilToken)
bool parse(std::string const &document, Value &root)
Read a Value from a JSON document.
Definition: json_reader.cpp:78
bool decodeUnicodeCodePoint(Token &token, Location &current, Location end, unsigned int &unicode)
@ tokenMemberSeparator
Definition: json_reader.h:108
bool decodeString(Token &token)
bool readArray(Token &token, unsigned depth)
Char getNextChar()
bool addError(std::string const &message, Token &token, Location extra=0)
bool readComment()
bool readCStyleComment()
Reader::TokenType readNumber()
bool readObject(Token &token, unsigned depth)
std::string document_
Definition: json_reader.h:204
Represents a JSON value.
Definition: json_value.h:149
Json::UInt UInt
Definition: json_value.h:156
static UInt const maxUInt
Definition: json_value.h:163
Json::Int Int
Definition: json_value.h:157
static Int const maxInt
Definition: json_value.h:162
T clear(T... args)
T empty(T... args)
T end(T... args)
T find(T... args)
T getline(T... args)
JSON (JavaScript Object Notation).
Definition: json_errors.h:25
std::istream & operator>>(std::istream &, Value &)
Read from 'sin' into 'root'.
@ arrayValue
array value (ordered list)
Definition: json_value.h:44
@ objectValue
object value (collection of name/value pairs).
Definition: json_value.h:45
static std::string codePointToUTF8(unsigned int cp)
Definition: json_reader.cpp:38
T pop(T... args)
T push_back(T... args)
T push(T... args)
T reserve(T... args)
T resize(T... args)
T length(T... args)
T to_string(T... args)
T top(T... args)