Files
rippled/src/libxrpl/json/json_reader.cpp
2026-02-19 23:30:00 +00:00

910 lines
20 KiB
C++

#include <xrpl/basics/contract.h>
#include <xrpl/json/json_reader.h>
#include <xrpl/json/json_value.h>
#include <algorithm>
#include <cctype>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <istream>
#include <stdexcept>
#include <string>
namespace Json {
// Implementation of class Reader
// ////////////////////////////////
static std::string
codePointToUTF8(unsigned int cp)
{
std::string result;
// based on description from http://en.wikipedia.org/wiki/UTF-8
if (cp <= 0x7f)
{
result.resize(1);
result[0] = static_cast<char>(cp);
}
else if (cp <= 0x7FF)
{
result.resize(2);
result[1] = static_cast<char>(0x80 | (0x3f & cp));
result[0] = static_cast<char>(0xC0 | (0x1f & (cp >> 6)));
}
else if (cp <= 0xFFFF)
{
result.resize(3);
result[2] = static_cast<char>(0x80 | (0x3f & cp));
result[1] = 0x80 | static_cast<char>((0x3f & (cp >> 6)));
result[0] = 0xE0 | static_cast<char>((0xf & (cp >> 12)));
}
else if (cp <= 0x10FFFF)
{
result.resize(4);
result[3] = static_cast<char>(0x80 | (0x3f & cp));
result[2] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 12)));
result[0] = static_cast<char>(0xF0 | (0x7 & (cp >> 18)));
}
return result;
}
// Class Reader
// //////////////////////////////////////////////////////////////////
bool
Reader::parse(std::string const& document, Value& root)
{
document_ = document;
char const* begin = document_.c_str();
char const* end = begin + document_.length();
return parse(begin, end, root);
}
bool
Reader::parse(std::istream& sin, Value& root)
{
// std::istream_iterator<char> begin(sin);
// std::istream_iterator<char> end;
// Those would allow streamed input from a file, if parse() were a
// template function.
// Since std::string is reference-counted, this at least does not
// create an extra copy.
std::string doc;
std::getline(sin, doc, (char)EOF);
return parse(doc, root);
}
bool
Reader::parse(char const* beginDoc, char const* endDoc, Value& root)
{
begin_ = beginDoc;
end_ = endDoc;
current_ = begin_;
lastValueEnd_ = 0;
lastValue_ = 0;
errors_.clear();
while (!nodes_.empty())
nodes_.pop();
nodes_.push(&root);
bool successful = readValue(0);
Token token;
skipCommentTokens(token);
if (!root.isNull() && !root.isArray() && !root.isObject())
{
// Set error location to start of doc, ideally should be first token
// found in doc
token.type_ = tokenError;
token.start_ = beginDoc;
token.end_ = endDoc;
addError("A valid JSON document must be either an array or an object value.", token);
return false;
}
return successful;
}
bool
Reader::readValue(unsigned depth)
{
Token token;
skipCommentTokens(token);
if (depth > nest_limit)
return addError("Syntax error: maximum nesting depth exceeded", token);
bool successful = true;
switch (token.type_)
{
case tokenObjectBegin:
successful = readObject(token, depth);
break;
case tokenArrayBegin:
successful = readArray(token, depth);
break;
case tokenInteger:
successful = decodeNumber(token);
break;
case tokenDouble:
successful = decodeDouble(token);
break;
case tokenString:
successful = decodeString(token);
break;
case tokenTrue:
currentValue() = true;
break;
case tokenFalse:
currentValue() = false;
break;
case tokenNull:
currentValue() = Value();
break;
default:
return addError("Syntax error: value, object or array expected.", token);
}
return successful;
}
void
Reader::skipCommentTokens(Token& token)
{
do
{
readToken(token);
} while (token.type_ == tokenComment);
}
bool
Reader::expectToken(TokenType type, Token& token, char const* message)
{
readToken(token);
if (token.type_ != type)
return addError(message, token);
return true;
}
bool
Reader::readToken(Token& token)
{
skipSpaces();
token.start_ = current_;
Char c = getNextChar();
bool ok = true;
switch (c)
{
case '{':
token.type_ = tokenObjectBegin;
break;
case '}':
token.type_ = tokenObjectEnd;
break;
case '[':
token.type_ = tokenArrayBegin;
break;
case ']':
token.type_ = tokenArrayEnd;
break;
case '"':
token.type_ = tokenString;
ok = readString();
break;
case '/':
token.type_ = tokenComment;
ok = readComment();
break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
case '-':
token.type_ = readNumber();
break;
case 't':
token.type_ = tokenTrue;
ok = match("rue", 3);
break;
case 'f':
token.type_ = tokenFalse;
ok = match("alse", 4); // cspell:disable-line
break;
case 'n':
token.type_ = tokenNull;
ok = match("ull", 3);
break;
case ',':
token.type_ = tokenArraySeparator;
break;
case ':':
token.type_ = tokenMemberSeparator;
break;
case 0:
token.type_ = tokenEndOfStream;
break;
default:
ok = false;
break;
}
if (!ok)
token.type_ = tokenError;
token.end_ = current_;
return true;
}
void
Reader::skipSpaces()
{
while (current_ != end_)
{
Char c = *current_;
if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
++current_;
else
break;
}
}
bool
Reader::match(Location pattern, int patternLength)
{
if (end_ - current_ < patternLength)
return false;
int index = patternLength;
while (index--)
if (current_[index] != pattern[index])
return false;
current_ += patternLength;
return true;
}
bool
Reader::readComment()
{
Char c = getNextChar();
if (c == '*')
return readCStyleComment();
if (c == '/')
return readCppStyleComment();
return false;
}
bool
Reader::readCStyleComment()
{
while (current_ != end_)
{
Char c = getNextChar();
if (c == '*' && *current_ == '/')
break;
}
return getNextChar() == '/';
}
bool
Reader::readCppStyleComment()
{
while (current_ != end_)
{
Char c = getNextChar();
if (c == '\r' || c == '\n')
break;
}
return true;
}
Reader::TokenType
Reader::readNumber()
{
static char const extended_tokens[] = {'.', 'e', 'E', '+', '-'};
TokenType type = tokenInteger;
if (current_ != end_)
{
if (*current_ == '-')
++current_;
while (current_ != end_)
{
if (!std::isdigit(static_cast<unsigned char>(*current_)))
{
auto ret =
std::find(std::begin(extended_tokens), std::end(extended_tokens), *current_);
if (ret == std::end(extended_tokens))
break;
type = tokenDouble;
}
++current_;
}
}
return type;
}
bool
Reader::readString()
{
Char c = 0;
while (current_ != end_)
{
c = getNextChar();
if (c == '\\')
getNextChar();
else if (c == '"')
break;
}
return c == '"';
}
bool
Reader::readObject(Token& tokenStart, unsigned depth)
{
Token tokenName;
std::string name;
currentValue() = Value(objectValue);
while (readToken(tokenName))
{
bool initialTokenOk = true;
while (tokenName.type_ == tokenComment && initialTokenOk)
initialTokenOk = readToken(tokenName);
if (!initialTokenOk)
break;
if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
return true;
if (tokenName.type_ != tokenString)
break;
name = "";
if (!decodeString(tokenName, name))
return recoverFromError(tokenObjectEnd);
Token colon;
if (!readToken(colon) || colon.type_ != tokenMemberSeparator)
{
return addErrorAndRecover(
"Missing ':' after object member name", colon, tokenObjectEnd);
}
// Reject duplicate names
if (currentValue().isMember(name))
return addError("Key '" + name + "' appears twice.", tokenName);
Value& value = currentValue()[name];
nodes_.push(&value);
bool ok = readValue(depth + 1);
nodes_.pop();
if (!ok) // error already set
return recoverFromError(tokenObjectEnd);
Token comma;
if (!readToken(comma) ||
(comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
comma.type_ != tokenComment))
{
return addErrorAndRecover(
"Missing ',' or '}' in object declaration", comma, tokenObjectEnd);
}
bool finalizeTokenOk = true;
while (comma.type_ == tokenComment && finalizeTokenOk)
finalizeTokenOk = readToken(comma);
if (comma.type_ == tokenObjectEnd)
return true;
}
return addErrorAndRecover("Missing '}' or object member name", tokenName, tokenObjectEnd);
}
bool
Reader::readArray(Token& tokenStart, unsigned depth)
{
currentValue() = Value(arrayValue);
skipSpaces();
if (*current_ == ']') // empty array
{
Token endArray;
readToken(endArray);
return true;
}
int index = 0;
while (true)
{
Value& value = currentValue()[index++];
nodes_.push(&value);
bool ok = readValue(depth + 1);
nodes_.pop();
if (!ok) // error already set
return recoverFromError(tokenArrayEnd);
Token token;
// Accept Comment after last item in the array.
ok = readToken(token);
while (token.type_ == tokenComment && ok)
{
ok = readToken(token);
}
bool badTokenType = (token.type_ != tokenArraySeparator && token.type_ != tokenArrayEnd);
if (!ok || badTokenType)
{
return addErrorAndRecover(
"Missing ',' or ']' in array declaration", token, tokenArrayEnd);
}
if (token.type_ == tokenArrayEnd)
break;
}
return true;
}
bool
Reader::decodeNumber(Token& token)
{
Location current = token.start_;
bool isNegative = *current == '-';
if (isNegative)
++current;
if (current == token.end_)
{
return addError(
"'" + std::string(token.start_, token.end_) + "' is not a valid number.", token);
}
// The existing Json integers are 32-bit so using a 64-bit value here avoids
// overflows in the conversion code below.
std::int64_t value = 0;
static_assert(
sizeof(value) > sizeof(Value::maxUInt),
"The JSON integer overflow logic will need to be reworked.");
while (current < token.end_ && (value <= Value::maxUInt))
{
Char c = *current++;
if (c < '0' || c > '9')
{
return addError(
"'" + std::string(token.start_, token.end_) + "' is not a number.", token);
}
value = (value * 10) + (c - '0');
}
// More tokens left -> input is larger than largest possible return value
if (current != token.end_)
{
return addError(
"'" + std::string(token.start_, token.end_) + "' exceeds the allowable range.", token);
}
if (isNegative)
{
value = -value;
if (value < Value::minInt || value > Value::maxInt)
{
return addError(
"'" + std::string(token.start_, token.end_) + "' exceeds the allowable range.",
token);
}
currentValue() = static_cast<Value::Int>(value);
}
else
{
if (value > Value::maxUInt)
{
return addError(
"'" + std::string(token.start_, token.end_) + "' exceeds the allowable range.",
token);
}
// If it's representable as a signed integer, construct it as one.
if (value <= Value::maxInt)
currentValue() = static_cast<Value::Int>(value);
else
currentValue() = static_cast<Value::UInt>(value);
}
return true;
}
bool
Reader::decodeDouble(Token& token)
{
double value = 0;
int const bufferSize = 32;
int count;
int length = int(token.end_ - token.start_);
// Sanity check to avoid buffer overflow exploits.
if (length < 0)
{
return addError("Unable to parse token length", token);
}
// Avoid using a string constant for the format control string given to
// sscanf, as this can cause hard to debug crashes on OS X. See here for
// more info:
//
// http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html
char format[] = "%lf";
if (length <= bufferSize)
{
Char buffer[bufferSize + 1];
memcpy(buffer, token.start_, length);
buffer[length] = 0;
count = sscanf(buffer, format, &value);
}
else
{
std::string buffer(token.start_, token.end_);
count = sscanf(buffer.c_str(), format, &value);
}
if (count != 1)
return addError("'" + std::string(token.start_, token.end_) + "' is not a number.", token);
currentValue() = value;
return true;
}
bool
Reader::decodeString(Token& token)
{
std::string decoded;
if (!decodeString(token, decoded))
return false;
currentValue() = decoded;
return true;
}
bool
Reader::decodeString(Token& token, std::string& decoded)
{
decoded.reserve(token.end_ - token.start_ - 2);
Location current = token.start_ + 1; // skip '"'
Location end = token.end_ - 1; // do not include '"'
while (current != end)
{
Char c = *current++;
if (c == '"')
break;
else if (c == '\\')
{
if (current == end)
return addError("Empty escape sequence in string", token, current);
Char escape = *current++;
switch (escape)
{
case '"':
decoded += '"';
break;
case '/':
decoded += '/';
break;
case '\\':
decoded += '\\';
break;
case 'b':
decoded += '\b';
break;
case 'f':
decoded += '\f';
break;
case 'n':
decoded += '\n';
break;
case 'r':
decoded += '\r';
break;
case 't':
decoded += '\t';
break;
case 'u': {
unsigned int unicode;
if (!decodeUnicodeCodePoint(token, current, end, unicode))
return false;
decoded += codePointToUTF8(unicode);
}
break;
default:
return addError("Bad escape sequence in string", token, current);
}
}
else
{
decoded += c;
}
}
return true;
}
bool
Reader::decodeUnicodeCodePoint(Token& token, Location& current, Location end, unsigned int& unicode)
{
if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
return false;
if (unicode >= 0xD800 && unicode <= 0xDBFF)
{
// surrogate pairs
if (end - current < 6)
return addError(
"additional six characters expected to parse unicode surrogate "
"pair.",
token,
current);
unsigned int surrogatePair;
if (*(current++) == '\\' && *(current++) == 'u')
{
if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair))
{
unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
}
else
return false;
}
else
return addError(
"expecting another \\u token to begin the second half of a "
"unicode surrogate pair",
token,
current);
}
return true;
}
bool
Reader::decodeUnicodeEscapeSequence(
Token& token,
Location& current,
Location end,
unsigned int& unicode)
{
if (end - current < 4)
return addError(
"Bad unicode escape sequence in string: four digits expected.", token, current);
unicode = 0;
for (int index = 0; index < 4; ++index)
{
Char c = *current++;
unicode *= 16;
if (c >= '0' && c <= '9')
unicode += c - '0';
else if (c >= 'a' && c <= 'f')
unicode += c - 'a' + 10;
else if (c >= 'A' && c <= 'F')
unicode += c - 'A' + 10;
else
return addError(
"Bad unicode escape sequence in string: hexadecimal digit "
"expected.",
token,
current);
}
return true;
}
bool
Reader::addError(std::string const& message, Token& token, Location extra)
{
ErrorInfo info;
info.token_ = token;
info.message_ = message;
info.extra_ = extra;
errors_.push_back(info);
return false;
}
bool
Reader::recoverFromError(TokenType skipUntilToken)
{
int errorCount = int(errors_.size());
Token skip;
while (true)
{
if (!readToken(skip))
errors_.resize(errorCount); // discard errors caused by recovery
if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
break;
}
errors_.resize(errorCount);
return false;
}
bool
Reader::addErrorAndRecover(std::string const& message, Token& token, TokenType skipUntilToken)
{
addError(message, token);
return recoverFromError(skipUntilToken);
}
Value&
Reader::currentValue()
{
return *(nodes_.top());
}
Reader::Char
Reader::getNextChar()
{
if (current_ == end_)
return 0;
return *current_++;
}
void
Reader::getLocationLineAndColumn(Location location, int& line, int& column) const
{
Location current = begin_;
Location lastLineStart = current;
line = 0;
while (current < location && current != end_)
{
Char c = *current++;
if (c == '\r')
{
if (*current == '\n')
++current;
lastLineStart = current;
++line;
}
else if (c == '\n')
{
lastLineStart = current;
++line;
}
}
// column & line start at 1
column = int(location - lastLineStart) + 1;
++line;
}
std::string
Reader::getLocationLineAndColumn(Location location) const
{
int line, column;
getLocationLineAndColumn(location, line, column);
return "Line " + std::to_string(line) + ", Column " + std::to_string(column);
}
std::string
Reader::getFormattedErrorMessages() const
{
std::string formattedMessage;
for (Errors::const_iterator itError = errors_.begin(); itError != errors_.end(); ++itError)
{
ErrorInfo const& error = *itError;
formattedMessage += "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
formattedMessage += " " + error.message_ + "\n";
if (error.extra_)
formattedMessage += "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
}
return formattedMessage;
}
std::istream&
operator>>(std::istream& sin, Value& root)
{
Json::Reader reader;
bool ok = reader.parse(sin, root);
// XRPL_ASSERT(ok, "Json::operator>>() : parse succeeded");
if (!ok)
xrpl::Throw<std::runtime_error>(reader.getFormattedErrorMessages());
return sin;
}
} // namespace Json