rippled/modules/ripple_basics/json/json_reader.cpp

//------------------------------------------------------------------------------
/*
    Copyright (c) 2011-2013, OpenCoin, Inc.
*/
//==============================================================================

namespace Json
{

// Implementation of class Features
// ////////////////////////////////

Features::Features ()
    : allowComments_ ( true )
    , strictRoot_ ( false )
{
}


Features
Features::all ()
{
    return Features ();
}


Features
Features::strictMode ()
{
    Features features;
    features.allowComments_ = false;
    features.strictRoot_ = true;
    return features;
}

// Implementation of class Reader
// ////////////////////////////////


static inline bool
in ( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4 )
{
    return c == c1  ||  c == c2  ||  c == c3  ||  c == c4;
}

static inline bool
in ( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4, Reader::Char c5 )
{
    return c == c1  ||  c == c2  ||  c == c3  ||  c == c4  ||  c == c5;
}


static bool
containsNewLine ( Reader::Location begin,
                  Reader::Location end )
{
    for ( ; begin < end; ++begin )
        if ( *begin == '\n'  ||  *begin == '\r' )
            return true;

    return false;
}

static std::string codePointToUTF8 (unsigned int cp)
{
    std::string result;

    // based on description from http://en.wikipedia.org/wiki/UTF-8

    if (cp <= 0x7f)
    {
        result.resize (1);
        result[0] = static_cast<char> (cp);
    }
    else if (cp <= 0x7FF)
    {
        result.resize (2);
        result[1] = static_cast<char> (0x80 | (0x3f & cp));
        result[0] = static_cast<char> (0xC0 | (0x1f & (cp >> 6)));
    }
    else if (cp <= 0xFFFF)
    {
        result.resize (3);
        result[2] = static_cast<char> (0x80 | (0x3f & cp));
        result[1] = 0x80 | static_cast<char> ((0x3f & (cp >> 6)));
        result[0] = 0xE0 | static_cast<char> ((0xf & (cp >> 12)));
    }
    else if (cp <= 0x10FFFF)
    {
        result.resize (4);
        result[3] = static_cast<char> (0x80 | (0x3f & cp));
        result[2] = static_cast<char> (0x80 | (0x3f & (cp >> 6)));
        result[1] = static_cast<char> (0x80 | (0x3f & (cp >> 12)));
        result[0] = static_cast<char> (0xF0 | (0x7 & (cp >> 18)));
    }

    return result;
}


// Class Reader
// //////////////////////////////////////////////////////////////////

Reader::Reader ()
    : features_ ( Features::all () )
{
}


Reader::Reader ( const Features& features )
    : features_ ( features )
{
}


bool
Reader::parse ( const std::string& document,
                Value& root,
                bool collectComments )
{
    document_ = document;
    const char* begin = document_.c_str ();
    const char* end = begin + document_.length ();
    return parse ( begin, end, root, collectComments );
}


bool
Reader::parse ( std::istream& sin,
                Value& root,
                bool collectComments )
{
    //std::istream_iterator<char> begin(sin);
    //std::istream_iterator<char> end;
    // Those would allow streamed input from a file, if parse() were a
    // template function.

    // Since std::string is reference-counted, this at least does not
    // create an extra copy.
    std::string doc;
    std::getline (sin, doc, (char)EOF);
    return parse ( doc, root, collectComments );
}

bool
Reader::parse ( const char* beginDoc, const char* endDoc,
                Value& root,
                bool collectComments )
{
    if ( !features_.allowComments_ )
    {
        collectComments = false;
    }

    begin_ = beginDoc;
    end_ = endDoc;
    collectComments_ = collectComments;
    current_ = begin_;
    lastValueEnd_ = 0;
    lastValue_ = 0;
    commentsBefore_ = "";
    errors_.clear ();

    while ( !nodes_.empty () )
        nodes_.pop ();

    nodes_.push ( &root );

    bool successful = readValue ();
    Token token;
    skipCommentTokens ( token );

    if ( collectComments_  &&  !commentsBefore_.empty () )
        root.setComment ( commentsBefore_, commentAfter );

    if ( features_.strictRoot_ )
    {
        if ( !root.isArray ()  &&  !root.isObject () )
        {
            // Set error location to start of doc, ideally should be first token found in doc
            token.type_ = tokenError;
            token.start_ = beginDoc;
            token.end_ = endDoc;
            addError ( "A valid JSON document must be either an array or an object value.",
                       token );
            return false;
        }
    }

    return successful;
}


bool
Reader::readValue ()
{
    Token token;
    skipCommentTokens ( token );
    bool successful = true;

    if ( collectComments_  &&  !commentsBefore_.empty () )
    {
        currentValue ().setComment ( commentsBefore_, commentBefore );
        commentsBefore_ = "";
    }


    switch ( token.type_ )
    {
    case tokenObjectBegin:
        successful = readObject ( token );
        break;

    case tokenArrayBegin:
        successful = readArray ( token );
        break;

    case tokenNumber:
        successful = decodeNumber ( token );
        break;

    case tokenString:
        successful = decodeString ( token );
        break;

    case tokenTrue:
        currentValue () = true;
        break;

    case tokenFalse:
        currentValue () = false;
        break;

    case tokenNull:
        currentValue () = Value ();
        break;

    default:
        return addError ( "Syntax error: value, object or array expected.", token );
    }

    if ( collectComments_ )
    {
        lastValueEnd_ = current_;
        lastValue_ = &currentValue ();
    }

    return successful;
}


void
Reader::skipCommentTokens ( Token& token )
{
    if ( features_.allowComments_ )
    {
        do
        {
            readToken ( token );
        }
        while ( token.type_ == tokenComment );
    }
    else
    {
        readToken ( token );
    }
}


bool
Reader::expectToken ( TokenType type, Token& token, const char* message )
{
    readToken ( token );

    if ( token.type_ != type )
        return addError ( message, token );

    return true;
}


bool
Reader::readToken ( Token& token )
{
    skipSpaces ();
    token.start_ = current_;
    Char c = getNextChar ();
    bool ok = true;

    switch ( c )
    {
    case '{':
        token.type_ = tokenObjectBegin;
        break;

    case '}':
        token.type_ = tokenObjectEnd;
        break;

    case '[':
        token.type_ = tokenArrayBegin;
        break;

    case ']':
        token.type_ = tokenArrayEnd;
        break;

    case '"':
        token.type_ = tokenString;
        ok = readString ();
        break;

    case '/':
        token.type_ = tokenComment;
        ok = readComment ();
        break;

    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
    case '-':
        token.type_ = tokenNumber;
        readNumber ();
        break;

    case 't':
        token.type_ = tokenTrue;
        ok = match ( "rue", 3 );
        break;

    case 'f':
        token.type_ = tokenFalse;
        ok = match ( "alse", 4 );
        break;

    case 'n':
        token.type_ = tokenNull;
        ok = match ( "ull", 3 );
        break;

    case ',':
        token.type_ = tokenArraySeparator;
        break;

    case ':':
        token.type_ = tokenMemberSeparator;
        break;

    case 0:
        token.type_ = tokenEndOfStream;
        break;

    default:
        ok = false;
        break;
    }

    if ( !ok )
        token.type_ = tokenError;

    token.end_ = current_;
    return true;
}


void
Reader::skipSpaces ()
{
    while ( current_ != end_ )
    {
        Char c = *current_;

        if ( c == ' '  ||  c == '\t'  ||  c == '\r'  ||  c == '\n' )
            ++current_;
        else
            break;
    }
}


bool
Reader::match ( Location pattern,
                int patternLength )
{
    if ( end_ - current_ < patternLength )
        return false;

    int index = patternLength;

    while ( index-- )
        if ( current_[index] != pattern[index] )
            return false;

    current_ += patternLength;
    return true;
}


bool
Reader::readComment ()
{
    Location commentBegin = current_ - 1;
    Char c = getNextChar ();
    bool successful = false;

    if ( c == '*' )
        successful = readCStyleComment ();
    else if ( c == '/' )
        successful = readCppStyleComment ();

    if ( !successful )
        return false;

    if ( collectComments_ )
    {
        CommentPlacement placement = commentBefore;

        if ( lastValueEnd_  &&  !containsNewLine ( lastValueEnd_, commentBegin ) )
        {
            if ( c != '*'  ||  !containsNewLine ( commentBegin, current_ ) )
                placement = commentAfterOnSameLine;
        }

        addComment ( commentBegin, current_, placement );
    }

    return true;
}


void
Reader::addComment ( Location begin,
                     Location end,
                     CommentPlacement placement )
{
    assert ( collectComments_ );

    if ( placement == commentAfterOnSameLine )
    {
        assert ( lastValue_ != 0 );
        lastValue_->setComment ( std::string ( begin, end ), placement );
    }
    else
    {
        if ( !commentsBefore_.empty () )
            commentsBefore_ += "\n";

        commentsBefore_ += std::string ( begin, end );
    }
}


bool
Reader::readCStyleComment ()
{
    while ( current_ != end_ )
    {
        Char c = getNextChar ();

        if ( c == '*'  &&  *current_ == '/' )
            break;
    }

    return getNextChar () == '/';
}


bool
Reader::readCppStyleComment ()
{
    while ( current_ != end_ )
    {
        Char c = getNextChar ();

        if (  c == '\r'  ||  c == '\n' )
            break;
    }

    return true;
}


void
Reader::readNumber ()
{
    while ( current_ != end_ )
    {
        if ( ! (*current_ >= '0'  &&  *current_ <= '9')  &&
                !in ( *current_, '.', 'e', 'E', '+', '-' ) )
            break;

        ++current_;
    }
}

bool
Reader::readString ()
{
    Char c = 0;

    while ( current_ != end_ )
    {
        c = getNextChar ();

        if ( c == '\\' )
            getNextChar ();
        else if ( c == '"' )
            break;
    }

    return c == '"';
}


bool
Reader::readObject ( Token& tokenStart )
{
    Token tokenName;
    std::string name;
    currentValue () = Value ( objectValue );

    while ( readToken ( tokenName ) )
    {
        bool initialTokenOk = true;

        while ( tokenName.type_ == tokenComment  &&  initialTokenOk )
            initialTokenOk = readToken ( tokenName );

        if  ( !initialTokenOk )
            break;

        if ( tokenName.type_ == tokenObjectEnd  &&  name.empty () ) // empty object
            return true;

        if ( tokenName.type_ != tokenString )
            break;

        name = "";

        if ( !decodeString ( tokenName, name ) )
            return recoverFromError ( tokenObjectEnd );

        Token colon;

        if ( !readToken ( colon ) ||  colon.type_ != tokenMemberSeparator )
        {
            return addErrorAndRecover ( "Missing ':' after object member name",
                                        colon,
                                        tokenObjectEnd );
        }

        // Reject duplicate names
        if (currentValue ().isMember (name))
            return addError ( "Key '" + name + "' appears twice.", tokenName );

        Value& value = currentValue ()[ name ];
        nodes_.push ( &value );
        bool ok = readValue ();
        nodes_.pop ();

        if ( !ok ) // error already set
            return recoverFromError ( tokenObjectEnd );

        Token comma;

        if ( !readToken ( comma )
                ||  ( comma.type_ != tokenObjectEnd  &&
                      comma.type_ != tokenArraySeparator &&
                      comma.type_ != tokenComment ) )
        {
            return addErrorAndRecover ( "Missing ',' or '}' in object declaration",
                                        comma,
                                        tokenObjectEnd );
        }

        bool finalizeTokenOk = true;

        while ( comma.type_ == tokenComment &&
                finalizeTokenOk )
            finalizeTokenOk = readToken ( comma );

        if ( comma.type_ == tokenObjectEnd )
            return true;
    }

    return addErrorAndRecover ( "Missing '}' or object member name",
                                tokenName,
                                tokenObjectEnd );
}


bool
Reader::readArray ( Token& tokenStart )
{
    currentValue () = Value ( arrayValue );
    skipSpaces ();

    if ( *current_ == ']' ) // empty array
    {
        Token endArray;
        readToken ( endArray );
        return true;
    }

    int index = 0;

    while ( true )
    {
        Value& value = currentValue ()[ index++ ];
        nodes_.push ( &value );
        bool ok = readValue ();
        nodes_.pop ();

        if ( !ok ) // error already set
            return recoverFromError ( tokenArrayEnd );

        Token token;
        // Accept Comment after last item in the array.
        ok = readToken ( token );

        while ( token.type_ == tokenComment  &&  ok )
        {
            ok = readToken ( token );
        }

        bool badTokenType = ( token.type_ == tokenArraySeparator  &&
                              token.type_ == tokenArrayEnd );

        if ( !ok  ||  badTokenType )
        {
            return addErrorAndRecover ( "Missing ',' or ']' in array declaration",
                                        token,
                                        tokenArrayEnd );
        }

        if ( token.type_ == tokenArrayEnd )
            break;
    }

    return true;
}


bool
Reader::decodeNumber ( Token& token )
{
    bool isDouble = false;

    for ( Location inspect = token.start_; inspect != token.end_; ++inspect )
    {
        isDouble = isDouble
                   ||  in ( *inspect, '.', 'e', 'E', '+' )
                   ||  ( *inspect == '-'  &&  inspect != token.start_ );
    }

    if ( isDouble )
        return decodeDouble ( token );

    Location current = token.start_;
    bool isNegative = *current == '-';

    if ( isNegative )
        ++current;

    Value::UInt threshold = (isNegative ? Value::UInt (-Value::minInt)
                             : Value::maxUInt) / 10;
    Value::UInt value = 0;

    while ( current < token.end_ )
    {
        Char c = *current++;

        if ( c < '0'  ||  c > '9' )
            return addError ( "'" + std::string ( token.start_, token.end_ ) + "' is not a number.", token );

        if ( value >= threshold )
            return decodeDouble ( token );

        value = value * 10 + Value::UInt (c - '0');
    }

    if ( isNegative )
        currentValue () = -Value::Int ( value );
    else if ( value <= Value::UInt (Value::maxInt) )
        currentValue () = Value::Int ( value );
    else
        currentValue () = value;

    return true;
}


bool
Reader::decodeDouble ( Token& token )
{
    double value = 0;
    const int bufferSize = 32;
    int count;
    int length = int (token.end_ - token.start_);

    if ( length <= bufferSize )
    {
        Char buffer[bufferSize];
        memcpy ( buffer, token.start_, length );
        buffer[length] = 0;
        count = sscanf ( buffer, "%lf", &value );
    }
    else
    {
        std::string buffer ( token.start_, token.end_ );
        count = sscanf ( buffer.c_str (), "%lf", &value );
    }

    if ( count != 1 )
        return addError ( "'" + std::string ( token.start_, token.end_ ) + "' is not a number.", token );

    currentValue () = value;
    return true;
}


bool
Reader::decodeString ( Token& token )
{
    std::string decoded;

    if ( !decodeString ( token, decoded ) )
        return false;

    currentValue () = decoded;
    return true;
}


bool
Reader::decodeString ( Token& token, std::string& decoded )
{
    decoded.reserve ( token.end_ - token.start_ - 2 );
    Location current = token.start_ + 1; // skip '"'
    Location end = token.end_ - 1;      // do not include '"'

    while ( current != end )
    {
        Char c = *current++;

        if ( c == '"' )
            break;
        else if ( c == '\\' )
        {
            if ( current == end )
                return addError ( "Empty escape sequence in string", token, current );

            Char escape = *current++;

            switch ( escape )
            {
            case '"':
                decoded += '"';
                break;

            case '/':
                decoded += '/';
                break;

            case '\\':
                decoded += '\\';
                break;

            case 'b':
                decoded += '\b';
                break;

            case 'f':
                decoded += '\f';
                break;

            case 'n':
                decoded += '\n';
                break;

            case 'r':
                decoded += '\r';
                break;

            case 't':
                decoded += '\t';
                break;

            case 'u':
            {
                unsigned int unicode;

                if ( !decodeUnicodeCodePoint ( token, current, end, unicode ) )
                    return false;

                decoded += codePointToUTF8 (unicode);
            }
            break;

            default:
                return addError ( "Bad escape sequence in string", token, current );
            }
        }
        else
        {
            decoded += c;
        }
    }

    return true;
}

bool
Reader::decodeUnicodeCodePoint ( Token& token,
                                 Location& current,
                                 Location end,
                                 unsigned int& unicode )
{

    if ( !decodeUnicodeEscapeSequence ( token, current, end, unicode ) )
        return false;

    if (unicode >= 0xD800 && unicode <= 0xDBFF)
    {
        // surrogate pairs
        if (end - current < 6)
            return addError ( "additional six characters expected to parse unicode surrogate pair.", token, current );

        unsigned int surrogatePair;

        if (* (current++) == '\\' && * (current++) == 'u')
        {
            if (decodeUnicodeEscapeSequence ( token, current, end, surrogatePair ))
            {
                unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
            }
            else
                return false;
        }
        else
            return addError ( "expecting another \\u token to begin the second half of a unicode surrogate pair", token, current );
    }

    return true;
}

bool
Reader::decodeUnicodeEscapeSequence ( Token& token,
                                      Location& current,
                                      Location end,
                                      unsigned int& unicode )
{
    if ( end - current < 4 )
        return addError ( "Bad unicode escape sequence in string: four digits expected.", token, current );

    unicode = 0;

    for ( int index = 0; index < 4; ++index )
    {
        Char c = *current++;
        unicode *= 16;

        if ( c >= '0'  &&  c <= '9' )
            unicode += c - '0';
        else if ( c >= 'a'  &&  c <= 'f' )
            unicode += c - 'a' + 10;
        else if ( c >= 'A'  &&  c <= 'F' )
            unicode += c - 'A' + 10;
        else
            return addError ( "Bad unicode escape sequence in string: hexadecimal digit expected.", token, current );
    }

    return true;
}


bool
Reader::addError ( const std::string& message,
                   Token& token,
                   Location extra )
{
    ErrorInfo info;
    info.token_ = token;
    info.message_ = message;
    info.extra_ = extra;
    errors_.push_back ( info );
    return false;
}


bool
Reader::recoverFromError ( TokenType skipUntilToken )
{
    int errorCount = int (errors_.size ());
    Token skip;

    while ( true )
    {
        if ( !readToken (skip) )
            errors_.resize ( errorCount ); // discard errors caused by recovery

        if ( skip.type_ == skipUntilToken  ||  skip.type_ == tokenEndOfStream )
            break;
    }

    errors_.resize ( errorCount );
    return false;
}


bool
Reader::addErrorAndRecover ( const std::string& message,
                             Token& token,
                             TokenType skipUntilToken )
{
    addError ( message, token );
    return recoverFromError ( skipUntilToken );
}


Value&
Reader::currentValue ()
{
    return * (nodes_.top ());
}


Reader::Char
Reader::getNextChar ()
{
    if ( current_ == end_ )
        return 0;

    return *current_++;
}


void
Reader::getLocationLineAndColumn ( Location location,
                                   int& line,
                                   int& column ) const
{
    Location current = begin_;
    Location lastLineStart = current;
    line = 0;

    while ( current < location  &&  current != end_ )
    {
        Char c = *current++;

        if ( c == '\r' )
        {
            if ( *current == '\n' )
                ++current;

            lastLineStart = current;
            ++line;
        }
        else if ( c == '\n' )
        {
            lastLineStart = current;
            ++line;
        }
    }

    // column & line start at 1
    column = int (location - lastLineStart) + 1;
    ++line;
}


std::string
Reader::getLocationLineAndColumn ( Location location ) const
{
    int line, column;
    getLocationLineAndColumn ( location, line, column );
    char buffer[18 + 16 + 16 + 1];
    sprintf ( buffer, "Line %d, Column %d", line, column );
    return buffer;
}


std::string
Reader::getFormatedErrorMessages () const
{
    std::string formattedMessage;

    for ( Errors::const_iterator itError = errors_.begin ();
            itError != errors_.end ();
            ++itError )
    {
        const ErrorInfo& error = *itError;
        formattedMessage += "* " + getLocationLineAndColumn ( error.token_.start_ ) + "\n";
        formattedMessage += "  " + error.message_ + "\n";

        if ( error.extra_ )
            formattedMessage += "See " + getLocationLineAndColumn ( error.extra_ ) + " for detail.\n";
    }

    return formattedMessage;
}


std::istream& operator>> ( std::istream& sin, Value& root )
{
    Json::Reader reader;
    bool ok = reader.parse (sin, root, true);

    //JSON_ASSERT( ok );
    if (!ok) throw std::runtime_error (reader.getFormatedErrorMessages ());

    return sin;
}


} // namespace Json