mirror of
https://github.com/XRPLF/rippled.git
synced 2025-12-06 17:27:55 +00:00
General beast update, fixes, optimizations, features:
* Clean ups, optimizations, and new File::commonDocumentsDirectory enum * Replace sortArray with std::sort for performance * More error tolerance in XML parser, speedups * Refactor some byte-order mark detection code * Add String::appendCharPointer overloads * More XML parser optimisations and better error detection * Misc performance tweaks * Fixes for support of non utf8 strings * Increased precision when storing strings in XmlElement * Minor clean-ups * Minor fix to XmlDocument * Cleanups to CriticalSection and related synchronization primitives * Fix DynamicArray unit test
This commit is contained in:
@@ -24,12 +24,18 @@
|
||||
XmlDocument::XmlDocument (const String& documentText)
|
||||
: originalText (documentText),
|
||||
input (nullptr),
|
||||
outOfData (false),
|
||||
errorOccurred (false),
|
||||
needToLoadDTD (false),
|
||||
ignoreEmptyTextElements (true)
|
||||
{
|
||||
}
|
||||
|
||||
XmlDocument::XmlDocument (const File& file)
|
||||
: input (nullptr),
|
||||
outOfData (false),
|
||||
errorOccurred (false),
|
||||
needToLoadDTD (false),
|
||||
ignoreEmptyTextElements (true),
|
||||
inputSource (new FileInputSource (file))
|
||||
{
|
||||
@@ -77,68 +83,69 @@ namespace XmlIdentifierChars
|
||||
: isIdentifierCharSlow (c);
|
||||
}
|
||||
|
||||
/*static void generateIdentifierCharConstants()
|
||||
/*
|
||||
static void generateIdentifierCharConstants()
|
||||
{
|
||||
uint32 n[8] = { 0 };
|
||||
for (int i = 0; i < 256; ++i)
|
||||
if (isIdentifierCharSlow (i))
|
||||
n[i >> 5] |= (1 << (i & 31));
|
||||
if (isIdentifierCharSlow (i))
|
||||
n[i >> 5] |= (1 << (i & 31));
|
||||
|
||||
String s;
|
||||
for (int i = 0; i < 8; ++i)
|
||||
s << "0x" << String::toHexString ((int) n[i]) << ", ";
|
||||
|
||||
DBG (s);
|
||||
}*/
|
||||
}
|
||||
*/
|
||||
|
||||
static String::CharPointerType findEndOfToken (String::CharPointerType p)
|
||||
{
|
||||
while (isIdentifierChar (*p))
|
||||
++p;
|
||||
|
||||
return p;
|
||||
}
|
||||
}
|
||||
|
||||
XmlElement* XmlDocument::getDocumentElement (const bool onlyReadOuterDocumentElement)
|
||||
{
|
||||
String textToParse (originalText);
|
||||
|
||||
if (textToParse.isEmpty() && inputSource != nullptr)
|
||||
if (originalText.isEmpty() && inputSource != nullptr)
|
||||
{
|
||||
ScopedPointer <InputStream> in (inputSource->createInputStream());
|
||||
ScopedPointer<InputStream> in (inputSource->createInputStream());
|
||||
|
||||
if (in != nullptr)
|
||||
{
|
||||
MemoryOutputStream data;
|
||||
data.writeFromInputStream (*in, onlyReadOuterDocumentElement ? 8192 : -1);
|
||||
textToParse = data.toString();
|
||||
|
||||
if (! onlyReadOuterDocumentElement)
|
||||
originalText = textToParse;
|
||||
#if BEAST_STRING_UTF_TYPE == 8
|
||||
if (data.getDataSize() > 2)
|
||||
{
|
||||
data.writeByte (0);
|
||||
const char* text = static_cast<const char*> (data.getData());
|
||||
|
||||
if (CharPointer_UTF16::isByteOrderMarkBigEndian (text)
|
||||
|| CharPointer_UTF16::isByteOrderMarkLittleEndian (text))
|
||||
{
|
||||
originalText = data.toString();
|
||||
}
|
||||
else
|
||||
{
|
||||
if (CharPointer_UTF8::isByteOrderMark (text))
|
||||
text += 3;
|
||||
|
||||
// parse the input buffer directly to avoid copying it all to a string..
|
||||
return parseDocumentElement (String::CharPointerType (text), onlyReadOuterDocumentElement);
|
||||
}
|
||||
}
|
||||
#else
|
||||
originalText = data.toString();
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
input = textToParse.getCharPointer();
|
||||
lastError = String::empty;
|
||||
errorOccurred = false;
|
||||
outOfData = false;
|
||||
needToLoadDTD = true;
|
||||
|
||||
if (textToParse.isEmpty())
|
||||
{
|
||||
lastError = "not enough input";
|
||||
}
|
||||
else
|
||||
{
|
||||
skipHeader();
|
||||
|
||||
if (input.getAddress() != nullptr)
|
||||
{
|
||||
ScopedPointer <XmlElement> result (readNextElement (! onlyReadOuterDocumentElement));
|
||||
|
||||
if (! errorOccurred)
|
||||
return result.release();
|
||||
}
|
||||
else
|
||||
{
|
||||
lastError = "incorrect xml header";
|
||||
}
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
return parseDocumentElement (originalText.getCharPointer(), onlyReadOuterDocumentElement);
|
||||
}
|
||||
|
||||
const String& XmlDocument::getLastParseError() const noexcept
|
||||
@@ -156,7 +163,7 @@ String XmlDocument::getFileContents (const String& filename) const
|
||||
{
|
||||
if (inputSource != nullptr)
|
||||
{
|
||||
const ScopedPointer <InputStream> in (inputSource->createInputStreamFor (filename.trim().unquoted()));
|
||||
const ScopedPointer<InputStream> in (inputSource->createInputStreamFor (filename.trim().unquoted()));
|
||||
|
||||
if (in != nullptr)
|
||||
return in->readEntireStreamAsString();
|
||||
@@ -178,33 +185,56 @@ beast_wchar XmlDocument::readNextChar() noexcept
|
||||
return c;
|
||||
}
|
||||
|
||||
int XmlDocument::findNextTokenLength() noexcept
|
||||
XmlElement* XmlDocument::parseDocumentElement (String::CharPointerType textToParse,
|
||||
const bool onlyReadOuterDocumentElement)
|
||||
{
|
||||
int len = 0;
|
||||
beast_wchar c = *input;
|
||||
input = textToParse;
|
||||
errorOccurred = false;
|
||||
outOfData = false;
|
||||
needToLoadDTD = true;
|
||||
|
||||
while (XmlIdentifierChars::isIdentifierChar (c))
|
||||
c = input [++len];
|
||||
if (textToParse.isEmpty())
|
||||
{
|
||||
lastError = "not enough input";
|
||||
}
|
||||
else if (! parseHeader())
|
||||
{
|
||||
lastError = "malformed header";
|
||||
}
|
||||
else if (! parseDTD())
|
||||
{
|
||||
lastError = "malformed DTD";
|
||||
}
|
||||
else
|
||||
{
|
||||
lastError = String::empty;
|
||||
|
||||
return len;
|
||||
ScopedPointer<XmlElement> result (readNextElement (! onlyReadOuterDocumentElement));
|
||||
|
||||
if (! errorOccurred)
|
||||
return result.release();
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void XmlDocument::skipHeader()
|
||||
bool XmlDocument::parseHeader()
|
||||
{
|
||||
const int headerStart = input.indexOf (CharPointer_UTF8 ("<?xml"));
|
||||
skipNextWhiteSpace();
|
||||
|
||||
if (headerStart >= 0)
|
||||
if (CharacterFunctions::compareUpTo (input, CharPointer_ASCII ("<?xml"), 5) == 0)
|
||||
{
|
||||
const int headerEnd = (input + headerStart).indexOf (CharPointer_UTF8 ("?>"));
|
||||
if (headerEnd < 0)
|
||||
return;
|
||||
const String::CharPointerType headerEnd (CharacterFunctions::find (input, CharPointer_ASCII ("?>")));
|
||||
|
||||
if (headerEnd.isEmpty())
|
||||
return false;
|
||||
|
||||
#if BEAST_DEBUG
|
||||
const String header (input + headerStart, (size_t) (headerEnd - headerStart));
|
||||
const String encoding (header.fromFirstOccurrenceOf ("encoding", false, true)
|
||||
.fromFirstOccurrenceOf ("=", false, false)
|
||||
.fromFirstOccurrenceOf ("\"", false, false)
|
||||
.upToFirstOccurrenceOf ("\"", false, false).trim());
|
||||
const String encoding (String (input, headerEnd)
|
||||
.fromFirstOccurrenceOf ("encoding", false, true)
|
||||
.fromFirstOccurrenceOf ("=", false, false)
|
||||
.fromFirstOccurrenceOf ("\"", false, false)
|
||||
.upToFirstOccurrenceOf ("\"", false, false).trim());
|
||||
|
||||
/* If you load an XML document with a non-UTF encoding type, it may have been
|
||||
loaded wrongly.. Since all the files are read via the normal beast file streams,
|
||||
@@ -216,58 +246,59 @@ void XmlDocument::skipHeader()
|
||||
bassert (encoding.isEmpty() || encoding.startsWithIgnoreCase ("utf-"));
|
||||
#endif
|
||||
|
||||
input += headerEnd + 2;
|
||||
input = headerEnd + 2;
|
||||
skipNextWhiteSpace();
|
||||
}
|
||||
|
||||
skipNextWhiteSpace();
|
||||
return true;
|
||||
}
|
||||
|
||||
const int docTypeIndex = input.indexOf (CharPointer_UTF8 ("<!DOCTYPE"));
|
||||
if (docTypeIndex < 0)
|
||||
return;
|
||||
|
||||
input += docTypeIndex + 9;
|
||||
const String::CharPointerType docType (input);
|
||||
|
||||
int n = 1;
|
||||
|
||||
while (n > 0)
|
||||
bool XmlDocument::parseDTD()
|
||||
{
|
||||
if (CharacterFunctions::compareUpTo (input, CharPointer_ASCII ("<!DOCTYPE"), 9) == 0)
|
||||
{
|
||||
const beast_wchar c = readNextChar();
|
||||
input += 9;
|
||||
const String::CharPointerType dtdStart (input);
|
||||
|
||||
if (outOfData)
|
||||
return;
|
||||
for (int n = 1; n > 0;)
|
||||
{
|
||||
const beast_wchar c = readNextChar();
|
||||
|
||||
if (c == '<')
|
||||
++n;
|
||||
else if (c == '>')
|
||||
--n;
|
||||
if (outOfData)
|
||||
return false;
|
||||
|
||||
if (c == '<')
|
||||
++n;
|
||||
else if (c == '>')
|
||||
--n;
|
||||
}
|
||||
|
||||
dtdText = String (dtdStart, input - 1).trim();
|
||||
}
|
||||
|
||||
dtdText = String (docType, (size_t) (input.getAddress() - (docType.getAddress() + 1))).trim();
|
||||
return true;
|
||||
}
|
||||
|
||||
void XmlDocument::skipNextWhiteSpace()
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
beast_wchar c = *input;
|
||||
input = input.findEndOfWhitespace();
|
||||
|
||||
while (CharacterFunctions::isWhitespace (c))
|
||||
c = *++input;
|
||||
|
||||
if (c == 0)
|
||||
if (input.isEmpty())
|
||||
{
|
||||
outOfData = true;
|
||||
break;
|
||||
}
|
||||
else if (c == '<')
|
||||
|
||||
if (*input == '<')
|
||||
{
|
||||
if (input[1] == '!'
|
||||
&& input[2] == '-'
|
||||
&& input[3] == '-')
|
||||
{
|
||||
input += 4;
|
||||
const int closeComment = input.indexOf (CharPointer_UTF8 ("-->"));
|
||||
const int closeComment = input.indexOf (CharPointer_ASCII ("-->"));
|
||||
|
||||
if (closeComment < 0)
|
||||
{
|
||||
@@ -278,10 +309,11 @@ void XmlDocument::skipNextWhiteSpace()
|
||||
input += closeComment + 3;
|
||||
continue;
|
||||
}
|
||||
else if (input[1] == '?')
|
||||
|
||||
if (input[1] == '?')
|
||||
{
|
||||
input += 2;
|
||||
const int closeBracket = input.indexOf (CharPointer_UTF8 ("?>"));
|
||||
const int closeBracket = input.indexOf (CharPointer_ASCII ("?>"));
|
||||
|
||||
if (closeBracket < 0)
|
||||
{
|
||||
@@ -318,7 +350,6 @@ void XmlDocument::readQuotedString (String& result)
|
||||
else
|
||||
{
|
||||
const String::CharPointerType start (input);
|
||||
size_t numChars = 0;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
@@ -326,13 +357,13 @@ void XmlDocument::readQuotedString (String& result)
|
||||
|
||||
if (character == quote)
|
||||
{
|
||||
result.appendCharPointer (start, numChars);
|
||||
result.appendCharPointer (start, input);
|
||||
++input;
|
||||
return;
|
||||
}
|
||||
else if (character == '&')
|
||||
{
|
||||
result.appendCharPointer (start, numChars);
|
||||
result.appendCharPointer (start, input);
|
||||
break;
|
||||
}
|
||||
else if (character == 0)
|
||||
@@ -343,7 +374,6 @@ void XmlDocument::readQuotedString (String& result)
|
||||
}
|
||||
|
||||
++input;
|
||||
++numChars;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -357,28 +387,26 @@ XmlElement* XmlDocument::readNextElement (const bool alsoParseSubElements)
|
||||
if (outOfData)
|
||||
return nullptr;
|
||||
|
||||
const int openBracket = input.indexOf ((beast_wchar) '<');
|
||||
|
||||
if (openBracket >= 0)
|
||||
if (*input == '<')
|
||||
{
|
||||
input += openBracket + 1;
|
||||
int tagLen = findNextTokenLength();
|
||||
++input;
|
||||
String::CharPointerType endOfToken (XmlIdentifierChars::findEndOfToken (input));
|
||||
|
||||
if (tagLen == 0)
|
||||
if (endOfToken == input)
|
||||
{
|
||||
// no tag name - but allow for a gap after the '<' before giving an error
|
||||
skipNextWhiteSpace();
|
||||
tagLen = findNextTokenLength();
|
||||
endOfToken = XmlIdentifierChars::findEndOfToken (input);
|
||||
|
||||
if (tagLen == 0)
|
||||
if (endOfToken == input)
|
||||
{
|
||||
setLastError ("tag name missing", false);
|
||||
return node;
|
||||
}
|
||||
}
|
||||
|
||||
node = new XmlElement (String (input, (size_t) tagLen));
|
||||
input += tagLen;
|
||||
node = new XmlElement (String (input, endOfToken));
|
||||
input = endOfToken;
|
||||
LinkedListPointer<XmlElement::XmlAttributeNode>::Appender attributeAppender (node->attributes);
|
||||
|
||||
// look for attributes
|
||||
@@ -409,12 +437,12 @@ XmlElement* XmlDocument::readNextElement (const bool alsoParseSubElements)
|
||||
// get an attribute..
|
||||
if (XmlIdentifierChars::isIdentifierChar (c))
|
||||
{
|
||||
const int attNameLen = findNextTokenLength();
|
||||
String::CharPointerType attNameEnd (XmlIdentifierChars::findEndOfToken (input));
|
||||
|
||||
if (attNameLen > 0)
|
||||
if (attNameEnd != input)
|
||||
{
|
||||
const String::CharPointerType attNameStart (input);
|
||||
input += attNameLen;
|
||||
input = attNameEnd;
|
||||
|
||||
skipNextWhiteSpace();
|
||||
|
||||
@@ -427,7 +455,7 @@ XmlElement* XmlDocument::readNextElement (const bool alsoParseSubElements)
|
||||
if (nextChar == '"' || nextChar == '\'')
|
||||
{
|
||||
XmlElement::XmlAttributeNode* const newAtt
|
||||
= new XmlElement::XmlAttributeNode (String (attNameStart, (size_t) attNameLen),
|
||||
= new XmlElement::XmlAttributeNode (String (attNameStart, attNameEnd),
|
||||
String::empty);
|
||||
|
||||
readQuotedString (newAtt->value);
|
||||
@@ -435,6 +463,12 @@ XmlElement* XmlDocument::readNextElement (const bool alsoParseSubElements)
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
setLastError ("expected '=' after attribute '"
|
||||
+ String (attNameStart, attNameEnd) + "'", false);
|
||||
return node;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -467,7 +501,9 @@ void XmlDocument::readChildElements (XmlElement* parent)
|
||||
|
||||
if (*input == '<')
|
||||
{
|
||||
if (input[1] == '/')
|
||||
const beast_wchar c1 = input[1];
|
||||
|
||||
if (c1 == '/')
|
||||
{
|
||||
// our close tag..
|
||||
const int closeTag = input.indexOf ((beast_wchar) '>');
|
||||
@@ -477,41 +513,33 @@ void XmlDocument::readChildElements (XmlElement* parent)
|
||||
|
||||
break;
|
||||
}
|
||||
else if (input[1] == '!'
|
||||
&& input[2] == '['
|
||||
&& input[3] == 'C'
|
||||
&& input[4] == 'D'
|
||||
&& input[5] == 'A'
|
||||
&& input[6] == 'T'
|
||||
&& input[7] == 'A'
|
||||
&& input[8] == '[')
|
||||
|
||||
if (c1 == '!' && CharacterFunctions::compareUpTo (input + 2, CharPointer_ASCII ("[CDATA["), 7) == 0)
|
||||
{
|
||||
input += 9;
|
||||
const String::CharPointerType inputStart (input);
|
||||
|
||||
size_t len = 0;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
if (*input == 0)
|
||||
const beast_wchar c0 = *input;
|
||||
|
||||
if (c0 == 0)
|
||||
{
|
||||
setLastError ("unterminated CDATA section", false);
|
||||
outOfData = true;
|
||||
break;
|
||||
}
|
||||
else if (input[0] == ']'
|
||||
else if (c0 == ']'
|
||||
&& input[1] == ']'
|
||||
&& input[2] == '>')
|
||||
{
|
||||
childAppender.append (XmlElement::createTextElement (String (inputStart, input)));
|
||||
input += 3;
|
||||
break;
|
||||
}
|
||||
|
||||
++input;
|
||||
++len;
|
||||
}
|
||||
|
||||
childAppender.append (XmlElement::createTextElement (String (inputStart, len)));
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -522,7 +550,7 @@ void XmlDocument::readChildElements (XmlElement* parent)
|
||||
break;
|
||||
}
|
||||
}
|
||||
else // must be a character block
|
||||
else // must be a character block
|
||||
{
|
||||
input = preWhitespaceInput; // roll back to include the leading whitespace
|
||||
String textElementContent;
|
||||
@@ -575,17 +603,15 @@ void XmlDocument::readChildElements (XmlElement* parent)
|
||||
else
|
||||
{
|
||||
const String::CharPointerType start (input);
|
||||
size_t len = 0;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
const beast_wchar nextChar = *input;
|
||||
|
||||
if (nextChar == '<' || nextChar == '&')
|
||||
{
|
||||
break;
|
||||
}
|
||||
else if (nextChar == 0)
|
||||
|
||||
if (nextChar == 0)
|
||||
{
|
||||
setLastError ("unmatched tags", false);
|
||||
outOfData = true;
|
||||
@@ -593,17 +619,14 @@ void XmlDocument::readChildElements (XmlElement* parent)
|
||||
}
|
||||
|
||||
++input;
|
||||
++len;
|
||||
}
|
||||
|
||||
textElementContent.appendCharPointer (start, len);
|
||||
textElementContent.appendCharPointer (start, input);
|
||||
}
|
||||
}
|
||||
|
||||
if ((! ignoreEmptyTextElements) || textElementContent.containsNonWhitespaceChars())
|
||||
{
|
||||
childAppender.append (XmlElement::createTextElement (textElementContent));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -613,27 +636,27 @@ void XmlDocument::readEntity (String& result)
|
||||
// skip over the ampersand
|
||||
++input;
|
||||
|
||||
if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("amp;"), 4) == 0)
|
||||
if (input.compareIgnoreCaseUpTo (CharPointer_ASCII ("amp;"), 4) == 0)
|
||||
{
|
||||
input += 4;
|
||||
result += '&';
|
||||
}
|
||||
else if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("quot;"), 5) == 0)
|
||||
else if (input.compareIgnoreCaseUpTo (CharPointer_ASCII ("quot;"), 5) == 0)
|
||||
{
|
||||
input += 5;
|
||||
result += '"';
|
||||
}
|
||||
else if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("apos;"), 5) == 0)
|
||||
else if (input.compareIgnoreCaseUpTo (CharPointer_ASCII ("apos;"), 5) == 0)
|
||||
{
|
||||
input += 5;
|
||||
result += '\'';
|
||||
}
|
||||
else if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("lt;"), 3) == 0)
|
||||
else if (input.compareIgnoreCaseUpTo (CharPointer_ASCII ("lt;"), 3) == 0)
|
||||
{
|
||||
input += 3;
|
||||
result += '<';
|
||||
}
|
||||
else if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("gt;"), 3) == 0)
|
||||
else if (input.compareIgnoreCaseUpTo (CharPointer_ASCII ("gt;"), 3) == 0)
|
||||
{
|
||||
input += 3;
|
||||
result += '>';
|
||||
@@ -712,11 +735,11 @@ void XmlDocument::readEntity (String& result)
|
||||
|
||||
String XmlDocument::expandEntity (const String& ent)
|
||||
{
|
||||
if (ent.equalsIgnoreCase ("amp")) return String::charToString ('&');
|
||||
if (ent.equalsIgnoreCase ("quot")) return String::charToString ('"');
|
||||
if (ent.equalsIgnoreCase ("apos")) return String::charToString ('\'');
|
||||
if (ent.equalsIgnoreCase ("lt")) return String::charToString ('<');
|
||||
if (ent.equalsIgnoreCase ("gt")) return String::charToString ('>');
|
||||
if (ent.equalsIgnoreCase ("amp")) return String::charToString ('&');
|
||||
if (ent.equalsIgnoreCase ("quot")) return String::charToString ('"');
|
||||
if (ent.equalsIgnoreCase ("apos")) return String::charToString ('\'');
|
||||
if (ent.equalsIgnoreCase ("lt")) return String::charToString ('<');
|
||||
if (ent.equalsIgnoreCase ("gt")) return String::charToString ('>');
|
||||
|
||||
if (ent[0] == '#')
|
||||
{
|
||||
@@ -845,4 +868,4 @@ String XmlDocument::getParameterEntity (const String& entity)
|
||||
}
|
||||
|
||||
return entity;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user