mirror of
https://github.com/XRPLF/rippled.git
synced 2025-12-06 17:27:55 +00:00
Refine parseUrl regular expression (RIPD-1708):
The new parse logic is more strict but handles more cases. If an exception is thrown, just bail. * Allow parsing unenclosed IPv6 addresses without port * Improve string construction * Reduce nesting levels of code
This commit is contained in:
committed by
Manoj doshi
parent
63eeb8d734
commit
0c20e2eb8b
@@ -95,14 +95,14 @@ bool parseUrl (parsedURL& pUrl, std::string const& strUrl)
|
||||
static boost::regex reUrl (
|
||||
"(?i)\\`\\s*"
|
||||
// required scheme
|
||||
"([[:alpha:]][-+.[:alpha:][:digit:]]*):"
|
||||
"([[:alpha:]][-+.[:alpha:][:digit:]]*?):"
|
||||
// We choose to support only URIs whose `hier-part` has the form
|
||||
// `"//" authority path-abempty`.
|
||||
"//"
|
||||
// optional userinfo
|
||||
"(?:([^/]*?)(?::([^/]*?))?@)?"
|
||||
"(?:([^:@/]*?)(?::([^@/]*?))?@)?"
|
||||
// optional host
|
||||
"([^/]*?)"
|
||||
"([[:digit:]:]*[[:digit:]]|\\[[^]]+\\]|[^:/?#]*?)"
|
||||
// optional port
|
||||
"(?::([[:digit:]]+))?"
|
||||
// optional path
|
||||
@@ -110,31 +110,34 @@ bool parseUrl (parsedURL& pUrl, std::string const& strUrl)
|
||||
"\\s*?\\'");
|
||||
boost::smatch smMatch;
|
||||
|
||||
bool bMatch = boost::regex_match (strUrl, smMatch, reUrl); // Match status code.
|
||||
|
||||
if (bMatch)
|
||||
{
|
||||
pUrl.scheme = smMatch[1];
|
||||
boost::algorithm::to_lower (pUrl.scheme);
|
||||
pUrl.username = smMatch[2];
|
||||
pUrl.password = smMatch[3];
|
||||
const std::string domain = smMatch[4];
|
||||
// We need to use Endpoint to parse the domain to
|
||||
// strip surrounding brackets from IPv6 addresses,
|
||||
// e.g. [::1] => ::1.
|
||||
const auto result {beast::IP::Endpoint::from_string_checked (domain)};
|
||||
pUrl.domain = result.second
|
||||
? result.first.address().to_string()
|
||||
: domain;
|
||||
const std::string port = smMatch[5];
|
||||
if (!port.empty())
|
||||
{
|
||||
pUrl.port = beast::lexicalCast <std::uint16_t> (port);
|
||||
}
|
||||
pUrl.path = smMatch[6];
|
||||
// Bail if there is no match.
|
||||
try {
|
||||
if (! boost::regex_match (strUrl, smMatch, reUrl))
|
||||
return false;
|
||||
} catch (...) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return bMatch;
|
||||
pUrl.scheme = smMatch[1];
|
||||
boost::algorithm::to_lower (pUrl.scheme);
|
||||
pUrl.username = smMatch[2];
|
||||
pUrl.password = smMatch[3];
|
||||
const std::string domain = smMatch[4];
|
||||
// We need to use Endpoint to parse the domain to
|
||||
// strip surrounding brackets from IPv6 addresses,
|
||||
// e.g. [::1] => ::1.
|
||||
const auto result {beast::IP::Endpoint::from_string_checked (domain)};
|
||||
pUrl.domain = result.second
|
||||
? result.first.address().to_string()
|
||||
: domain;
|
||||
const std::string port = smMatch[5];
|
||||
if (!port.empty())
|
||||
{
|
||||
pUrl.port = beast::lexicalCast <std::uint16_t> (port);
|
||||
}
|
||||
pUrl.path = smMatch[6];
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
std::string trim_whitespace (std::string str)
|
||||
|
||||
@@ -78,6 +78,7 @@ public:
|
||||
// Do we want to normalize paths?
|
||||
BEAST_EXPECT(pUrl.path.empty());
|
||||
}
|
||||
|
||||
{
|
||||
parsedURL pUrl;
|
||||
BEAST_EXPECT(parseUrl (pUrl, "scheme:///"));
|
||||
@@ -88,6 +89,7 @@ public:
|
||||
BEAST_EXPECT(! pUrl.port);
|
||||
BEAST_EXPECT(pUrl.path == "/");
|
||||
}
|
||||
|
||||
{
|
||||
parsedURL pUrl;
|
||||
BEAST_EXPECT(parseUrl (pUrl, "lower://domain"));
|
||||
@@ -98,6 +100,7 @@ public:
|
||||
BEAST_EXPECT(! pUrl.port);
|
||||
BEAST_EXPECT(pUrl.path.empty());
|
||||
}
|
||||
|
||||
{
|
||||
parsedURL pUrl;
|
||||
BEAST_EXPECT(parseUrl (pUrl, "UPPER://domain:234/"));
|
||||
@@ -108,6 +111,7 @@ public:
|
||||
BEAST_EXPECT(*pUrl.port == 234);
|
||||
BEAST_EXPECT(pUrl.path == "/");
|
||||
}
|
||||
|
||||
{
|
||||
parsedURL pUrl;
|
||||
BEAST_EXPECT(parseUrl (pUrl, "Mixed://domain/path"));
|
||||
@@ -118,6 +122,7 @@ public:
|
||||
BEAST_EXPECT(! pUrl.port);
|
||||
BEAST_EXPECT(pUrl.path == "/path");
|
||||
}
|
||||
|
||||
{
|
||||
parsedURL pUrl;
|
||||
BEAST_EXPECT(parseUrl (pUrl, "scheme://[::1]:123/path"));
|
||||
@@ -128,6 +133,7 @@ public:
|
||||
BEAST_EXPECT(*pUrl.port == 123);
|
||||
BEAST_EXPECT(pUrl.path == "/path");
|
||||
}
|
||||
|
||||
{
|
||||
parsedURL pUrl;
|
||||
BEAST_EXPECT(parseUrl (pUrl, "scheme://user:pass@domain:123/abc:321"));
|
||||
@@ -138,6 +144,7 @@ public:
|
||||
BEAST_EXPECT(*pUrl.port == 123);
|
||||
BEAST_EXPECT(pUrl.path == "/abc:321");
|
||||
}
|
||||
|
||||
{
|
||||
parsedURL pUrl;
|
||||
BEAST_EXPECT(parseUrl (pUrl, "scheme://user@domain:123/abc:321"));
|
||||
@@ -148,6 +155,7 @@ public:
|
||||
BEAST_EXPECT(*pUrl.port == 123);
|
||||
BEAST_EXPECT(pUrl.path == "/abc:321");
|
||||
}
|
||||
|
||||
{
|
||||
parsedURL pUrl;
|
||||
BEAST_EXPECT(parseUrl (pUrl, "scheme://:pass@domain:123/abc:321"));
|
||||
@@ -158,6 +166,7 @@ public:
|
||||
BEAST_EXPECT(*pUrl.port == 123);
|
||||
BEAST_EXPECT(pUrl.path == "/abc:321");
|
||||
}
|
||||
|
||||
{
|
||||
parsedURL pUrl;
|
||||
BEAST_EXPECT(parseUrl (pUrl, "scheme://domain:123/abc:321"));
|
||||
@@ -168,6 +177,7 @@ public:
|
||||
BEAST_EXPECT(*pUrl.port == 123);
|
||||
BEAST_EXPECT(pUrl.path == "/abc:321");
|
||||
}
|
||||
|
||||
{
|
||||
parsedURL pUrl;
|
||||
BEAST_EXPECT(parseUrl (pUrl, "scheme://user:pass@domain/abc:321"));
|
||||
@@ -178,6 +188,7 @@ public:
|
||||
BEAST_EXPECT(! pUrl.port);
|
||||
BEAST_EXPECT(pUrl.path == "/abc:321");
|
||||
}
|
||||
|
||||
{
|
||||
parsedURL pUrl;
|
||||
BEAST_EXPECT(parseUrl (pUrl, "scheme://user@domain/abc:321"));
|
||||
@@ -188,6 +199,7 @@ public:
|
||||
BEAST_EXPECT(! pUrl.port);
|
||||
BEAST_EXPECT(pUrl.path == "/abc:321");
|
||||
}
|
||||
|
||||
{
|
||||
parsedURL pUrl;
|
||||
BEAST_EXPECT(parseUrl (pUrl, "scheme://:pass@domain/abc:321"));
|
||||
@@ -198,6 +210,7 @@ public:
|
||||
BEAST_EXPECT(! pUrl.port);
|
||||
BEAST_EXPECT(pUrl.path == "/abc:321");
|
||||
}
|
||||
|
||||
{
|
||||
parsedURL pUrl;
|
||||
BEAST_EXPECT(parseUrl (pUrl, "scheme://domain/abc:321"));
|
||||
@@ -208,6 +221,7 @@ public:
|
||||
BEAST_EXPECT(! pUrl.port);
|
||||
BEAST_EXPECT(pUrl.path == "/abc:321");
|
||||
}
|
||||
|
||||
{
|
||||
parsedURL pUrl;
|
||||
BEAST_EXPECT(parseUrl (pUrl, "scheme:///path/to/file"));
|
||||
@@ -218,6 +232,7 @@ public:
|
||||
BEAST_EXPECT(! pUrl.port);
|
||||
BEAST_EXPECT(pUrl.path == "/path/to/file");
|
||||
}
|
||||
|
||||
{
|
||||
parsedURL pUrl;
|
||||
BEAST_EXPECT(parseUrl (
|
||||
@@ -229,6 +244,7 @@ public:
|
||||
BEAST_EXPECT(! pUrl.port);
|
||||
BEAST_EXPECT(pUrl.path == "/path/with/an@sign");
|
||||
}
|
||||
|
||||
{
|
||||
parsedURL pUrl;
|
||||
BEAST_EXPECT(parseUrl (
|
||||
@@ -240,17 +256,29 @@ public:
|
||||
BEAST_EXPECT(! pUrl.port);
|
||||
BEAST_EXPECT(pUrl.path == "/path/with/an@sign");
|
||||
}
|
||||
|
||||
{
|
||||
parsedURL pUrl;
|
||||
BEAST_EXPECT(parseUrl (pUrl, "scheme://:999/"));
|
||||
BEAST_EXPECT(pUrl.scheme == "scheme");
|
||||
BEAST_EXPECT(pUrl.username.empty());
|
||||
BEAST_EXPECT(pUrl.password.empty());
|
||||
BEAST_EXPECT(pUrl.domain.empty());
|
||||
BEAST_EXPECT(*pUrl.port == 999);
|
||||
BEAST_EXPECT(pUrl.domain == ":999");
|
||||
BEAST_EXPECT(! pUrl.port);
|
||||
BEAST_EXPECT(pUrl.path == "/");
|
||||
}
|
||||
|
||||
{
|
||||
parsedURL pUrl;
|
||||
BEAST_EXPECT(parseUrl (pUrl, "http://::1:1234/validators"));
|
||||
BEAST_EXPECT(pUrl.scheme == "http");
|
||||
BEAST_EXPECT(pUrl.username.empty());
|
||||
BEAST_EXPECT(pUrl.password.empty());
|
||||
BEAST_EXPECT(pUrl.domain == "::0.1.18.52");
|
||||
BEAST_EXPECT(! pUrl.port);
|
||||
BEAST_EXPECT(pUrl.path == "/validators");
|
||||
}
|
||||
|
||||
// Expected fails.
|
||||
{
|
||||
parsedURL pUrl;
|
||||
@@ -259,6 +287,12 @@ public:
|
||||
BEAST_EXPECT(! parseUrl (pUrl, "://"));
|
||||
BEAST_EXPECT(! parseUrl (pUrl, ":///"));
|
||||
}
|
||||
|
||||
{
|
||||
std::string strUrl("s://" + std::string(8192, ':'));
|
||||
parsedURL pUrl;
|
||||
BEAST_EXPECT(! parseUrl (pUrl, strUrl));
|
||||
}
|
||||
}
|
||||
|
||||
void testToString ()
|
||||
|
||||
Reference in New Issue
Block a user