Refine parseUrl regular expression (RIPD-1708):

The new parsing logic is stricter but handles more cases. If matching the
regular expression throws an exception, parseUrl simply returns false.

* Allow parsing unenclosed IPv6 addresses without port
* Improve string construction
* Reduce nesting levels of code
This commit is contained in:
John Freeman
2019-02-18 16:29:21 -06:00
committed by Manoj doshi
parent 63eeb8d734
commit 0c20e2eb8b
2 changed files with 65 additions and 28 deletions

View File

@@ -95,14 +95,14 @@ bool parseUrl (parsedURL& pUrl, std::string const& strUrl)
static boost::regex reUrl (
"(?i)\\`\\s*"
// required scheme
"([[:alpha:]][-+.[:alpha:][:digit:]]*):"
"([[:alpha:]][-+.[:alpha:][:digit:]]*?):"
// We choose to support only URIs whose `hier-part` has the form
// `"//" authority path-abempty`.
"//"
// optional userinfo
"(?:([^/]*?)(?::([^/]*?))?@)?"
"(?:([^:@/]*?)(?::([^@/]*?))?@)?"
// optional host
"([^/]*?)"
"([[:digit:]:]*[[:digit:]]|\\[[^]]+\\]|[^:/?#]*?)"
// optional port
"(?::([[:digit:]]+))?"
// optional path
@@ -110,31 +110,34 @@ bool parseUrl (parsedURL& pUrl, std::string const& strUrl)
"\\s*?\\'");
boost::smatch smMatch;
bool bMatch = boost::regex_match (strUrl, smMatch, reUrl); // Match status code.
if (bMatch)
{
pUrl.scheme = smMatch[1];
boost::algorithm::to_lower (pUrl.scheme);
pUrl.username = smMatch[2];
pUrl.password = smMatch[3];
const std::string domain = smMatch[4];
// We need to use Endpoint to parse the domain to
// strip surrounding brackets from IPv6 addresses,
// e.g. [::1] => ::1.
const auto result {beast::IP::Endpoint::from_string_checked (domain)};
pUrl.domain = result.second
? result.first.address().to_string()
: domain;
const std::string port = smMatch[5];
if (!port.empty())
{
pUrl.port = beast::lexicalCast <std::uint16_t> (port);
}
pUrl.path = smMatch[6];
// Bail if there is no match.
try {
if (! boost::regex_match (strUrl, smMatch, reUrl))
return false;
} catch (...) {
return false;
}
return bMatch;
pUrl.scheme = smMatch[1];
boost::algorithm::to_lower (pUrl.scheme);
pUrl.username = smMatch[2];
pUrl.password = smMatch[3];
const std::string domain = smMatch[4];
// We need to use Endpoint to parse the domain to
// strip surrounding brackets from IPv6 addresses,
// e.g. [::1] => ::1.
const auto result {beast::IP::Endpoint::from_string_checked (domain)};
pUrl.domain = result.second
? result.first.address().to_string()
: domain;
const std::string port = smMatch[5];
if (!port.empty())
{
pUrl.port = beast::lexicalCast <std::uint16_t> (port);
}
pUrl.path = smMatch[6];
return true;
}
std::string trim_whitespace (std::string str)

View File

@@ -78,6 +78,7 @@ public:
// Do we want to normalize paths?
BEAST_EXPECT(pUrl.path.empty());
}
{
parsedURL pUrl;
BEAST_EXPECT(parseUrl (pUrl, "scheme:///"));
@@ -88,6 +89,7 @@ public:
BEAST_EXPECT(! pUrl.port);
BEAST_EXPECT(pUrl.path == "/");
}
{
parsedURL pUrl;
BEAST_EXPECT(parseUrl (pUrl, "lower://domain"));
@@ -98,6 +100,7 @@ public:
BEAST_EXPECT(! pUrl.port);
BEAST_EXPECT(pUrl.path.empty());
}
{
parsedURL pUrl;
BEAST_EXPECT(parseUrl (pUrl, "UPPER://domain:234/"));
@@ -108,6 +111,7 @@ public:
BEAST_EXPECT(*pUrl.port == 234);
BEAST_EXPECT(pUrl.path == "/");
}
{
parsedURL pUrl;
BEAST_EXPECT(parseUrl (pUrl, "Mixed://domain/path"));
@@ -118,6 +122,7 @@ public:
BEAST_EXPECT(! pUrl.port);
BEAST_EXPECT(pUrl.path == "/path");
}
{
parsedURL pUrl;
BEAST_EXPECT(parseUrl (pUrl, "scheme://[::1]:123/path"));
@@ -128,6 +133,7 @@ public:
BEAST_EXPECT(*pUrl.port == 123);
BEAST_EXPECT(pUrl.path == "/path");
}
{
parsedURL pUrl;
BEAST_EXPECT(parseUrl (pUrl, "scheme://user:pass@domain:123/abc:321"));
@@ -138,6 +144,7 @@ public:
BEAST_EXPECT(*pUrl.port == 123);
BEAST_EXPECT(pUrl.path == "/abc:321");
}
{
parsedURL pUrl;
BEAST_EXPECT(parseUrl (pUrl, "scheme://user@domain:123/abc:321"));
@@ -148,6 +155,7 @@ public:
BEAST_EXPECT(*pUrl.port == 123);
BEAST_EXPECT(pUrl.path == "/abc:321");
}
{
parsedURL pUrl;
BEAST_EXPECT(parseUrl (pUrl, "scheme://:pass@domain:123/abc:321"));
@@ -158,6 +166,7 @@ public:
BEAST_EXPECT(*pUrl.port == 123);
BEAST_EXPECT(pUrl.path == "/abc:321");
}
{
parsedURL pUrl;
BEAST_EXPECT(parseUrl (pUrl, "scheme://domain:123/abc:321"));
@@ -168,6 +177,7 @@ public:
BEAST_EXPECT(*pUrl.port == 123);
BEAST_EXPECT(pUrl.path == "/abc:321");
}
{
parsedURL pUrl;
BEAST_EXPECT(parseUrl (pUrl, "scheme://user:pass@domain/abc:321"));
@@ -178,6 +188,7 @@ public:
BEAST_EXPECT(! pUrl.port);
BEAST_EXPECT(pUrl.path == "/abc:321");
}
{
parsedURL pUrl;
BEAST_EXPECT(parseUrl (pUrl, "scheme://user@domain/abc:321"));
@@ -188,6 +199,7 @@ public:
BEAST_EXPECT(! pUrl.port);
BEAST_EXPECT(pUrl.path == "/abc:321");
}
{
parsedURL pUrl;
BEAST_EXPECT(parseUrl (pUrl, "scheme://:pass@domain/abc:321"));
@@ -198,6 +210,7 @@ public:
BEAST_EXPECT(! pUrl.port);
BEAST_EXPECT(pUrl.path == "/abc:321");
}
{
parsedURL pUrl;
BEAST_EXPECT(parseUrl (pUrl, "scheme://domain/abc:321"));
@@ -208,6 +221,7 @@ public:
BEAST_EXPECT(! pUrl.port);
BEAST_EXPECT(pUrl.path == "/abc:321");
}
{
parsedURL pUrl;
BEAST_EXPECT(parseUrl (pUrl, "scheme:///path/to/file"));
@@ -218,6 +232,7 @@ public:
BEAST_EXPECT(! pUrl.port);
BEAST_EXPECT(pUrl.path == "/path/to/file");
}
{
parsedURL pUrl;
BEAST_EXPECT(parseUrl (
@@ -229,6 +244,7 @@ public:
BEAST_EXPECT(! pUrl.port);
BEAST_EXPECT(pUrl.path == "/path/with/an@sign");
}
{
parsedURL pUrl;
BEAST_EXPECT(parseUrl (
@@ -240,17 +256,29 @@ public:
BEAST_EXPECT(! pUrl.port);
BEAST_EXPECT(pUrl.path == "/path/with/an@sign");
}
{
parsedURL pUrl;
BEAST_EXPECT(parseUrl (pUrl, "scheme://:999/"));
BEAST_EXPECT(pUrl.scheme == "scheme");
BEAST_EXPECT(pUrl.username.empty());
BEAST_EXPECT(pUrl.password.empty());
BEAST_EXPECT(pUrl.domain.empty());
BEAST_EXPECT(*pUrl.port == 999);
BEAST_EXPECT(pUrl.domain == ":999");
BEAST_EXPECT(! pUrl.port);
BEAST_EXPECT(pUrl.path == "/");
}
{
parsedURL pUrl;
BEAST_EXPECT(parseUrl (pUrl, "http://::1:1234/validators"));
BEAST_EXPECT(pUrl.scheme == "http");
BEAST_EXPECT(pUrl.username.empty());
BEAST_EXPECT(pUrl.password.empty());
BEAST_EXPECT(pUrl.domain == "::0.1.18.52");
BEAST_EXPECT(! pUrl.port);
BEAST_EXPECT(pUrl.path == "/validators");
}
// Expected fails.
{
parsedURL pUrl;
@@ -259,6 +287,12 @@ public:
BEAST_EXPECT(! parseUrl (pUrl, "://"));
BEAST_EXPECT(! parseUrl (pUrl, ":///"));
}
{
std::string strUrl("s://" + std::string(8192, ':'));
parsedURL pUrl;
BEAST_EXPECT(! parseUrl (pUrl, strUrl));
}
}
void testToString ()