mirror of
https://github.com/Xahau/xahaud.git
synced 2025-12-06 17:27:52 +00:00
Prefer regex to manual parsing in parseUrl:
Although `parseUrl` used a regex to pull the authority out of the URL being parsed, it parsed the hostname and port manually. This commit rolls the parsing of the userinfo (username and password, if any), the hostname and the port directly into the regex. The hostname can be a name, an IPv4 address or an IPv6 address. Fixes #2751.
This commit is contained in:
committed by
Nik Bougalis
parent
c1a02440dc
commit
dc4d76f626
@@ -70,6 +70,8 @@ struct parsedURL
|
||||
explicit parsedURL() = default;
|
||||
|
||||
std::string scheme;
|
||||
std::string username;
|
||||
std::string password;
|
||||
std::string domain;
|
||||
boost::optional<std::uint16_t> port;
|
||||
std::string path;
|
||||
|
||||
@@ -94,9 +94,19 @@ bool parseUrl (parsedURL& pUrl, std::string const& strUrl)
|
||||
// scheme://username:password@hostname:port/rest
|
||||
static boost::regex reUrl (
|
||||
"(?i)\\`\\s*"
|
||||
"([[:alpha:]][-+.[:alpha:][:digit:]]*):" //scheme
|
||||
"//([^/]+)?" // hostname
|
||||
"(/.*)?" // path and parameters
|
||||
// required scheme
|
||||
"([[:alpha:]][-+.[:alpha:][:digit:]]*):"
|
||||
// We choose to support only URIs whose `hier-part` has the form
|
||||
// `"//" authority path-abempty`.
|
||||
"//"
|
||||
// optional userinfo
|
||||
"(?:([^/]*?)(?::([^/]*?))?@)?"
|
||||
// optional host
|
||||
"([^/]*?)"
|
||||
// optional port
|
||||
"(?::([[:digit:]]+))?"
|
||||
// optional path
|
||||
"(/.*)?"
|
||||
"\\s*?\\'");
|
||||
boost::smatch smMatch;
|
||||
|
||||
@@ -106,29 +116,22 @@ bool parseUrl (parsedURL& pUrl, std::string const& strUrl)
|
||||
{
|
||||
pUrl.scheme = smMatch[1];
|
||||
boost::algorithm::to_lower (pUrl.scheme);
|
||||
pUrl.path = smMatch[3];
|
||||
pUrl.domain = smMatch[2];
|
||||
|
||||
// now consider the domain/port fragment
|
||||
auto colonPos = pUrl.domain.find_last_of(':');
|
||||
if (colonPos != std::string::npos)
|
||||
pUrl.username = smMatch[2];
|
||||
pUrl.password = smMatch[3];
|
||||
const std::string domain = smMatch[4];
|
||||
// We need to use Endpoint to parse the domain to
|
||||
// strip surrounding brackets from IPv6 addresses,
|
||||
// e.g. [::1] => ::1.
|
||||
const auto result {beast::IP::Endpoint::from_string_checked (domain)};
|
||||
pUrl.domain = result.second
|
||||
? result.first.address().to_string()
|
||||
: domain;
|
||||
const std::string port = smMatch[5];
|
||||
if (!port.empty())
|
||||
{
|
||||
// use Endpoint class to see if this thing looks
|
||||
// like an IP addr...
|
||||
auto result {beast::IP::Endpoint::from_string_checked (pUrl.domain)};
|
||||
if (result.second)
|
||||
{
|
||||
pUrl.domain = result.first.address().to_string();
|
||||
pUrl.port = result.first.port();
|
||||
}
|
||||
else // otherwise we are DNS name + port
|
||||
{
|
||||
pUrl.port = beast::lexicalCast <std::uint16_t> (
|
||||
pUrl.domain.substr(colonPos+1));
|
||||
pUrl.domain = pUrl.domain.substr(0, colonPos);
|
||||
}
|
||||
pUrl.port = beast::lexicalCast <std::uint16_t> (port);
|
||||
}
|
||||
//else, the whole thing is domain, not port
|
||||
pUrl.path = smMatch[6];
|
||||
}
|
||||
|
||||
return bMatch;
|
||||
|
||||
@@ -63,27 +63,202 @@ public:
|
||||
{
|
||||
testcase ("parseUrl");
|
||||
|
||||
parsedURL pUrl;
|
||||
// Expected passes.
|
||||
{
|
||||
parsedURL pUrl;
|
||||
BEAST_EXPECT(parseUrl (pUrl, "scheme://"));
|
||||
BEAST_EXPECT(pUrl.scheme == "scheme");
|
||||
BEAST_EXPECT(pUrl.username.empty());
|
||||
BEAST_EXPECT(pUrl.password.empty());
|
||||
BEAST_EXPECT(pUrl.domain.empty());
|
||||
BEAST_EXPECT(! pUrl.port);
|
||||
// RFC 3986:
|
||||
// > In general, a URI that uses the generic syntax for authority
|
||||
// with an empty path should be normalized to a path of "/".
|
||||
// Do we want to normalize paths?
|
||||
BEAST_EXPECT(pUrl.path.empty());
|
||||
}
|
||||
{
|
||||
parsedURL pUrl;
|
||||
BEAST_EXPECT(parseUrl (pUrl, "scheme:///"));
|
||||
BEAST_EXPECT(pUrl.scheme == "scheme");
|
||||
BEAST_EXPECT(pUrl.username.empty());
|
||||
BEAST_EXPECT(pUrl.password.empty());
|
||||
BEAST_EXPECT(pUrl.domain.empty());
|
||||
BEAST_EXPECT(! pUrl.port);
|
||||
BEAST_EXPECT(pUrl.path == "/");
|
||||
}
|
||||
{
|
||||
parsedURL pUrl;
|
||||
BEAST_EXPECT(parseUrl (pUrl, "lower://domain"));
|
||||
BEAST_EXPECT(pUrl.scheme == "lower");
|
||||
BEAST_EXPECT(pUrl.username.empty());
|
||||
BEAST_EXPECT(pUrl.password.empty());
|
||||
BEAST_EXPECT(pUrl.domain == "domain");
|
||||
BEAST_EXPECT(! pUrl.port);
|
||||
BEAST_EXPECT(pUrl.path.empty());
|
||||
}
|
||||
{
|
||||
parsedURL pUrl;
|
||||
BEAST_EXPECT(parseUrl (pUrl, "UPPER://domain:234/"));
|
||||
BEAST_EXPECT(pUrl.scheme == "upper");
|
||||
BEAST_EXPECT(pUrl.username.empty());
|
||||
BEAST_EXPECT(pUrl.password.empty());
|
||||
BEAST_EXPECT(pUrl.domain == "domain");
|
||||
BEAST_EXPECT(*pUrl.port == 234);
|
||||
BEAST_EXPECT(pUrl.path == "/");
|
||||
}
|
||||
{
|
||||
parsedURL pUrl;
|
||||
BEAST_EXPECT(parseUrl (pUrl, "Mixed://domain/path"));
|
||||
BEAST_EXPECT(pUrl.scheme == "mixed");
|
||||
BEAST_EXPECT(pUrl.username.empty());
|
||||
BEAST_EXPECT(pUrl.password.empty());
|
||||
BEAST_EXPECT(pUrl.domain == "domain");
|
||||
BEAST_EXPECT(! pUrl.port);
|
||||
BEAST_EXPECT(pUrl.path == "/path");
|
||||
}
|
||||
{
|
||||
parsedURL pUrl;
|
||||
BEAST_EXPECT(parseUrl (pUrl, "scheme://[::1]:123/path"));
|
||||
BEAST_EXPECT(pUrl.scheme == "scheme");
|
||||
BEAST_EXPECT(pUrl.username.empty());
|
||||
BEAST_EXPECT(pUrl.password.empty());
|
||||
BEAST_EXPECT(pUrl.domain == "::1");
|
||||
BEAST_EXPECT(*pUrl.port == 123);
|
||||
BEAST_EXPECT(pUrl.path == "/path");
|
||||
}
|
||||
{
|
||||
parsedURL pUrl;
|
||||
BEAST_EXPECT(parseUrl (pUrl, "scheme://user:pass@domain:123/abc:321"));
|
||||
BEAST_EXPECT(pUrl.scheme == "scheme");
|
||||
BEAST_EXPECT(pUrl.username == "user");
|
||||
BEAST_EXPECT(pUrl.password == "pass");
|
||||
BEAST_EXPECT(pUrl.domain == "domain");
|
||||
BEAST_EXPECT(*pUrl.port == 123);
|
||||
BEAST_EXPECT(pUrl.path == "/abc:321");
|
||||
}
|
||||
{
|
||||
parsedURL pUrl;
|
||||
BEAST_EXPECT(parseUrl (pUrl, "scheme://user@domain:123/abc:321"));
|
||||
BEAST_EXPECT(pUrl.scheme == "scheme");
|
||||
BEAST_EXPECT(pUrl.username == "user");
|
||||
BEAST_EXPECT(pUrl.password.empty());
|
||||
BEAST_EXPECT(pUrl.domain == "domain");
|
||||
BEAST_EXPECT(*pUrl.port == 123);
|
||||
BEAST_EXPECT(pUrl.path == "/abc:321");
|
||||
}
|
||||
{
|
||||
parsedURL pUrl;
|
||||
BEAST_EXPECT(parseUrl (pUrl, "scheme://:pass@domain:123/abc:321"));
|
||||
BEAST_EXPECT(pUrl.scheme == "scheme");
|
||||
BEAST_EXPECT(pUrl.username.empty());
|
||||
BEAST_EXPECT(pUrl.password == "pass");
|
||||
BEAST_EXPECT(pUrl.domain == "domain");
|
||||
BEAST_EXPECT(*pUrl.port == 123);
|
||||
BEAST_EXPECT(pUrl.path == "/abc:321");
|
||||
}
|
||||
{
|
||||
parsedURL pUrl;
|
||||
BEAST_EXPECT(parseUrl (pUrl, "scheme://domain:123/abc:321"));
|
||||
BEAST_EXPECT(pUrl.scheme == "scheme");
|
||||
BEAST_EXPECT(pUrl.username.empty());
|
||||
BEAST_EXPECT(pUrl.password.empty());
|
||||
BEAST_EXPECT(pUrl.domain == "domain");
|
||||
BEAST_EXPECT(*pUrl.port == 123);
|
||||
BEAST_EXPECT(pUrl.path == "/abc:321");
|
||||
}
|
||||
{
|
||||
parsedURL pUrl;
|
||||
BEAST_EXPECT(parseUrl (pUrl, "scheme://user:pass@domain/abc:321"));
|
||||
BEAST_EXPECT(pUrl.scheme == "scheme");
|
||||
BEAST_EXPECT(pUrl.username == "user");
|
||||
BEAST_EXPECT(pUrl.password == "pass");
|
||||
BEAST_EXPECT(pUrl.domain == "domain");
|
||||
BEAST_EXPECT(! pUrl.port);
|
||||
BEAST_EXPECT(pUrl.path == "/abc:321");
|
||||
}
|
||||
{
|
||||
parsedURL pUrl;
|
||||
BEAST_EXPECT(parseUrl (pUrl, "scheme://user@domain/abc:321"));
|
||||
BEAST_EXPECT(pUrl.scheme == "scheme");
|
||||
BEAST_EXPECT(pUrl.username == "user");
|
||||
BEAST_EXPECT(pUrl.password.empty());
|
||||
BEAST_EXPECT(pUrl.domain == "domain");
|
||||
BEAST_EXPECT(! pUrl.port);
|
||||
BEAST_EXPECT(pUrl.path == "/abc:321");
|
||||
}
|
||||
{
|
||||
parsedURL pUrl;
|
||||
BEAST_EXPECT(parseUrl (pUrl, "scheme://:pass@domain/abc:321"));
|
||||
BEAST_EXPECT(pUrl.scheme == "scheme");
|
||||
BEAST_EXPECT(pUrl.username.empty());
|
||||
BEAST_EXPECT(pUrl.password == "pass");
|
||||
BEAST_EXPECT(pUrl.domain == "domain");
|
||||
BEAST_EXPECT(! pUrl.port);
|
||||
BEAST_EXPECT(pUrl.path == "/abc:321");
|
||||
}
|
||||
{
|
||||
parsedURL pUrl;
|
||||
BEAST_EXPECT(parseUrl (pUrl, "scheme://domain/abc:321"));
|
||||
BEAST_EXPECT(pUrl.scheme == "scheme");
|
||||
BEAST_EXPECT(pUrl.username.empty());
|
||||
BEAST_EXPECT(pUrl.password.empty());
|
||||
BEAST_EXPECT(pUrl.domain == "domain");
|
||||
BEAST_EXPECT(! pUrl.port);
|
||||
BEAST_EXPECT(pUrl.path == "/abc:321");
|
||||
}
|
||||
{
|
||||
parsedURL pUrl;
|
||||
BEAST_EXPECT(parseUrl (pUrl, "scheme:///path/to/file"));
|
||||
BEAST_EXPECT(pUrl.scheme == "scheme");
|
||||
BEAST_EXPECT(pUrl.username.empty());
|
||||
BEAST_EXPECT(pUrl.password.empty());
|
||||
BEAST_EXPECT(pUrl.domain.empty());
|
||||
BEAST_EXPECT(! pUrl.port);
|
||||
BEAST_EXPECT(pUrl.path == "/path/to/file");
|
||||
}
|
||||
{
|
||||
parsedURL pUrl;
|
||||
BEAST_EXPECT(parseUrl (
|
||||
pUrl, "scheme://user:pass@domain/path/with/an@sign"));
|
||||
BEAST_EXPECT(pUrl.scheme == "scheme");
|
||||
BEAST_EXPECT(pUrl.username == "user");
|
||||
BEAST_EXPECT(pUrl.password == "pass");
|
||||
BEAST_EXPECT(pUrl.domain == "domain");
|
||||
BEAST_EXPECT(! pUrl.port);
|
||||
BEAST_EXPECT(pUrl.path == "/path/with/an@sign");
|
||||
}
|
||||
{
|
||||
parsedURL pUrl;
|
||||
BEAST_EXPECT(parseUrl (
|
||||
pUrl, "scheme://domain/path/with/an@sign"));
|
||||
BEAST_EXPECT(pUrl.scheme == "scheme");
|
||||
BEAST_EXPECT(pUrl.username.empty());
|
||||
BEAST_EXPECT(pUrl.password.empty());
|
||||
BEAST_EXPECT(pUrl.domain == "domain");
|
||||
BEAST_EXPECT(! pUrl.port);
|
||||
BEAST_EXPECT(pUrl.path == "/path/with/an@sign");
|
||||
}
|
||||
{
|
||||
parsedURL pUrl;
|
||||
BEAST_EXPECT(parseUrl (pUrl, "scheme://:999/"));
|
||||
BEAST_EXPECT(pUrl.scheme == "scheme");
|
||||
BEAST_EXPECT(pUrl.username.empty());
|
||||
BEAST_EXPECT(pUrl.password.empty());
|
||||
BEAST_EXPECT(pUrl.domain.empty());
|
||||
BEAST_EXPECT(*pUrl.port == 999);
|
||||
BEAST_EXPECT(pUrl.path == "/");
|
||||
}
|
||||
|
||||
BEAST_EXPECT(parseUrl (pUrl, "lower://domain"));
|
||||
BEAST_EXPECT(pUrl.scheme == "lower");
|
||||
BEAST_EXPECT(pUrl.domain == "domain");
|
||||
BEAST_EXPECT(! pUrl.port);
|
||||
BEAST_EXPECT(pUrl.path == "");
|
||||
BEAST_EXPECT(parseUrl (pUrl, "UPPER://domain:234/"));
|
||||
BEAST_EXPECT(pUrl.scheme == "upper");
|
||||
BEAST_EXPECT(*pUrl.port == 234);
|
||||
BEAST_EXPECT(pUrl.path == "/");
|
||||
BEAST_EXPECT(parseUrl (pUrl, "Mixed://domain/path"));
|
||||
BEAST_EXPECT(pUrl.scheme == "mixed");
|
||||
BEAST_EXPECT(pUrl.path == "/path");
|
||||
BEAST_EXPECT(parseUrl (pUrl, "scheme://[::1]:123/path"));
|
||||
BEAST_EXPECT(*pUrl.port == 123);
|
||||
BEAST_EXPECT(pUrl.domain == "::1");
|
||||
BEAST_EXPECT(parseUrl(pUrl, "nodomain:///path/path/path"));
|
||||
BEAST_EXPECT(pUrl.scheme == "nodomain");
|
||||
BEAST_EXPECT(pUrl.domain.empty());
|
||||
BEAST_EXPECT(pUrl.path == "/path/path/path");
|
||||
// Expected fails.
|
||||
{
|
||||
parsedURL pUrl;
|
||||
BEAST_EXPECT(! parseUrl (pUrl, ""));
|
||||
BEAST_EXPECT(! parseUrl (pUrl, "nonsense"));
|
||||
BEAST_EXPECT(! parseUrl (pUrl, "://"));
|
||||
BEAST_EXPECT(! parseUrl (pUrl, ":///"));
|
||||
}
|
||||
}
|
||||
|
||||
void testToString ()
|
||||
|
||||
Reference in New Issue
Block a user