diff --git a/src/ripple/basics/StringUtilities.h b/src/ripple/basics/StringUtilities.h index f3d5039ec..f67f2a145 100644 --- a/src/ripple/basics/StringUtilities.h +++ b/src/ripple/basics/StringUtilities.h @@ -70,6 +70,8 @@ struct parsedURL explicit parsedURL() = default; std::string scheme; + std::string username; + std::string password; std::string domain; boost::optional port; std::string path; diff --git a/src/ripple/basics/impl/StringUtilities.cpp b/src/ripple/basics/impl/StringUtilities.cpp index 0650360f9..1bc61a520 100644 --- a/src/ripple/basics/impl/StringUtilities.cpp +++ b/src/ripple/basics/impl/StringUtilities.cpp @@ -94,9 +94,19 @@ bool parseUrl (parsedURL& pUrl, std::string const& strUrl) // scheme://username:password@hostname:port/rest static boost::regex reUrl ( "(?i)\\`\\s*" - "([[:alpha:]][-+.[:alpha:][:digit:]]*):" //scheme - "//([^/]+)?" // hostname - "(/.*)?" // path and parameters + // required scheme + "([[:alpha:]][-+.[:alpha:][:digit:]]*):" + // We choose to support only URIs whose `hier-part` has the form + // `"//" authority path-abempty`. + "//" + // optional userinfo + "(?:([^/]*?)(?::([^/]*?))?@)?" + // optional host + "([^/]*?)" + // optional port + "(?::([[:digit:]]+))?" + // optional path + "(/.*)?" "\\s*?\\'"); boost::smatch smMatch; @@ -106,29 +116,22 @@ bool parseUrl (parsedURL& pUrl, std::string const& strUrl) { pUrl.scheme = smMatch[1]; boost::algorithm::to_lower (pUrl.scheme); - pUrl.path = smMatch[3]; - pUrl.domain = smMatch[2]; - - // now consider the domain/port fragment - auto colonPos = pUrl.domain.find_last_of(':'); - if (colonPos != std::string::npos) + pUrl.username = smMatch[2]; + pUrl.password = smMatch[3]; + const std::string domain = smMatch[4]; + // We need to use Endpoint to parse the domain to + // strip surrounding brackets from IPv6 addresses, + // e.g. [::1] => ::1. + const auto result {beast::IP::Endpoint::from_string_checked (domain)}; + pUrl.domain = result.second + ? result.first.address().to_string() + : domain; + const std::string port = smMatch[5]; + if (!port.empty()) { - // use Endpoint class to see if this thing looks - // like an IP addr... - auto result {beast::IP::Endpoint::from_string_checked (pUrl.domain)}; - if (result.second) - { - pUrl.domain = result.first.address().to_string(); - pUrl.port = result.first.port(); - } - else // otherwise we are DNS name + port - { - pUrl.port = beast::lexicalCast ( - pUrl.domain.substr(colonPos+1)); - pUrl.domain = pUrl.domain.substr(0, colonPos); - } + pUrl.port = beast::lexicalCast (port); } - //else, the whole thing is domain, not port + pUrl.path = smMatch[6]; } return bMatch; diff --git a/src/test/basics/StringUtilities_test.cpp b/src/test/basics/StringUtilities_test.cpp index 2744ee8f7..e6462c714 100644 --- a/src/test/basics/StringUtilities_test.cpp +++ b/src/test/basics/StringUtilities_test.cpp @@ -63,27 +63,202 @@ public: { testcase ("parseUrl"); - parsedURL pUrl; + // Expected passes. + { + parsedURL pUrl; + BEAST_EXPECT(parseUrl (pUrl, "scheme://")); + BEAST_EXPECT(pUrl.scheme == "scheme"); + BEAST_EXPECT(pUrl.username.empty()); + BEAST_EXPECT(pUrl.password.empty()); + BEAST_EXPECT(pUrl.domain.empty()); + BEAST_EXPECT(! pUrl.port); + // RFC 3986: + // > In general, a URI that uses the generic syntax for authority + // with an empty path should be normalized to a path of "/". + // Do we want to normalize paths? + BEAST_EXPECT(pUrl.path.empty()); + } + { + parsedURL pUrl; + BEAST_EXPECT(parseUrl (pUrl, "scheme:///")); + BEAST_EXPECT(pUrl.scheme == "scheme"); + BEAST_EXPECT(pUrl.username.empty()); + BEAST_EXPECT(pUrl.password.empty()); + BEAST_EXPECT(pUrl.domain.empty()); + BEAST_EXPECT(! pUrl.port); + BEAST_EXPECT(pUrl.path == "/"); + } + { + parsedURL pUrl; + BEAST_EXPECT(parseUrl (pUrl, "lower://domain")); + BEAST_EXPECT(pUrl.scheme == "lower"); + BEAST_EXPECT(pUrl.username.empty()); + BEAST_EXPECT(pUrl.password.empty()); + BEAST_EXPECT(pUrl.domain == "domain"); + BEAST_EXPECT(! pUrl.port); + BEAST_EXPECT(pUrl.path.empty()); + } + { + parsedURL pUrl; + BEAST_EXPECT(parseUrl (pUrl, "UPPER://domain:234/")); + BEAST_EXPECT(pUrl.scheme == "upper"); + BEAST_EXPECT(pUrl.username.empty()); + BEAST_EXPECT(pUrl.password.empty()); + BEAST_EXPECT(pUrl.domain == "domain"); + BEAST_EXPECT(*pUrl.port == 234); + BEAST_EXPECT(pUrl.path == "/"); + } + { + parsedURL pUrl; + BEAST_EXPECT(parseUrl (pUrl, "Mixed://domain/path")); + BEAST_EXPECT(pUrl.scheme == "mixed"); + BEAST_EXPECT(pUrl.username.empty()); + BEAST_EXPECT(pUrl.password.empty()); + BEAST_EXPECT(pUrl.domain == "domain"); + BEAST_EXPECT(! pUrl.port); + BEAST_EXPECT(pUrl.path == "/path"); + } + { + parsedURL pUrl; + BEAST_EXPECT(parseUrl (pUrl, "scheme://[::1]:123/path")); + BEAST_EXPECT(pUrl.scheme == "scheme"); + BEAST_EXPECT(pUrl.username.empty()); + BEAST_EXPECT(pUrl.password.empty()); + BEAST_EXPECT(pUrl.domain == "::1"); + BEAST_EXPECT(*pUrl.port == 123); + BEAST_EXPECT(pUrl.path == "/path"); + } + { + parsedURL pUrl; + BEAST_EXPECT(parseUrl (pUrl, "scheme://user:pass@domain:123/abc:321")); + BEAST_EXPECT(pUrl.scheme == "scheme"); + BEAST_EXPECT(pUrl.username == "user"); + BEAST_EXPECT(pUrl.password == "pass"); + BEAST_EXPECT(pUrl.domain == "domain"); + BEAST_EXPECT(*pUrl.port == 123); + BEAST_EXPECT(pUrl.path == "/abc:321"); + } + { + parsedURL pUrl; + BEAST_EXPECT(parseUrl (pUrl, "scheme://user@domain:123/abc:321")); + BEAST_EXPECT(pUrl.scheme == "scheme"); + BEAST_EXPECT(pUrl.username == "user"); + BEAST_EXPECT(pUrl.password.empty()); + BEAST_EXPECT(pUrl.domain == "domain"); + BEAST_EXPECT(*pUrl.port == 123); + BEAST_EXPECT(pUrl.path == "/abc:321"); + } + { + parsedURL pUrl; + BEAST_EXPECT(parseUrl (pUrl, "scheme://:pass@domain:123/abc:321")); + BEAST_EXPECT(pUrl.scheme == "scheme"); + BEAST_EXPECT(pUrl.username.empty()); + BEAST_EXPECT(pUrl.password == "pass"); + BEAST_EXPECT(pUrl.domain == "domain"); + BEAST_EXPECT(*pUrl.port == 123); + BEAST_EXPECT(pUrl.path == "/abc:321"); + } + { + parsedURL pUrl; + BEAST_EXPECT(parseUrl (pUrl, "scheme://domain:123/abc:321")); + BEAST_EXPECT(pUrl.scheme == "scheme"); + BEAST_EXPECT(pUrl.username.empty()); + BEAST_EXPECT(pUrl.password.empty()); + BEAST_EXPECT(pUrl.domain == "domain"); + BEAST_EXPECT(*pUrl.port == 123); + BEAST_EXPECT(pUrl.path == "/abc:321"); + } + { + parsedURL pUrl; + BEAST_EXPECT(parseUrl (pUrl, "scheme://user:pass@domain/abc:321")); + BEAST_EXPECT(pUrl.scheme == "scheme"); + BEAST_EXPECT(pUrl.username == "user"); + BEAST_EXPECT(pUrl.password == "pass"); + BEAST_EXPECT(pUrl.domain == "domain"); + BEAST_EXPECT(! pUrl.port); + BEAST_EXPECT(pUrl.path == "/abc:321"); + } + { + parsedURL pUrl; + BEAST_EXPECT(parseUrl (pUrl, "scheme://user@domain/abc:321")); + BEAST_EXPECT(pUrl.scheme == "scheme"); + BEAST_EXPECT(pUrl.username == "user"); + BEAST_EXPECT(pUrl.password.empty()); + BEAST_EXPECT(pUrl.domain == "domain"); + BEAST_EXPECT(! pUrl.port); + BEAST_EXPECT(pUrl.path == "/abc:321"); + } + { + parsedURL pUrl; + BEAST_EXPECT(parseUrl (pUrl, "scheme://:pass@domain/abc:321")); + BEAST_EXPECT(pUrl.scheme == "scheme"); + BEAST_EXPECT(pUrl.username.empty()); + BEAST_EXPECT(pUrl.password == "pass"); + BEAST_EXPECT(pUrl.domain == "domain"); + BEAST_EXPECT(! pUrl.port); + BEAST_EXPECT(pUrl.path == "/abc:321"); + } + { + parsedURL pUrl; + BEAST_EXPECT(parseUrl (pUrl, "scheme://domain/abc:321")); + BEAST_EXPECT(pUrl.scheme == "scheme"); + BEAST_EXPECT(pUrl.username.empty()); + BEAST_EXPECT(pUrl.password.empty()); + BEAST_EXPECT(pUrl.domain == "domain"); + BEAST_EXPECT(! pUrl.port); + BEAST_EXPECT(pUrl.path == "/abc:321"); + } + { + parsedURL pUrl; + BEAST_EXPECT(parseUrl (pUrl, "scheme:///path/to/file")); + BEAST_EXPECT(pUrl.scheme == "scheme"); + BEAST_EXPECT(pUrl.username.empty()); + BEAST_EXPECT(pUrl.password.empty()); + BEAST_EXPECT(pUrl.domain.empty()); + BEAST_EXPECT(! pUrl.port); + BEAST_EXPECT(pUrl.path == "/path/to/file"); + } + { + parsedURL pUrl; + BEAST_EXPECT(parseUrl ( + pUrl, "scheme://user:pass@domain/path/with/an@sign")); + BEAST_EXPECT(pUrl.scheme == "scheme"); + BEAST_EXPECT(pUrl.username == "user"); + BEAST_EXPECT(pUrl.password == "pass"); + BEAST_EXPECT(pUrl.domain == "domain"); + BEAST_EXPECT(! pUrl.port); + BEAST_EXPECT(pUrl.path == "/path/with/an@sign"); + } + { + parsedURL pUrl; + BEAST_EXPECT(parseUrl ( + pUrl, "scheme://domain/path/with/an@sign")); + BEAST_EXPECT(pUrl.scheme == "scheme"); + BEAST_EXPECT(pUrl.username.empty()); + BEAST_EXPECT(pUrl.password.empty()); + BEAST_EXPECT(pUrl.domain == "domain"); + BEAST_EXPECT(! pUrl.port); + BEAST_EXPECT(pUrl.path == "/path/with/an@sign"); + } + { + parsedURL pUrl; + BEAST_EXPECT(parseUrl (pUrl, "scheme://:999/")); + BEAST_EXPECT(pUrl.scheme == "scheme"); + BEAST_EXPECT(pUrl.username.empty()); + BEAST_EXPECT(pUrl.password.empty()); + BEAST_EXPECT(pUrl.domain.empty()); + BEAST_EXPECT(*pUrl.port == 999); + BEAST_EXPECT(pUrl.path == "/"); + } - BEAST_EXPECT(parseUrl (pUrl, "lower://domain")); - BEAST_EXPECT(pUrl.scheme == "lower"); - BEAST_EXPECT(pUrl.domain == "domain"); - BEAST_EXPECT(! pUrl.port); - BEAST_EXPECT(pUrl.path == ""); - BEAST_EXPECT(parseUrl (pUrl, "UPPER://domain:234/")); - BEAST_EXPECT(pUrl.scheme == "upper"); - BEAST_EXPECT(*pUrl.port == 234); - BEAST_EXPECT(pUrl.path == "/"); - BEAST_EXPECT(parseUrl (pUrl, "Mixed://domain/path")); - BEAST_EXPECT(pUrl.scheme == "mixed"); - BEAST_EXPECT(pUrl.path == "/path"); - BEAST_EXPECT(parseUrl (pUrl, "scheme://[::1]:123/path")); - BEAST_EXPECT(*pUrl.port == 123); - BEAST_EXPECT(pUrl.domain == "::1"); - BEAST_EXPECT(parseUrl(pUrl, "nodomain:///path/path/path")); - BEAST_EXPECT(pUrl.scheme == "nodomain"); - BEAST_EXPECT(pUrl.domain.empty()); - BEAST_EXPECT(pUrl.path == "/path/path/path"); + // Expected fails. + { + parsedURL pUrl; + BEAST_EXPECT(! parseUrl (pUrl, "")); + BEAST_EXPECT(! parseUrl (pUrl, "nonsense")); + BEAST_EXPECT(! parseUrl (pUrl, "://")); + BEAST_EXPECT(! parseUrl (pUrl, ":///")); + } } void testToString ()