From dc4d76f62641faa80dc2b8cf71b84e2c4c736ff1 Mon Sep 17 00:00:00 2001 From: John Freeman Date: Wed, 7 Nov 2018 11:26:48 -0800 Subject: [PATCH] Prefer regex to manual parsing in parseURL: Although `parseURL` used a regex to pull the authority out of the URL being parsed, it performed manual parsing of the hostname and port. This commit rolls the parsing of the username and password, if any, directly into the regex. The hostname can be a name, an IPv4 or an IPv6 address. Fixes #2751 --- src/ripple/basics/StringUtilities.h | 2 + src/ripple/basics/impl/StringUtilities.cpp | 51 ++--- src/test/basics/StringUtilities_test.cpp | 215 +++++++++++++++++++-- 3 files changed, 224 insertions(+), 44 deletions(-) diff --git a/src/ripple/basics/StringUtilities.h b/src/ripple/basics/StringUtilities.h index f3d5039ec..f67f2a145 100644 --- a/src/ripple/basics/StringUtilities.h +++ b/src/ripple/basics/StringUtilities.h @@ -70,6 +70,8 @@ struct parsedURL explicit parsedURL() = default; std::string scheme; + std::string username; + std::string password; std::string domain; boost::optional port; std::string path; diff --git a/src/ripple/basics/impl/StringUtilities.cpp b/src/ripple/basics/impl/StringUtilities.cpp index 0650360f9..1bc61a520 100644 --- a/src/ripple/basics/impl/StringUtilities.cpp +++ b/src/ripple/basics/impl/StringUtilities.cpp @@ -94,9 +94,19 @@ bool parseUrl (parsedURL& pUrl, std::string const& strUrl) // scheme://username:password@hostname:port/rest static boost::regex reUrl ( "(?i)\\`\\s*" - "([[:alpha:]][-+.[:alpha:][:digit:]]*):" //scheme - "//([^/]+)?" // hostname - "(/.*)?" // path and parameters + // required scheme + "([[:alpha:]][-+.[:alpha:][:digit:]]*):" + // We choose to support only URIs whose `hier-part` has the form + // `"//" authority path-abempty`. + "//" + // optional userinfo + "(?:([^/]*?)(?::([^/]*?))?@)?" + // optional host + "([^/]*?)" + // optional port + "(?::([[:digit:]]+))?" + // optional path + "(/.*)?" "\\s*?\\'"); boost::smatch smMatch; @@ -106,29 +116,22 @@ bool parseUrl (parsedURL& pUrl, std::string const& strUrl) { pUrl.scheme = smMatch[1]; boost::algorithm::to_lower (pUrl.scheme); - pUrl.path = smMatch[3]; - pUrl.domain = smMatch[2]; - - // now consider the domain/port fragment - auto colonPos = pUrl.domain.find_last_of(':'); - if (colonPos != std::string::npos) + pUrl.username = smMatch[2]; + pUrl.password = smMatch[3]; + const std::string domain = smMatch[4]; + // We need to use Endpoint to parse the domain to + // strip surrounding brackets from IPv6 addresses, + // e.g. [::1] => ::1. + const auto result {beast::IP::Endpoint::from_string_checked (domain)}; + pUrl.domain = result.second + ? result.first.address().to_string() + : domain; + const std::string port = smMatch[5]; + if (!port.empty()) { - // use Endpoint class to see if this thing looks - // like an IP addr... - auto result {beast::IP::Endpoint::from_string_checked (pUrl.domain)}; - if (result.second) - { - pUrl.domain = result.first.address().to_string(); - pUrl.port = result.first.port(); - } - else // otherwise we are DNS name + port - { - pUrl.port = beast::lexicalCast ( - pUrl.domain.substr(colonPos+1)); - pUrl.domain = pUrl.domain.substr(0, colonPos); - } + pUrl.port = beast::lexicalCast (port); } - //else, the whole thing is domain, not port + pUrl.path = smMatch[6]; } return bMatch; diff --git a/src/test/basics/StringUtilities_test.cpp b/src/test/basics/StringUtilities_test.cpp index 2744ee8f7..e6462c714 100644 --- a/src/test/basics/StringUtilities_test.cpp +++ b/src/test/basics/StringUtilities_test.cpp @@ -63,27 +63,202 @@ public: { testcase ("parseUrl"); - parsedURL pUrl; + // Expected passes. + { + parsedURL pUrl; + BEAST_EXPECT(parseUrl (pUrl, "scheme://")); + BEAST_EXPECT(pUrl.scheme == "scheme"); + BEAST_EXPECT(pUrl.username.empty()); + BEAST_EXPECT(pUrl.password.empty()); + BEAST_EXPECT(pUrl.domain.empty()); + BEAST_EXPECT(! pUrl.port); + // RFC 3986: + // > In general, a URI that uses the generic syntax for authority + // with an empty path should be normalized to a path of "/". + // Do we want to normalize paths? + BEAST_EXPECT(pUrl.path.empty()); + } + { + parsedURL pUrl; + BEAST_EXPECT(parseUrl (pUrl, "scheme:///")); + BEAST_EXPECT(pUrl.scheme == "scheme"); + BEAST_EXPECT(pUrl.username.empty()); + BEAST_EXPECT(pUrl.password.empty()); + BEAST_EXPECT(pUrl.domain.empty()); + BEAST_EXPECT(! pUrl.port); + BEAST_EXPECT(pUrl.path == "/"); + } + { + parsedURL pUrl; + BEAST_EXPECT(parseUrl (pUrl, "lower://domain")); + BEAST_EXPECT(pUrl.scheme == "lower"); + BEAST_EXPECT(pUrl.username.empty()); + BEAST_EXPECT(pUrl.password.empty()); + BEAST_EXPECT(pUrl.domain == "domain"); + BEAST_EXPECT(! pUrl.port); + BEAST_EXPECT(pUrl.path.empty()); + } + { + parsedURL pUrl; + BEAST_EXPECT(parseUrl (pUrl, "UPPER://domain:234/")); + BEAST_EXPECT(pUrl.scheme == "upper"); + BEAST_EXPECT(pUrl.username.empty()); + BEAST_EXPECT(pUrl.password.empty()); + BEAST_EXPECT(pUrl.domain == "domain"); + BEAST_EXPECT(*pUrl.port == 234); + BEAST_EXPECT(pUrl.path == "/"); + } + { + parsedURL pUrl; + BEAST_EXPECT(parseUrl (pUrl, "Mixed://domain/path")); + BEAST_EXPECT(pUrl.scheme == "mixed"); + BEAST_EXPECT(pUrl.username.empty()); + BEAST_EXPECT(pUrl.password.empty()); + BEAST_EXPECT(pUrl.domain == "domain"); + BEAST_EXPECT(! pUrl.port); + BEAST_EXPECT(pUrl.path == "/path"); + } + { + parsedURL pUrl; + BEAST_EXPECT(parseUrl (pUrl, "scheme://[::1]:123/path")); + BEAST_EXPECT(pUrl.scheme == "scheme"); + BEAST_EXPECT(pUrl.username.empty()); + BEAST_EXPECT(pUrl.password.empty()); + BEAST_EXPECT(pUrl.domain == "::1"); + BEAST_EXPECT(*pUrl.port == 123); + BEAST_EXPECT(pUrl.path == "/path"); + } + { + parsedURL pUrl; + BEAST_EXPECT(parseUrl (pUrl, "scheme://user:pass@domain:123/abc:321")); + BEAST_EXPECT(pUrl.scheme == "scheme"); + BEAST_EXPECT(pUrl.username == "user"); + BEAST_EXPECT(pUrl.password == "pass"); + BEAST_EXPECT(pUrl.domain == "domain"); + BEAST_EXPECT(*pUrl.port == 123); + BEAST_EXPECT(pUrl.path == "/abc:321"); + } + { + parsedURL pUrl; + BEAST_EXPECT(parseUrl (pUrl, "scheme://user@domain:123/abc:321")); + BEAST_EXPECT(pUrl.scheme == "scheme"); + BEAST_EXPECT(pUrl.username == "user"); + BEAST_EXPECT(pUrl.password.empty()); + BEAST_EXPECT(pUrl.domain == "domain"); + BEAST_EXPECT(*pUrl.port == 123); + BEAST_EXPECT(pUrl.path == "/abc:321"); + } + { + parsedURL pUrl; + BEAST_EXPECT(parseUrl (pUrl, "scheme://:pass@domain:123/abc:321")); + BEAST_EXPECT(pUrl.scheme == "scheme"); + BEAST_EXPECT(pUrl.username.empty()); + BEAST_EXPECT(pUrl.password == "pass"); + BEAST_EXPECT(pUrl.domain == "domain"); + BEAST_EXPECT(*pUrl.port == 123); + BEAST_EXPECT(pUrl.path == "/abc:321"); + } + { + parsedURL pUrl; + BEAST_EXPECT(parseUrl (pUrl, "scheme://domain:123/abc:321")); + BEAST_EXPECT(pUrl.scheme == "scheme"); + BEAST_EXPECT(pUrl.username.empty()); + BEAST_EXPECT(pUrl.password.empty()); + BEAST_EXPECT(pUrl.domain == "domain"); + BEAST_EXPECT(*pUrl.port == 123); + BEAST_EXPECT(pUrl.path == "/abc:321"); + } + { + parsedURL pUrl; + BEAST_EXPECT(parseUrl (pUrl, "scheme://user:pass@domain/abc:321")); + BEAST_EXPECT(pUrl.scheme == "scheme"); + BEAST_EXPECT(pUrl.username == "user"); + BEAST_EXPECT(pUrl.password == "pass"); + BEAST_EXPECT(pUrl.domain == "domain"); + BEAST_EXPECT(! pUrl.port); + BEAST_EXPECT(pUrl.path == "/abc:321"); + } + { + parsedURL pUrl; + BEAST_EXPECT(parseUrl (pUrl, "scheme://user@domain/abc:321")); + BEAST_EXPECT(pUrl.scheme == "scheme"); + BEAST_EXPECT(pUrl.username == "user"); + BEAST_EXPECT(pUrl.password.empty()); + BEAST_EXPECT(pUrl.domain == "domain"); + BEAST_EXPECT(! pUrl.port); + BEAST_EXPECT(pUrl.path == "/abc:321"); + } + { + parsedURL pUrl; + BEAST_EXPECT(parseUrl (pUrl, "scheme://:pass@domain/abc:321")); + BEAST_EXPECT(pUrl.scheme == "scheme"); + BEAST_EXPECT(pUrl.username.empty()); + BEAST_EXPECT(pUrl.password == "pass"); + BEAST_EXPECT(pUrl.domain == "domain"); + BEAST_EXPECT(! pUrl.port); + BEAST_EXPECT(pUrl.path == "/abc:321"); + } + { + parsedURL pUrl; + BEAST_EXPECT(parseUrl (pUrl, "scheme://domain/abc:321")); + BEAST_EXPECT(pUrl.scheme == "scheme"); + BEAST_EXPECT(pUrl.username.empty()); + BEAST_EXPECT(pUrl.password.empty()); + BEAST_EXPECT(pUrl.domain == "domain"); + BEAST_EXPECT(! pUrl.port); + BEAST_EXPECT(pUrl.path == "/abc:321"); + } + { + parsedURL pUrl; + BEAST_EXPECT(parseUrl (pUrl, "scheme:///path/to/file")); + BEAST_EXPECT(pUrl.scheme == "scheme"); + BEAST_EXPECT(pUrl.username.empty()); + BEAST_EXPECT(pUrl.password.empty()); + BEAST_EXPECT(pUrl.domain.empty()); + BEAST_EXPECT(! pUrl.port); + BEAST_EXPECT(pUrl.path == "/path/to/file"); + } + { + parsedURL pUrl; + BEAST_EXPECT(parseUrl ( + pUrl, "scheme://user:pass@domain/path/with/an@sign")); + BEAST_EXPECT(pUrl.scheme == "scheme"); + BEAST_EXPECT(pUrl.username == "user"); + BEAST_EXPECT(pUrl.password == "pass"); + BEAST_EXPECT(pUrl.domain == "domain"); + BEAST_EXPECT(! pUrl.port); + BEAST_EXPECT(pUrl.path == "/path/with/an@sign"); + } + { + parsedURL pUrl; + BEAST_EXPECT(parseUrl ( + pUrl, "scheme://domain/path/with/an@sign")); + BEAST_EXPECT(pUrl.scheme == "scheme"); + BEAST_EXPECT(pUrl.username.empty()); + BEAST_EXPECT(pUrl.password.empty()); + BEAST_EXPECT(pUrl.domain == "domain"); + BEAST_EXPECT(! pUrl.port); + BEAST_EXPECT(pUrl.path == "/path/with/an@sign"); + } + { + parsedURL pUrl; + BEAST_EXPECT(parseUrl (pUrl, "scheme://:999/")); + BEAST_EXPECT(pUrl.scheme == "scheme"); + BEAST_EXPECT(pUrl.username.empty()); + BEAST_EXPECT(pUrl.password.empty()); + BEAST_EXPECT(pUrl.domain.empty()); + BEAST_EXPECT(*pUrl.port == 999); + BEAST_EXPECT(pUrl.path == "/"); + } - BEAST_EXPECT(parseUrl (pUrl, "lower://domain")); - BEAST_EXPECT(pUrl.scheme == "lower"); - BEAST_EXPECT(pUrl.domain == "domain"); - BEAST_EXPECT(! pUrl.port); - BEAST_EXPECT(pUrl.path == ""); - BEAST_EXPECT(parseUrl (pUrl, "UPPER://domain:234/")); - BEAST_EXPECT(pUrl.scheme == "upper"); - BEAST_EXPECT(*pUrl.port == 234); - BEAST_EXPECT(pUrl.path == "/"); - BEAST_EXPECT(parseUrl (pUrl, "Mixed://domain/path")); - BEAST_EXPECT(pUrl.scheme == "mixed"); - BEAST_EXPECT(pUrl.path == "/path"); - BEAST_EXPECT(parseUrl (pUrl, "scheme://[::1]:123/path")); - BEAST_EXPECT(*pUrl.port == 123); - BEAST_EXPECT(pUrl.domain == "::1"); - BEAST_EXPECT(parseUrl(pUrl, "nodomain:///path/path/path")); - BEAST_EXPECT(pUrl.scheme == "nodomain"); - BEAST_EXPECT(pUrl.domain.empty()); - BEAST_EXPECT(pUrl.path == "/path/path/path"); + // Expected fails. + { + parsedURL pUrl; + BEAST_EXPECT(! parseUrl (pUrl, "")); + BEAST_EXPECT(! parseUrl (pUrl, "nonsense")); + BEAST_EXPECT(! parseUrl (pUrl, "://")); + BEAST_EXPECT(! parseUrl (pUrl, ":///")); + } } void testToString ()