From 0c20e2eb8b33e45b35628ff0a721c609624c6719 Mon Sep 17 00:00:00 2001 From: John Freeman Date: Mon, 18 Feb 2019 16:29:21 -0600 Subject: [PATCH] Refine parseUrl regular expression (RIPD-1708): The new parse logic is more strict but handles more cases. If an exception is thrown, just bail. * Allow parsing unenclosed IPv6 addresses without port * Improve string construction * Reduce nesting levels of code --- src/ripple/basics/impl/StringUtilities.cpp | 55 ++++++++++++---------- src/test/basics/StringUtilities_test.cpp | 38 ++++++++++++++- 2 files changed, 65 insertions(+), 28 deletions(-) diff --git a/src/ripple/basics/impl/StringUtilities.cpp b/src/ripple/basics/impl/StringUtilities.cpp index 1bc61a5207..a988e4446f 100644 --- a/src/ripple/basics/impl/StringUtilities.cpp +++ b/src/ripple/basics/impl/StringUtilities.cpp @@ -95,14 +95,14 @@ bool parseUrl (parsedURL& pUrl, std::string const& strUrl) static boost::regex reUrl ( "(?i)\\`\\s*" // required scheme - "([[:alpha:]][-+.[:alpha:][:digit:]]*):" + "([[:alpha:]][-+.[:alpha:][:digit:]]*?):" // We choose to support only URIs whose `hier-part` has the form // `"//" authority path-abempty`. "//" // optional userinfo - "(?:([^/]*?)(?::([^/]*?))?@)?" + "(?:([^:@/]*?)(?::([^@/]*?))?@)?" // optional host - "([^/]*?)" + "([[:digit:]:]*[[:digit:]]|\\[[^]]+\\]|[^:/?#]*?)" // optional port "(?::([[:digit:]]+))?" // optional path @@ -110,31 +110,34 @@ bool parseUrl (parsedURL& pUrl, std::string const& strUrl) "\\s*?\\'"); boost::smatch smMatch; - bool bMatch = boost::regex_match (strUrl, smMatch, reUrl); // Match status code. - - if (bMatch) - { - pUrl.scheme = smMatch[1]; - boost::algorithm::to_lower (pUrl.scheme); - pUrl.username = smMatch[2]; - pUrl.password = smMatch[3]; - const std::string domain = smMatch[4]; - // We need to use Endpoint to parse the domain to - // strip surrounding brackets from IPv6 addresses, - // e.g. [::1] => ::1. - const auto result {beast::IP::Endpoint::from_string_checked (domain)}; - pUrl.domain = result.second - ? result.first.address().to_string() - : domain; - const std::string port = smMatch[5]; - if (!port.empty()) - { - pUrl.port = beast::lexicalCast (port); - } - pUrl.path = smMatch[6]; + // Bail if there is no match. + try { + if (! boost::regex_match (strUrl, smMatch, reUrl)) + return false; + } catch (...) { + return false; } - return bMatch; + pUrl.scheme = smMatch[1]; + boost::algorithm::to_lower (pUrl.scheme); + pUrl.username = smMatch[2]; + pUrl.password = smMatch[3]; + const std::string domain = smMatch[4]; + // We need to use Endpoint to parse the domain to + // strip surrounding brackets from IPv6 addresses, + // e.g. [::1] => ::1. + const auto result {beast::IP::Endpoint::from_string_checked (domain)}; + pUrl.domain = result.second + ? result.first.address().to_string() + : domain; + const std::string port = smMatch[5]; + if (!port.empty()) + { + pUrl.port = beast::lexicalCast (port); + } + pUrl.path = smMatch[6]; + + return true; } std::string trim_whitespace (std::string str) diff --git a/src/test/basics/StringUtilities_test.cpp b/src/test/basics/StringUtilities_test.cpp index e6462c714e..ee69d9aaa8 100644 --- a/src/test/basics/StringUtilities_test.cpp +++ b/src/test/basics/StringUtilities_test.cpp @@ -78,6 +78,7 @@ public: // Do we want to normalize paths? BEAST_EXPECT(pUrl.path.empty()); } + { parsedURL pUrl; BEAST_EXPECT(parseUrl (pUrl, "scheme:///")); @@ -88,6 +89,7 @@ public: BEAST_EXPECT(! pUrl.port); BEAST_EXPECT(pUrl.path == "/"); } + { parsedURL pUrl; BEAST_EXPECT(parseUrl (pUrl, "lower://domain")); @@ -98,6 +100,7 @@ public: BEAST_EXPECT(! pUrl.port); BEAST_EXPECT(pUrl.path.empty()); } + { parsedURL pUrl; BEAST_EXPECT(parseUrl (pUrl, "UPPER://domain:234/")); @@ -108,6 +111,7 @@ public: BEAST_EXPECT(*pUrl.port == 234); BEAST_EXPECT(pUrl.path == "/"); } + { parsedURL pUrl; BEAST_EXPECT(parseUrl (pUrl, "Mixed://domain/path")); @@ -118,6 +122,7 @@ public: BEAST_EXPECT(! pUrl.port); BEAST_EXPECT(pUrl.path == "/path"); } + { parsedURL pUrl; BEAST_EXPECT(parseUrl (pUrl, "scheme://[::1]:123/path")); @@ -128,6 +133,7 @@ public: BEAST_EXPECT(*pUrl.port == 123); BEAST_EXPECT(pUrl.path == "/path"); } + { parsedURL pUrl; BEAST_EXPECT(parseUrl (pUrl, "scheme://user:pass@domain:123/abc:321")); @@ -138,6 +144,7 @@ public: BEAST_EXPECT(*pUrl.port == 123); BEAST_EXPECT(pUrl.path == "/abc:321"); } + { parsedURL pUrl; BEAST_EXPECT(parseUrl (pUrl, "scheme://user@domain:123/abc:321")); @@ -148,6 +155,7 @@ public: BEAST_EXPECT(*pUrl.port == 123); BEAST_EXPECT(pUrl.path == "/abc:321"); } + { parsedURL pUrl; BEAST_EXPECT(parseUrl (pUrl, "scheme://:pass@domain:123/abc:321")); @@ -158,6 +166,7 @@ public: BEAST_EXPECT(*pUrl.port == 123); BEAST_EXPECT(pUrl.path == "/abc:321"); } + { parsedURL pUrl; BEAST_EXPECT(parseUrl (pUrl, "scheme://domain:123/abc:321")); @@ -168,6 +177,7 @@ public: BEAST_EXPECT(*pUrl.port == 123); BEAST_EXPECT(pUrl.path == "/abc:321"); } + { parsedURL pUrl; BEAST_EXPECT(parseUrl (pUrl, "scheme://user:pass@domain/abc:321")); @@ -178,6 +188,7 @@ public: BEAST_EXPECT(! pUrl.port); BEAST_EXPECT(pUrl.path == "/abc:321"); } + { parsedURL pUrl; BEAST_EXPECT(parseUrl (pUrl, "scheme://user@domain/abc:321")); @@ -188,6 +199,7 @@ public: BEAST_EXPECT(! pUrl.port); BEAST_EXPECT(pUrl.path == "/abc:321"); } + { parsedURL pUrl; BEAST_EXPECT(parseUrl (pUrl, "scheme://:pass@domain/abc:321")); @@ -198,6 +210,7 @@ public: BEAST_EXPECT(! pUrl.port); BEAST_EXPECT(pUrl.path == "/abc:321"); } + { parsedURL pUrl; BEAST_EXPECT(parseUrl (pUrl, "scheme://domain/abc:321")); @@ -208,6 +221,7 @@ public: BEAST_EXPECT(! pUrl.port); BEAST_EXPECT(pUrl.path == "/abc:321"); } + { parsedURL pUrl; BEAST_EXPECT(parseUrl (pUrl, "scheme:///path/to/file")); @@ -218,6 +232,7 @@ public: BEAST_EXPECT(! pUrl.port); BEAST_EXPECT(pUrl.path == "/path/to/file"); } + { parsedURL pUrl; BEAST_EXPECT(parseUrl ( @@ -229,6 +244,7 @@ public: BEAST_EXPECT(! pUrl.port); BEAST_EXPECT(pUrl.path == "/path/with/an@sign"); } + { parsedURL pUrl; BEAST_EXPECT(parseUrl ( @@ -240,17 +256,29 @@ public: BEAST_EXPECT(! pUrl.port); BEAST_EXPECT(pUrl.path == "/path/with/an@sign"); } + { parsedURL pUrl; BEAST_EXPECT(parseUrl (pUrl, "scheme://:999/")); BEAST_EXPECT(pUrl.scheme == "scheme"); BEAST_EXPECT(pUrl.username.empty()); BEAST_EXPECT(pUrl.password.empty()); - BEAST_EXPECT(pUrl.domain.empty()); - BEAST_EXPECT(*pUrl.port == 999); + BEAST_EXPECT(pUrl.domain == ":999"); + BEAST_EXPECT(! pUrl.port); BEAST_EXPECT(pUrl.path == "/"); } + { + parsedURL pUrl; + BEAST_EXPECT(parseUrl (pUrl, "http://::1:1234/validators")); + BEAST_EXPECT(pUrl.scheme == "http"); + BEAST_EXPECT(pUrl.username.empty()); + BEAST_EXPECT(pUrl.password.empty()); + BEAST_EXPECT(pUrl.domain == "::0.1.18.52"); + BEAST_EXPECT(! pUrl.port); + BEAST_EXPECT(pUrl.path == "/validators"); + } + // Expected fails. { parsedURL pUrl; @@ -259,6 +287,12 @@ public: BEAST_EXPECT(! parseUrl (pUrl, "://")); BEAST_EXPECT(! parseUrl (pUrl, ":///")); } + + { + std::string strUrl("s://" + std::string(8192, ':')); + parsedURL pUrl; + BEAST_EXPECT(! parseUrl (pUrl, strUrl)); + } } void testToString ()