rippled
StringUtilities.cpp
1 //------------------------------------------------------------------------------
2 /*
3  This file is part of rippled: https://github.com/ripple/rippled
4  Copyright (c) 2012, 2013 Ripple Labs Inc.
5 
6  Permission to use, copy, modify, and/or distribute this software for any
7  purpose with or without fee is hereby granted, provided that the above
8  copyright notice and this permission notice appear in all copies.
9 
10  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 //==============================================================================
19 
20 #include <ripple/basics/Slice.h>
21 #include <ripple/basics/StringUtilities.h>
22 #include <ripple/basics/ToString.h>
23 #include <ripple/basics/contract.h>
24 #include <ripple/beast/core/LexicalCast.h>
25 #include <ripple/beast/net/IPEndpoint.h>
26 #include <boost/algorithm/string.hpp>
27 #include <boost/regex.hpp>
28 #include <algorithm>
29 #include <cstdarg>
30 
31 namespace ripple {
32 
33 uint64_t
34 uintFromHex(std::string const& strSrc)
35 {
36  uint64_t uValue(0);
37 
38  if (strSrc.size() > 16)
39  Throw<std::invalid_argument>("overlong 64-bit value");
40 
41  for (auto c : strSrc)
42  {
43  int ret = charUnHex(c);
44 
45  if (ret == -1)
46  Throw<std::invalid_argument>("invalid hex digit");
47 
48  uValue = (uValue << 4) | ret;
49  }
50 
51  return uValue;
52 }
53 
54 bool
55 parseUrl(parsedURL& pUrl, std::string const& strUrl)
56 {
57  // scheme://username:password@hostname:port/rest
58  static boost::regex reUrl(
59  "(?i)\\`\\s*"
60  // required scheme
61  "([[:alpha:]][-+.[:alpha:][:digit:]]*?):"
62  // We choose to support only URIs whose `hier-part` has the form
63  // `"//" authority path-abempty`.
64  "//"
65  // optional userinfo
66  "(?:([^:@/]*?)(?::([^@/]*?))?@)?"
67  // optional host
68  "([[:digit:]:]*[[:digit:]]|\\[[^]]+\\]|[^:/?#]*?)"
69  // optional port
70  "(?::([[:digit:]]+))?"
71  // optional path
72  "(/.*)?"
73  "\\s*?\\'");
74  boost::smatch smMatch;
75 
76  // Bail if there is no match.
77  try
78  {
79  if (!boost::regex_match(strUrl, smMatch, reUrl))
80  return false;
81  }
82  catch (...)
83  {
84  return false;
85  }
86 
87  pUrl.scheme = smMatch[1];
88  boost::algorithm::to_lower(pUrl.scheme);
89  pUrl.username = smMatch[2];
90  pUrl.password = smMatch[3];
91  const std::string domain = smMatch[4];
92  // We need to use Endpoint to parse the domain to
93  // strip surrounding brackets from IPv6 addresses,
94  // e.g. [::1] => ::1.
95  const auto result = beast::IP::Endpoint::from_string_checked(domain);
96  pUrl.domain = result ? result->address().to_string() : domain;
97  const std::string port = smMatch[5];
98  if (!port.empty())
99  {
100  pUrl.port = beast::lexicalCast<std::uint16_t>(port);
101  }
102  pUrl.path = smMatch[6];
103 
104  return true;
105 }
106 
109 {
110  boost::trim(str);
111  return str;
112 }
113 
114 boost::optional<std::uint64_t>
115 to_uint64(std::string const& s)
116 {
117  std::uint64_t result;
118  if (beast::lexicalCastChecked(result, s))
119  return result;
120  return boost::none;
121 }
122 
123 bool
125 {
126  // The domain must be between 4 and 128 characters long
127  if (domain.size() < 4 || domain.size() > 128)
128  return false;
129 
130  // This regular expression should do a decent job of weeding out
131  // obviously wrong domain names but it isn't perfect. It does not
132  // really support IDNs. If this turns out to be an issue, a more
133  // thorough regex can be used or this check can just be removed.
134  static boost::regex const re(
135  "^" // Beginning of line
136  "(" // Beginning of a segment
137  "(?!-)" // - must not begin with '-'
138  "[a-zA-Z0-9-]{1,63}" // - only alphanumeric and '-'
139  "(?<!-)" // - must not end with '-'
140  "\\." // segment separator
141  ")+" // 1 or more segments
142  "[A-Za-z]{2,63}" // TLD
143  "$" // End of line
144  ,
145  boost::regex_constants::optimize);
146 
147  return boost::regex_match(domain, re);
148 }
149 
150 } // namespace ripple
cstdarg
std::string
STL class.
ripple::charUnHex
int charUnHex(unsigned char c)
Converts a hex digit to the corresponding integer.
Definition: strHex.cpp:27
ripple::parsedURL
Definition: StringUtilities.h:123
ripple::parsedURL::password
std::string password
Definition: StringUtilities.h:129
ripple::uintFromHex
uint64_t uintFromHex(std::string const &strSrc)
Definition: StringUtilities.cpp:34
std::string::size
T size(T... args)
ripple::to_uint64
boost::optional< std::uint64_t > to_uint64(std::string const &s)
ripple::parsedURL::username
std::string username
Definition: StringUtilities.h:128
ripple::parsedURL::path
std::string path
Definition: StringUtilities.h:132
ripple::trim_whitespace
std::string trim_whitespace(std::string str)
algorithm
ripple::parseUrl
bool parseUrl(parsedURL &pUrl, std::string const &strUrl)
Definition: StringUtilities.cpp:55
std::uint64_t
ripple::parsedURL::port
boost::optional< std::uint16_t > port
Definition: StringUtilities.h:131
ripple
Use hash_* containers for keys that do not need a cryptographically secure hashing algorithm.
Definition: RCLCensorshipDetector.h:29
beast::lexicalCastChecked
bool lexicalCastChecked(Out &out, In in)
Intelligently convert from one type to another.
Definition: LexicalCast.h:266
std::string::empty
T empty(T... args)
ripple::parsedURL::scheme
std::string scheme
Definition: StringUtilities.h:127
ripple::isProperlyFormedTomlDomain
bool isProperlyFormedTomlDomain(std::string const &domain)
Determines if the given string looks like a TOML-file hosting domain.
ripple::parsedURL::domain
std::string domain
Definition: StringUtilities.h:130
beast::IP::Endpoint::from_string_checked
static boost::optional< Endpoint > from_string_checked(std::string const &s)
Create an Endpoint from a string.
Definition: IPEndpoint.cpp:35