move utf8 checking to header file

This commit is contained in:
Richard Holland
2023-12-18 13:56:11 +00:00
parent 9215b87511
commit b42be37d78
2 changed files with 51 additions and 51 deletions

View File

@@ -103,57 +103,7 @@ URIToken::preflight(PreflightContext const& ctx)
return temMALFORMED;
}
if (!([](std::vector<uint8_t> const& u) -> bool {
// this code is from
// https://www.cl.cam.ac.uk/~mgk25/ucs/utf8_check.c
uint8_t const* s = (uint8_t const*)u.data();
uint8_t const* end = s + u.size();
while (s < end)
{
if (*s < 0x80)
/* 0xxxxxxx */
s++;
else if ((s[0] & 0xe0) == 0xc0)
{
/* 110XXXXx 10xxxxxx */
if ((s[1] & 0xc0) != 0x80 ||
(s[0] & 0xfe) == 0xc0) /* overlong? */
return false;
else
s += 2;
}
else if ((s[0] & 0xf0) == 0xe0)
{
/* 1110XXXX 10Xxxxxx 10xxxxxx */
if ((s[1] & 0xc0) != 0x80 || (s[2] & 0xc0) != 0x80 ||
(s[0] == 0xe0 &&
(s[1] & 0xe0) == 0x80) || /* overlong? */
(s[0] == 0xed &&
(s[1] & 0xe0) == 0xa0) || /* surrogate? */
(s[0] == 0xef && s[1] == 0xbf &&
(s[2] & 0xfe) == 0xbe)) /* U+FFFE or U+FFFF? */
return false;
else
s += 3;
}
else if ((s[0] & 0xf8) == 0xf0)
{
/* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */
if ((s[1] & 0xc0) != 0x80 || (s[2] & 0xc0) != 0x80 ||
(s[3] & 0xc0) != 0x80 ||
(s[0] == 0xf0 &&
(s[1] & 0xf0) == 0x80) || /* overlong? */
(s[0] == 0xf4 && s[1] > 0x8f) ||
s[0] > 0xf4) /* > U+10FFFF? */
return false;
else
s += 4;
}
else
return false;
}
return true;
})(uri))
if (!validateUTF8(uri))
{
JLOG(ctx.j.warn()) << "Malformed transaction. URI must be a "
"valid utf-8 string.";

View File

@@ -30,6 +30,56 @@ namespace ripple {
class URIToken : public Transactor
{
public:
bool inline static validateUTF8(std::vector<uint8_t> const& u)
{
// this code is from
// https://www.cl.cam.ac.uk/~mgk25/ucs/utf8_check.c
uint8_t const* s = (uint8_t const*)u.data();
uint8_t const* end = s + u.size();
while (s < end)
{
if (*s < 0x80)
/* 0xxxxxxx */
s++;
else if ((s[0] & 0xe0) == 0xc0)
{
/* 110XXXXx 10xxxxxx */
if ((s[1] & 0xc0) != 0x80 ||
(s[0] & 0xfe) == 0xc0) /* overlong? */
return false;
else
s += 2;
}
else if ((s[0] & 0xf0) == 0xe0)
{
/* 1110XXXX 10Xxxxxx 10xxxxxx */
if ((s[1] & 0xc0) != 0x80 || (s[2] & 0xc0) != 0x80 ||
(s[0] == 0xe0 && (s[1] & 0xe0) == 0x80) || /* overlong? */
(s[0] == 0xed && (s[1] & 0xe0) == 0xa0) || /* surrogate? */
(s[0] == 0xef && s[1] == 0xbf &&
(s[2] & 0xfe) == 0xbe)) /* U+FFFE or U+FFFF? */
return false;
else
s += 3;
}
else if ((s[0] & 0xf8) == 0xf0)
{
/* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */
if ((s[1] & 0xc0) != 0x80 || (s[2] & 0xc0) != 0x80 ||
(s[3] & 0xc0) != 0x80 ||
(s[0] == 0xf0 && (s[1] & 0xf0) == 0x80) || /* overlong? */
(s[0] == 0xf4 && s[1] > 0x8f) ||
s[0] > 0xf4) /* > U+10FFFF? */
return false;
else
s += 4;
}
else
return false;
}
return true;
}
static constexpr ConsequencesFactoryType ConsequencesFactory{Normal};
explicit URIToken(ApplyContext& ctx) : Transactor(ctx)