New header-only basic_parser:

The basic_parser is rewritten to be header-only. The nodejs parser is
removed from the include subtree and placed into the test directory.
Other changes:

* Parser specific error codes in parse_error.hpp
* Add parser-bench performance testing, nodejs vs beast
* New random message generator for fuzz tests
* Test for header-only parser using random message generator
* Augmented some existing message tests to check more cases
This commit is contained in:
Vinnie Falco
2016-04-25 05:27:34 -04:00
parent 4cfa1d5cd3
commit 61a8f7f078
32 changed files with 4380 additions and 870 deletions

View File

@@ -8,150 +8,355 @@
#ifndef BEAST_HTTP_BASIC_PARSER_HPP
#define BEAST_HTTP_BASIC_PARSER_HPP
#include <beast/http/method.hpp>
#include <beast/http/impl/http_parser.h>
#include <beast/http/message.hpp>
#include <beast/http/parse_error.hpp>
#include <beast/http/rfc7230.hpp>
#include <beast/http/detail/basic_parser.hpp>
#include <beast/type_check.hpp>
#include <boost/asio/buffer.hpp>
#include <boost/system/error_code.hpp>
#include <beast/detail/ci_char_traits.hpp>
#include <array>
#include <cassert>
#include <climits>
#include <cstdint>
#include <string>
#include <type_traits>
namespace beast {
namespace http {
namespace parse_flag {
enum values
{
chunked = 1 << 0,
connection_keep_alive = 1 << 1,
connection_close = 1 << 2,
connection_upgrade = 1 << 3,
trailing = 1 << 4,
upgrade = 1 << 5,
skipbody = 1 << 6,
contentlength = 1 << 7
};
} // parse_flag
/** Parser for producing HTTP requests and responses.
Callbacks:
During parsing, callbacks will be made to the derived class
if those members are present (detected through SFINAE). The
signatures which can be present in the derived class are:<br>
If a is an object of type Derived, and the call expression is
valid then the stated effects will take place:
@li `void on_method(boost::string_ref const&, error_code& ec)`
a.on_start()
Called for each piece of the Request-Method
Called once when a new message begins.
@li `void on_uri(boost::string_ref const&, error_code& ec)`
a.on_field(std::string field, std::string value)
Called for each piece of the Request-URI
Called for each field
@li `void on_reason(boost::string_ref const&, error_code& ec)`
a.on_headers_complete(error_code&)
Called for each piece of the reason-phrase
Called when all the header fields have been received, but
before any part of the body if any is received.
@li `void on_request(error_code& ec)`
a.on_request(method_t method, std::string url,
int major, int minor, bool keep_alive, bool upgrade)
Called after the entire Request-Line has been parsed successfully.
Called for requests when all the headers have been received.
This will precede any content body.
@li `void on_response(error_code& ec)`
When keep_alive is false:
* Server roles respond with a "Connection: close" header.
* Client roles close the connection.
Called after the entire Response-Line has been parsed successfully.
a.on_response(int status, std::string text,
int major, int minor, bool keep_alive,
bool upgrade)
@li `void on_field(boost::string_ref const&, error_code& ec)`
Called for responses when all the headers have been received.
This will precede any content body.
Called for each piece of the current header field.
When keep_alive is `false`:
* Client roles close the connection.
* Server roles respond with a "Connection: close" header.
@li `void on_value(boost::string_ref const&, error_code& ec)`
This function should return `true` if upgrade is false and
a content body is expected. When upgrade is true, no
content-body is expected, and the return value is ignored.
Called for each piece of the current header value.
a.on_body(void const* data, std::size_t bytes, error_code&)
@li `int on_headers(error_code& ec)`
Called zero or more times for the content body. Any transfer
encoding is already decoded in the memory pointed to by data.
Called when all the headers have been parsed successfully.
a.on_complete()
@li `void on_body(boost::string_ref const&, error_code& ec)`
Called when parsing completes successfully.
Called for each piece of the body. If the headers indicated
chunked encoding, the chunk encoding is removed from the
buffer before being passed to the callback.
@li `void on_complete(error_code& ec)`
Called when the entire message has been parsed successfully.
At this point, basic_parser::complete() returns `true`, and
the parser is ready to parse another message if keep_alive()
would return `true`.
The return value of `on_headers` is special, it controls whether
or not the parser should expect a body. These are the return values:
@li *0* The parser should expect a body
@li *1* The parser should skip the body. For example, this is
used when sending a response to a HEAD request.
@li *2* The parser should skip ths body, this is an
upgrade to a different protocol.
The parser uses traits to determine if the callback is possible.
If the Derived type omits the callbacks, they are simply skipped
with no compilation error.
If the Derived type omits one or more callbacks, they are simply
skipped with no compilation error. The default behavior of on_body
when the derived class does not provide the member, is to specify that
the body should not be skipped.
If a callback sets an error, parsing stops at the current octet
and the error is returned to the caller.
*/
/*
VFALCO TODO is_call_possible, enable_if_t on Derived calls
use boost::string_ref instead of std::string
*/
template<class Derived>
template<bool isRequest, class Derived>
class basic_parser
{
http_parser state_;
boost::system::error_code* ec_;
bool complete_ = false;
std::string url_;
std::string status_;
std::string field_;
std::string value_;
private:
using self = basic_parser;
typedef void(self::*pmf_t)(error_code&, boost::string_ref const&);
static std::uint64_t constexpr no_content_length =
std::numeric_limits<std::uint64_t>::max();
enum state : std::uint8_t
{
s_closed = 1,
s_req_start,
s_req_method_start,
s_req_method,
s_req_space_before_url,
s_req_url_start,
s_req_url,
s_req_http_start,
s_req_http_H,
s_req_http_HT,
s_req_http_HTT,
s_req_http_HTTP,
s_req_major_start,
s_req_major,
s_req_minor_start,
s_req_minor,
s_req_line_end,
s_res_start,
s_res_H,
s_res_HT,
s_res_HTT,
s_res_HTTP,
s_res_major_start,
s_res_major,
s_res_minor_start,
s_res_minor,
s_res_status_code_start,
s_res_status_code,
s_res_status_start,
s_res_status,
s_res_line_almost_done,
s_res_line_done,
s_header_field_start,
s_header_field,
s_header_value_start,
s_header_value_discard_lWs0,
s_header_value_discard_ws0,
s_header_value_almost_done0,
s_header_value_text_start,
s_header_value_discard_lWs,
s_header_value_discard_ws,
s_header_value_text,
s_header_value_almost_done,
s_headers_almost_done,
s_headers_done,
s_chunk_size_start,
s_chunk_size,
s_chunk_parameters,
s_chunk_size_almost_done,
// states below do not count towards
// the limit on the size of the message
s_body_identity0,
s_body_identity,
s_body_identity_eof0,
s_body_identity_eof,
s_chunk_data_start,
s_chunk_data,
s_chunk_data_almost_done,
s_chunk_data_done,
s_complete,
s_restart
};
enum field_state : std::uint8_t
{
h_general = 0,
h_C,
h_CO,
h_CON,
h_matching_connection,
h_matching_proxy_connection,
h_matching_content_length,
h_matching_transfer_encoding,
h_matching_upgrade,
h_connection,
h_content_length,
h_transfer_encoding,
h_upgrade,
h_matching_transfer_encoding_chunked,
h_matching_connection_token_start,
h_matching_connection_keep_alive,
h_matching_connection_close,
h_matching_connection_upgrade,
h_matching_connection_token,
h_transfer_encoding_chunked,
h_connection_keep_alive,
h_connection_close,
h_connection_upgrade,
};
std::uint64_t content_length_;
std::uint64_t nread_;
pmf_t cb_;
state s_ : 8;
unsigned flags_ : 8;
unsigned fs_ : 8;
unsigned pos_ : 8; // position in field state
unsigned http_major_ : 16;
unsigned http_minor_ : 16;
unsigned status_code_ : 16;
bool upgrade_ : 1; // true if parser exited for upgrade
public:
using error_code = boost::system::error_code;
/// Copy constructor.
basic_parser(basic_parser const&) = default;
/** Move constructor.
/// Copy assignment.
basic_parser& operator=(basic_parser const&) = default;
The state of the moved-from object is undefined,
but safe to destroy.
*/
basic_parser(basic_parser&& other);
/// Constructor
basic_parser()
{
init(std::integral_constant<bool, isRequest>{});
}
/** Move assignment.
/// Returns internal flags associated with the parser.
unsigned
flags() const
{
return flags_;
}
The state of the moved-from object is undefined,
but safe to destroy.
*/
basic_parser&
operator=(basic_parser&& other);
/** Returns `true` if the message end is indicated by eof.
/** Copy constructor. */
basic_parser(basic_parser const& other);
This function returns true if the semantics of the message require
that the end of the message is signaled by an end of file. For
example, if the message is a HTTP/1.0 message and the Content-Length
is unspecified, the end of the message is indicated by an end of file.
/** Copy assignment. */
basic_parser& operator=(basic_parser const& other);
/** Construct the parser.
@param request If `true`, the parser is setup for a request.
*/
explicit
basic_parser(bool request) noexcept;
/** Returns `true` if parsing is complete.
This is only defined when no errors have been returned.
@return `true` if write_eof must be used to indicate the message end.
*/
bool
complete() const noexcept
needs_eof() const
{
return complete_;
return needs_eof(
std::integral_constant<bool, isRequest>{});
}
/** Returns the major HTTP version number.
Examples:
* Returns 1 for HTTP/1.1
* Returns 1 for HTTP/1.0
@return The HTTP major version number.
*/
unsigned
http_major() const
{
return http_major_;
}
/** Returns the minor HTTP version number.
Examples:
* Returns 1 for HTTP/1.1
* Returns 0 for HTTP/1.0
@return The HTTP minor version number.
*/
unsigned
http_minor() const
{
return http_minor_;
}
/** Returns `true` if the message is an upgrade message.
A value of `true` indicates that the parser has successfully
completed parsing a HTTP upgrade message.
@return `true` if the message is an upgrade message.
*/
bool
upgrade() const
{
return upgrade_;
}
/** Returns the numeric HTTP Status-Code of a response.
@return The Status-Code.
*/
unsigned
status_code() const
{
return status_code_;
}
/** Returns `true` if the connection should be kept open.
@note This function is only valid to call when the parser
is complete.
*/
bool
keep_alive() const;
/** Returns `true` if the parse has completed succesfully.
When the parse has completed successfully, and the semantics
of the parsed message indicate that the connection is still
active, a subsequent call to `write` will begin parsing a
new message.
@return `true` If the parsing has completed successfully.
*/
bool
complete() const
{
return s_ == s_restart;
}
/** Write data to the parser.
@param data A pointer to a buffer representing the input sequence.
@param size The number of bytes in the buffer pointed to by data.
@param buffers An object meeting the requirements of
ConstBufferSequence that represents the input sequence.
@throws boost::system::system_error Thrown on failure.
@param ec Set to the error, if any error occurred.
@return The number of bytes consumed in the input sequence.
*/
template<class ConstBufferSequence>
std::size_t
write(void const* data, std::size_t size)
{
error_code ec;
auto const used = write(data, size, ec);
if(ec)
throw boost::system::system_error{ec};
return used;
}
write(ConstBufferSequence const& buffers, error_code& ec);
/** Write data to the parser.
@@ -162,41 +367,7 @@ public:
@return The number of bytes consumed in the input sequence.
*/
std::size_t
write(void const* data, std::size_t size,
error_code& ec);
/** Write data to the parser.
@param buffers An object meeting the requirements of
ConstBufferSequence that represents the input sequence.
@throws boost::system::system_error Thrown on failure.
@return The number of bytes consumed in the input sequence.
*/
template<class ConstBufferSequence>
std::size_t
write(ConstBufferSequence const& buffers)
{
error_code ec;
auto const used = write(buffers, ec);
if(ec)
throw boost::system::system_error{ec};
return used;
}
/** Write data to the parser.
@param buffers An object meeting the requirements of
ConstBufferSequence that represents the input sequence.
@param ec Set to the error, if any error occurred.
@return The number of bytes consumed in the input sequence.
*/
template<class ConstBufferSequence>
std::size_t
write(ConstBufferSequence const& buffers,
error_code& ec);
write(void const* data, std::size_t size, error_code& ec);
/** Called to indicate the end of file.
@@ -210,26 +381,6 @@ public:
@throws boost::system::system_error Thrown on failure.
*/
void
write_eof()
{
error_code ec;
write_eof(ec);
if(ec)
throw boost::system::system_error{ec};
}
/** Called to indicate the end of file.
HTTP needs to know where the end of the stream is. For example,
sometimes servers send responses without Content-Length and
expect the client to consume input (for the body) until EOF.
Callbacks and errors will still be processed as usual.
@note This is typically called when a socket read returns eof.
@param ec Set to the error, if any error occurred.
*/
void
write_eof(error_code& ec);
private:
@@ -239,299 +390,182 @@ private:
return *static_cast<Derived*>(this);
}
template<class C>
class has_on_start_t
{
template<class T, class R =
decltype(std::declval<T>().on_start(), std::true_type{})>
static R check(int);
template <class>
static std::false_type check(...);
using type = decltype(check<C>(0));
public:
static bool const value = type::value;
};
template<class C>
using has_on_start =
std::integral_constant<bool, has_on_start_t<C>::value>;
void
call_on_start(std::true_type)
init(std::true_type)
{
impl().on_start();
s_ = s_req_start;
}
void
call_on_start(std::false_type)
init(std::false_type)
{
}
template<class C>
class has_on_field_t
{
template<class T, class R =
decltype(std::declval<T>().on_field(
std::declval<std::string const&>(),
std::declval<std::string const&>()),
std::true_type{})>
static R check(int);
template <class>
static std::false_type check(...);
using type = decltype(check<C>(0));
public:
static bool const value = type::value;
};
template<class C>
using has_on_field =
std::integral_constant<bool, has_on_field_t<C>::value>;
void
call_on_field(std::string const& field,
std::string const& value, std::true_type)
{
impl().on_field(field, value);
}
void
call_on_field(std::string const&, std::string const&,
std::false_type)
{
}
template<class C>
class has_on_headers_complete_t
{
template<class T, class R =
decltype(std::declval<T>().on_headers_complete(
std::declval<error_code&>()), std::true_type{})>
static R check(int);
template <class>
static std::false_type check(...);
using type = decltype(check<C>(0));
public:
static bool const value = type::value;
};
template<class C>
using has_on_headers_complete =
std::integral_constant<bool, has_on_headers_complete_t<C>::value>;
void
call_on_headers_complete(error_code& ec, std::true_type)
{
impl().on_headers_complete(ec);
}
void
call_on_headers_complete(error_code&, std::false_type)
{
}
template<class C>
class has_on_request_t
{
template<class T, class R =
decltype(std::declval<T>().on_request(
std::declval<method_t>(), std::declval<std::string>(),
std::declval<int>(), std::declval<int>(),
std::declval<bool>(), std::declval<bool>()),
std::true_type{})>
static R check(int);
template <class>
static std::false_type check(...);
using type = decltype(check<C>(0));
public:
static bool const value = type::value;
};
template<class C>
using has_on_request =
std::integral_constant<bool, has_on_request_t<C>::value>;
void
call_on_request(method_t method, std::string url,
int major, int minor, bool keep_alive, bool upgrade,
std::true_type)
{
impl().on_request(
method, url, major, minor, keep_alive, upgrade);
}
void
call_on_request(method_t, std::string, int, int, bool, bool,
std::false_type)
{
}
template<class C>
class has_on_response_t
{
template<class T, class R =
decltype(std::declval<T>().on_response(
std::declval<int>(), std::declval<std::string>,
std::declval<int>(), std::declval<int>(),
std::declval<bool>(), std::declval<bool>()),
std::true_type{})>
static R check(int);
template <class>
static std::false_type check(...);
#if 0
using type = decltype(check<C>(0));
#else
// VFALCO Trait seems broken for http::parser
using type = std::true_type;
#endif
public:
static bool const value = type::value;
};
template<class C>
using has_on_response =
std::integral_constant<bool, has_on_response_t<C>::value>;
bool
call_on_response(int status, std::string text,
int major, int minor, bool keep_alive, bool upgrade,
std::true_type)
{
return impl().on_response(
status, text, major, minor, keep_alive, upgrade);
s_ = s_res_start;
}
bool
call_on_response(int, std::string, int, int, bool, bool,
std::false_type)
needs_eof(std::true_type) const;
bool
needs_eof(std::false_type) const;
void call_on_method(error_code& ec,
boost::string_ref const& s, std::true_type)
{
// VFALCO Certainly incorrect
return true;
impl().on_method(s, ec);
}
template<class C>
class has_on_body_t
{
template<class T, class R =
decltype(std::declval<T>().on_body(
std::declval<void const*>(), std::declval<std::size_t>(),
std::declval<error_code&>()), std::true_type{})>
static R check(int);
template <class>
static std::false_type check(...);
using type = decltype(check<C>(0));
public:
static bool const value = type::value;
};
template<class C>
using has_on_body =
std::integral_constant<bool, has_on_body_t<C>::value>;
void
call_on_body(void const* data, std::size_t bytes,
error_code& ec, std::true_type)
{
impl().on_body(data, bytes, ec);
}
void
call_on_body(void const*, std::size_t,
error_code&, std::false_type)
void call_on_method(error_code&,
boost::string_ref const&, std::false_type)
{
}
template<class C>
class has_on_complete_t
void call_on_method(error_code& ec,
boost::string_ref const& s)
{
template<class T, class R =
decltype(std::declval<T>().on_complete(), std::true_type{})>
static R check(int);
template <class>
static std::false_type check(...);
using type = decltype(check<C>(0));
public:
static bool const value = type::value;
};
template<class C>
using has_on_complete =
std::integral_constant<bool, has_on_complete_t<C>::value>;
void
call_on_complete(std::true_type)
{
impl().on_complete();
call_on_method(ec, s, std::integral_constant<bool,
isRequest && detail::has_on_method<Derived>::value>{});
}
void
call_on_complete(std::false_type)
void call_on_uri(error_code& ec,
boost::string_ref const& s, std::true_type)
{
impl().on_uri(s, ec);
}
void call_on_uri(error_code&,
boost::string_ref const&, std::false_type)
{
}
void
check_header();
static int cb_message_start(http_parser*);
static int cb_url(http_parser*, char const*, std::size_t);
static int cb_status(http_parser*, char const*, std::size_t);
static int cb_header_field(http_parser*, char const*, std::size_t);
static int cb_header_value(http_parser*, char const*, std::size_t);
static int cb_headers_complete(http_parser*);
static int cb_body(http_parser*, char const*, std::size_t);
static int cb_message_complete(http_parser*);
static int cb_chunk_header(http_parser*);
static int cb_chunk_complete(http_parser*);
struct hooks_t : http_parser_settings
void call_on_uri(error_code& ec, boost::string_ref const& s)
{
hooks_t()
{
http_parser_settings_init(this);
on_message_begin = &basic_parser::cb_message_start;
on_url = &basic_parser::cb_url;
on_status = &basic_parser::cb_status;
on_header_field = &basic_parser::cb_header_field;
on_header_value = &basic_parser::cb_header_value;
on_headers_complete = &basic_parser::cb_headers_complete;
on_body = &basic_parser::cb_body;
on_message_complete = &basic_parser::cb_message_complete;
on_chunk_header = &basic_parser::cb_chunk_header;
on_chunk_complete = &basic_parser::cb_chunk_complete;
}
};
call_on_uri(ec, s, std::integral_constant<bool,
isRequest && detail::has_on_uri<Derived>::value>{});
}
static
http_parser_settings const*
hooks();
void call_on_reason(error_code& ec,
boost::string_ref const& s, std::true_type)
{
impl().on_reason(s, ec);
}
void call_on_reason(error_code&,
boost::string_ref const&, std::false_type)
{
}
void call_on_reason(error_code& ec, boost::string_ref const& s)
{
call_on_reason(ec, s, std::integral_constant<bool,
! isRequest && detail::has_on_reason<Derived>::value>{});
}
void call_on_request(error_code& ec, std::true_type)
{
impl().on_request(ec);
}
void call_on_request(error_code&, std::false_type)
{
}
void call_on_request(error_code& ec)
{
call_on_request(ec, detail::has_on_request<Derived>{});
}
void call_on_response(error_code& ec, std::true_type)
{
impl().on_response(ec);
}
void call_on_response(error_code&, std::false_type)
{
}
void call_on_response(error_code& ec)
{
call_on_response(ec, detail::has_on_response<Derived>{});
}
void call_on_field(error_code& ec,
boost::string_ref const& s, std::true_type)
{
impl().on_field(s, ec);
}
void call_on_field(error_code&,
boost::string_ref const&, std::false_type)
{
}
void call_on_field(error_code& ec, boost::string_ref const& s)
{
call_on_field(ec, s, detail::has_on_field<Derived>{});
}
void call_on_value(error_code& ec,
boost::string_ref const& s, std::true_type)
{
impl().on_value(s, ec);
}
void call_on_value(error_code&,
boost::string_ref const&, std::false_type)
{
}
void call_on_value(error_code& ec, boost::string_ref const& s)
{
call_on_value(ec, s, detail::has_on_value<Derived>{});
}
int call_on_headers(error_code& ec, std::true_type)
{
return impl().on_headers(ec);
}
int call_on_headers(error_code& ec, std::false_type)
{
return 0;
}
int call_on_headers(error_code& ec)
{
return call_on_headers(ec, detail::has_on_headers<Derived>{});
}
void call_on_body(error_code& ec,
boost::string_ref const& s, std::true_type)
{
impl().on_body(s, ec);
}
void call_on_body(error_code&,
boost::string_ref const&, std::false_type)
{
}
void call_on_body(error_code& ec, boost::string_ref const& s)
{
call_on_body(ec, s, detail::has_on_body<Derived>{});
}
void call_on_complete(error_code& ec, std::true_type)
{
impl().on_complete(ec);
}
void call_on_complete(error_code&, std::false_type)
{
}
void call_on_complete(error_code& ec)
{
call_on_complete(ec, detail::has_on_complete<Derived>{});
}
};
template<class Derived>
template<class ConstBufferSequence>
std::size_t
basic_parser<Derived>::write(
ConstBufferSequence const& buffers, error_code& ec)
{
static_assert(beast::is_ConstBufferSequence<
ConstBufferSequence>::value,
"ConstBufferSequence requirements not met");
using boost::asio::buffer_cast;
using boost::asio::buffer_size;
std::size_t bytes_used = 0;
for (auto const& buffer : buffers)
{
auto const n = write(
buffer_cast<void const*>(buffer),
buffer_size(buffer), ec);
if(ec)
return 0;
bytes_used += n;
if(complete())
break;
}
return bytes_used;
}
template<class Derived>
http_parser_settings const*
basic_parser<Derived>::hooks()
{
static hooks_t const h;
return &h;
}
} // http
} // beast