Improvements to HTTP parsing

This commit is contained in:
Vinnie Falco
2014-06-26 16:56:27 -07:00
parent 0a93f8db22
commit 0c5c901222
6 changed files with 442 additions and 51 deletions

View File

@@ -24,6 +24,7 @@
#include <beast/http/impl/basic_url.cpp>
#include <beast/http/impl/get.cpp>
#include <beast/http/impl/joyent_parser.cpp>
#include <beast/http/impl/message_parser.cpp>
#include <beast/http/impl/ParsedURL.cpp>
#include <beast/http/impl/raw_parser.cpp>
#include <beast/http/impl/URL.cpp>
@@ -32,3 +33,4 @@
#include <beast/http/tests/client_session.test.cpp>
#include <beast/http/tests/ParsedURL.cpp>
#include <beast/http/tests/urls_large_data.cpp>

View File

@@ -18,9 +18,7 @@
//==============================================================================
#include <beast/http/impl/joyent_parser.h>
#include <beast/http/basic_message.h>
#include <beast/http/method.h>
#include <boost/system/error_code.hpp>
namespace beast {
@@ -60,50 +58,50 @@ struct is_error_condition_enum <beast::joyent::http_errno>
namespace beast {
namespace joyent {
http::method::methodc_t
http::method_t
convert_http_method (joyent::http_method m)
{
switch (m)
{
case HTTP_DELETE: return http::method::http_delete;
case HTTP_GET: return http::method::http_get;
case HTTP_HEAD: return http::method::http_head;
case HTTP_POST: return http::method::http_post;
case HTTP_PUT: return http::method::http_put;
case HTTP_DELETE: return http::method_t::http_delete;
case HTTP_GET: return http::method_t::http_get;
case HTTP_HEAD: return http::method_t::http_head;
case HTTP_POST: return http::method_t::http_post;
case HTTP_PUT: return http::method_t::http_put;
// pathological
case HTTP_CONNECT: return http::method::http_connect;
case HTTP_OPTIONS: return http::method::http_options;
case HTTP_TRACE: return http::method::http_trace;
case HTTP_CONNECT: return http::method_t::http_connect;
case HTTP_OPTIONS: return http::method_t::http_options;
case HTTP_TRACE: return http::method_t::http_trace;
// webdav
case HTTP_COPY: return http::method::http_copy;
case HTTP_LOCK: return http::method::http_lock;
case HTTP_MKCOL: return http::method::http_mkcol;
case HTTP_MOVE: return http::method::http_move;
case HTTP_PROPFIND: return http::method::http_propfind;
case HTTP_PROPPATCH: return http::method::http_proppatch;
case HTTP_SEARCH: return http::method::http_search;
case HTTP_UNLOCK: return http::method::http_unlock;
case HTTP_COPY: return http::method_t::http_copy;
case HTTP_LOCK: return http::method_t::http_lock;
case HTTP_MKCOL: return http::method_t::http_mkcol;
case HTTP_MOVE: return http::method_t::http_move;
case HTTP_PROPFIND: return http::method_t::http_propfind;
case HTTP_PROPPATCH: return http::method_t::http_proppatch;
case HTTP_SEARCH: return http::method_t::http_search;
case HTTP_UNLOCK: return http::method_t::http_unlock;
// subversion
case HTTP_REPORT: return http::method::http_report;
case HTTP_MKACTIVITY: return http::method::http_mkactivity;
case HTTP_CHECKOUT: return http::method::http_checkout;
case HTTP_MERGE: return http::method::http_merge;
case HTTP_REPORT: return http::method_t::http_report;
case HTTP_MKACTIVITY: return http::method_t::http_mkactivity;
case HTTP_CHECKOUT: return http::method_t::http_checkout;
case HTTP_MERGE: return http::method_t::http_merge;
// upnp
case HTTP_MSEARCH: return http::method::http_msearch;
case HTTP_NOTIFY: return http::method::http_notify;
case HTTP_SUBSCRIBE: return http::method::http_subscribe;
case HTTP_UNSUBSCRIBE: return http::method::http_unsubscribe;
case HTTP_MSEARCH: return http::method_t::http_msearch;
case HTTP_NOTIFY: return http::method_t::http_notify;
case HTTP_SUBSCRIBE: return http::method_t::http_subscribe;
case HTTP_UNSUBSCRIBE: return http::method_t::http_unsubscribe;
// RFC-5789
case HTTP_PATCH: return http::method::http_patch;
case HTTP_PURGE: return http::method::http_purge;
case HTTP_PATCH: return http::method_t::http_patch;
case HTTP_PURGE: return http::method_t::http_purge;
};
return http::method::http_get;
return http::method_t::http_get;
}
boost::system::error_code

View File

@@ -20,7 +20,7 @@
#ifndef BEAST_HTTP_JOYENT_PARSER_H_INCLUDED
#define BEAST_HTTP_JOYENT_PARSER_H_INCLUDED
#include <beast/http/basic_message.h>
#include <beast/http/method.h>
// TODO Use <system_error>
#include <boost/system/error_code.hpp>
@@ -32,7 +32,7 @@ namespace joyent {
#include <beast/http/impl/http-parser/http_parser.h>
http::method::methodc_t
http::method_t
convert_http_method (joyent::http_method m);
boost::system::error_code

View File

@@ -0,0 +1,231 @@
//------------------------------------------------------------------------------
/*
This file is part of Beast: https://github.com/vinniefalco/Beast
Copyright 2013, Vinnie Falco <vinnie.falco@gmail.com>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//==============================================================================
#include <beast/http/message_parser.h>
#include <beast/http/impl/joyent_parser.h>
namespace beast {
namespace http {
// Construct the parser.
//
// Wires every joyent callback to the matching static trampoline, stores
// `this` in the parser's user-data slot so the trampolines can find the
// object again, and initializes the joyent state machine for either
// requests (`request == true`) or responses.
//
message_parser::message_parser (bool request)
    : complete_ (false)
    , checked_url_ (false)
{
    // The opaque byte arrays declared in the header are sized from the
    // mirror structs; make sure the mirrors agree with the real ones.
    static_assert (sizeof(joyent::http_parser) == sizeof(state_t),
        "state_t size must match http_parser size");
    static_assert (sizeof(joyent::http_parser_settings) == sizeof(hooks_t),
        "hooks_t size must match http_parser_settings size");

    auto const settings (
        reinterpret_cast <joyent::http_parser_settings*> (&hooks_));
    settings->on_message_begin    = &message_parser::cb_message_start;
    settings->on_url              = &message_parser::cb_url;
    settings->on_status           = &message_parser::cb_status;
    settings->on_header_field     = &message_parser::cb_header_field;
    settings->on_header_value     = &message_parser::cb_header_value;
    settings->on_headers_complete = &message_parser::cb_headers_done;
    settings->on_body             = &message_parser::cb_body;
    settings->on_message_complete = &message_parser::cb_message_complete;

    auto const parser (
        reinterpret_cast <joyent::http_parser*> (&state_));
    // Set the back-pointer before calling http_parser_init, as the
    // original ordering does.
    parser->data = this;

    if (request)
        joyent::http_parser_init (parser,
            joyent::http_parser_type::HTTP_REQUEST);
    else
        joyent::http_parser_init (parser,
            joyent::http_parser_type::HTTP_RESPONSE);
}
// Feed one contiguous buffer to the joyent parser.
//
// Returns the current value of `ec_` together with the number of bytes
// the joyent parser reports as consumed.
//
// NOTE(review): in this file `ec_` is only assigned from the results of
// on_request/on_field, so a syntax error detected by the joyent parser
// itself does not appear to surface through the returned error_code;
// confirm whether callers are expected to compare the consumed count
// against `bytes`.
//
std::pair <message_parser::error_code, std::size_t>
message_parser::write_one (void const* in, std::size_t bytes)
{
    auto const parser (
        reinterpret_cast <joyent::http_parser*> (&state_));
    auto const settings (
        reinterpret_cast <joyent::http_parser_settings const*> (&hooks_));

    std::size_t const consumed (joyent::http_parser_execute (
        parser, settings, static_cast <const char*> (in), bytes));

    // ec_ is read only after the parser (and therefore every hook) has run.
    return std::pair <error_code, std::size_t> (ec_, consumed);
}
//------------------------------------------------------------------------------
// Deliver the request line to the derived class exactly once.
//
// On the first call this invokes on_request() with the method, the HTTP
// version and the accumulated URL, stores the result in `ec_`, and
// returns 1 if that result is an error. Every later call returns 0
// without doing anything.
//
int
message_parser::check_url()
{
    if (checked_url_)
        return 0;
    checked_url_ = true;

    auto const parser (
        reinterpret_cast <joyent::http_parser const*> (&state_));
    auto const method (joyent::convert_http_method (
        joyent::http_method (parser->method)));

    ec_ = on_request (method,
        parser->http_major, parser->http_minor, url_);
    if (ec_)
        return 1;
    return 0;
}
// Handler for the start of a message.
// Returns 1 when an error is already recorded in `ec_`, otherwise 0.
//
int
message_parser::do_message_start ()
{
    if (ec_)
        return 1;
    return 0;
}
// Handler for a fragment of the URL.
// Fragments are accumulated in `url_`; the complete URL is handed to
// on_request() later, from check_url().
//
int
message_parser::do_url (char const* in, std::size_t bytes)
{
    url_.append (in, bytes);
    return 0;
}
// Handler for a fragment of the response status text.
//
// The status text is currently ignored: neither `in` nor `bytes` is
// used. Returns 1 when an error is already recorded in `ec_`,
// otherwise 0.
//
int
message_parser::do_status (char const* in, std::size_t bytes)
{
    return ec_ ? 1 : 0;
}
// Handler for a fragment of a header field name.
//
// The first header fragment also triggers delivery of the request line
// through check_url(). If a value has been accumulated for the previous
// field, that field/value pair is delivered through on_field() and both
// buffers are reset before the new name fragment is stored.
//
// Returns 1 if check_url() or on_field() reports an error, otherwise 0.
//
// NOTE(review): the "previous header is finished" test is
// `! value_.empty()`. If a header can arrive with an empty value, its
// name would not be flushed and the next field name would be appended
// to it; confirm how the joyent parser reports empty values.
//
int
message_parser::do_header_field (char const* in, std::size_t bytes)
{
    if (check_url() != 0)
        return 1;

    bool const have_pending_value (! value_.empty());
    if (have_pending_value)
    {
        ec_ = on_field (field_, value_);
        if (ec_)
            return 1;
        field_.clear();
        value_.clear();
    }

    field_.append (in, bytes);
    return 0;
}
// Handler for a fragment of a header value.
// Fragments are accumulated in `value_` until the header is flushed by
// do_header_field() or do_headers_done().
//
int
message_parser::do_header_value (char const* in, std::size_t bytes)
{
    value_.append (in, bytes);
    return 0;
}
// Handler invoked once all headers have been received.
//
// Delivers the request line (if no header triggered it already) and
// flushes the last pending field/value pair through on_field().
//
// The return value of this particular hook is special: returning 1
// from here tells the joyent parser that the message has no body
// (e.g. a HEAD request) and parsing continues. Errors must therefore
// NOT be reported as 1; they are reported as -1, which the joyent
// parser treats as a callback failure and halts.
//
// Returns 0 on success, -1 when check_url(), on_field() or a previously
// recorded `ec_` indicates an error.
//
int
message_parser::do_headers_done ()
{
    if (check_url())
        return -1;
    if (! value_.empty())
    {
        ec_ = on_field (field_, value_);
        if (ec_)
            return -1;
        field_.clear();
        value_.clear();
    }
    return ec_ ? -1 : 0;
}
// Handler for a fragment of the message body.
//
// Body data is currently ignored: neither `in` nor `bytes` is used.
// Returns 1 when an error is already recorded in `ec_`, otherwise 0.
//
int
message_parser::do_body (char const* in, std::size_t bytes)
{
    return ec_ ? 1 : 0;
}
// Handler invoked when the whole message has been parsed.
// Records completion so that complete() reports `true`; always returns 0.
//
int
message_parser::do_message_complete ()
{
    complete_ = true;
    return 0;
}
//------------------------------------------------------------------------------
// Trampoline installed as the joyent `on_message_begin` hook.
// Recovers the owning message_parser from the parser's user-data
// pointer and forwards to do_message_start().
//
int
message_parser::cb_message_start (joyent::http_parser* p)
{
    auto const self (static_cast <message_parser*> (p->data));
    return self->do_message_start();
}
// Trampoline installed as the joyent `on_url` hook.
// Recovers the owning message_parser from the parser's user-data
// pointer and forwards the fragment to do_url().
//
int
message_parser::cb_url (joyent::http_parser* p,
    char const* in, std::size_t bytes)
{
    auto const self (static_cast <message_parser*> (p->data));
    return self->do_url (in, bytes);
}
// Trampoline installed as the joyent `on_status` hook.
// Recovers the owning message_parser from the parser's user-data
// pointer and forwards the fragment to do_status().
//
int
message_parser::cb_status (joyent::http_parser* p,
    char const* in, std::size_t bytes)
{
    auto const self (static_cast <message_parser*> (p->data));
    return self->do_status (in, bytes);
}
// Trampoline installed as the joyent `on_header_field` hook.
// Recovers the owning message_parser from the parser's user-data
// pointer and forwards the fragment to do_header_field().
//
int
message_parser::cb_header_field (joyent::http_parser* p,
    char const* in, std::size_t bytes)
{
    auto const self (static_cast <message_parser*> (p->data));
    return self->do_header_field (in, bytes);
}
// Trampoline installed as the joyent `on_header_value` hook.
// Recovers the owning message_parser from the parser's user-data
// pointer and forwards the fragment to do_header_value().
//
int
message_parser::cb_header_value (joyent::http_parser* p,
    char const* in, std::size_t bytes)
{
    auto const self (static_cast <message_parser*> (p->data));
    return self->do_header_value (in, bytes);
}
// Trampoline installed as the joyent `on_headers_complete` hook.
// Recovers the owning message_parser from the parser's user-data
// pointer and forwards to do_headers_done().
//
int
message_parser::cb_headers_done (joyent::http_parser* p)
{
    auto const self (static_cast <message_parser*> (p->data));
    return self->do_headers_done();
}
// Trampoline installed as the joyent `on_body` hook.
// Recovers the owning message_parser from the parser's user-data
// pointer and forwards the fragment to do_body().
//
int
message_parser::cb_body (joyent::http_parser* p,
    char const* in, std::size_t bytes)
{
    auto const self (static_cast <message_parser*> (p->data));
    return self->do_body (in, bytes);
}
// Trampoline installed as the joyent `on_message_complete` hook.
// Recovers the owning message_parser from the parser's user-data
// pointer and forwards to do_message_complete().
//
int
message_parser::cb_message_complete (joyent::http_parser* p)
{
    auto const self (static_cast <message_parser*> (p->data));
    return self->do_message_complete();
}
} // http
} // beast

175
beast/http/message_parser.h Normal file
View File

@@ -0,0 +1,175 @@
//------------------------------------------------------------------------------
/*
This file is part of Beast: https://github.com/vinniefalco/Beast
Copyright 2013, Vinnie Falco <vinnie.falco@gmail.com>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//==============================================================================
#ifndef BEAST_HTTP_MESSAGE_PARSER_H_INCLUDED
#define BEAST_HTTP_MESSAGE_PARSER_H_INCLUDED
#include <beast/http/method.h>
#include <boost/system/error_code.hpp>
#include <array>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <tuple>
#include <utility>
namespace beast {
namespace joyent {
// Forward declaration only: the static hook functions of message_parser
// take a pointer to this type, and the full definition is not needed
// in this header.
struct http_parser;
}
namespace http {
/** Incremental HTTP message parser.

    This is an abstract base class wrapping the joyent http_parser.
    Input is supplied through `write_one` / `write`; as parsing
    progresses, the derived class is notified through the pure virtual
    `on_request` and `on_field` hooks. A non-zero error_code returned
    from a hook is stored and returned from the `write` functions.
*/
class message_parser
{
public:
    typedef boost::system::error_code error_code;

private:
    // These structures must exactly match the
    // declarations in the joyent http_parser.h include.
    //
    struct state_t
    {
        unsigned int type  : 2;
        unsigned int flags : 6;
        unsigned int state : 8;
        unsigned int header_state : 8;
        unsigned int index : 8;
        std::uint32_t nread;
        std::uint64_t content_length;
        unsigned short http_major;
        unsigned short http_minor;
        unsigned int status_code : 16;
        unsigned int method : 8;
        unsigned int http_errno : 7;
        unsigned int upgrade : 1;
        void *data;
    };

    typedef int (*data_cb_t) (
        state_t*, const char *at, size_t length);
    typedef int (*cb_t) (state_t*);

    struct hooks_t
    {
        cb_t      on_message_begin;
        data_cb_t on_url;
        data_cb_t on_status;
        data_cb_t on_header_field;
        data_cb_t on_header_value;
        cb_t      on_headers_complete;
        data_cb_t on_body;
        cb_t      on_message_complete;
    };

    // Last error returned by on_request / on_field.
    error_code ec_;

    // Opaque storage for the joyent parser state and settings, sized
    // from the mirror structs above.
    // NOTE(review): plain char arrays carry no alignment request for
    // the structs stored in them; confirm alignment is sufficient on
    // all supported targets.
    char state_ [sizeof(state_t)];
    char hooks_ [sizeof(hooks_t)];

    bool complete_;         // set when the message has been fully parsed
    std::string url_;       // accumulated URL fragments
    bool checked_url_;      // true once on_request has been invoked
    std::string field_;     // accumulated name of the pending header
    std::string value_;     // accumulated value of the pending header

protected:
    /** Construct the parser.
        If `request` is `true` this sets up the parser to
        process an HTTP request.
    */
    explicit
    message_parser (bool request);

public:
    /** Destroy the parser.
        Virtual because this class is a polymorphic base; this makes
        destruction through a `message_parser*` well defined.
    */
    virtual ~message_parser() = default;

    /** Returns `true` if parsing is complete.
        This is only defined when no errors have been returned.
    */
    bool
    complete() const
    {
        return complete_;
    }

    /** Write data to the parser.
        The return value includes the error code if any,
        and the number of bytes consumed in the input sequence.
    */
    std::pair <error_code, std::size_t>
    write_one (void const* in, std::size_t bytes);

    /** Write a single buffer to the parser.
        The buffer is converted with `boost::asio::buffer_cast` and
        `boost::asio::buffer_size` and passed to the pointer/size
        overload.
    */
    template <class ConstBuffer>
    std::pair <error_code, std::size_t>
    write_one (ConstBuffer const& buffer)
    {
        return write_one (boost::asio::buffer_cast <void const*> (buffer),
            boost::asio::buffer_size (buffer));
    }

    /** Write a sequence of buffers to the parser.
        Buffers are written in order until one of them produces an
        error. The returned count is the sum of the bytes consumed by
        the buffers written without error; bytes consumed from the
        buffer that produced the error are not included.
    */
    template <class ConstBufferSequence>
    std::pair <error_code, std::size_t>
    write (ConstBufferSequence const& buffers)
    {
        std::pair <error_code, std::size_t> result (error_code(), 0);
        for (auto const& buffer : buffers)
        {
            std::size_t bytes_consumed;
            std::tie (result.first, bytes_consumed) = write_one (buffer);
            if (result.first)
                break;
            result.second += bytes_consumed;
        }
        return result;
    }

protected:
    /** Called once with the method, HTTP version and URL.
        Returning a non-zero error_code stops parsing; the code is
        returned from the `write` functions.
    */
    virtual
    error_code
    on_request (method_t method, int http_major,
        int http_minor, std::string const& url) = 0;

    /** Called for each header field/value pair.
        Returning a non-zero error_code stops parsing; the code is
        returned from the `write` functions.
    */
    virtual
    error_code
    on_field (std::string const& field, std::string const& value) = 0;

private:
    // Member handlers, one per joyent callback.
    int check_url();
    int do_message_start ();
    int do_url (char const* in, std::size_t bytes);
    int do_status (char const* in, std::size_t bytes);
    int do_header_field (char const* in, std::size_t bytes);
    int do_header_value (char const* in, std::size_t bytes);
    int do_headers_done ();
    int do_body (char const* in, std::size_t bytes);
    int do_message_complete ();

    // Static trampolines installed in the joyent settings; each one
    // forwards to the member handler of the same name.
    static int cb_message_start (joyent::http_parser*);
    static int cb_url (joyent::http_parser*, char const*, std::size_t);
    static int cb_status (joyent::http_parser*, char const*, std::size_t);
    static int cb_header_field (joyent::http_parser*, char const*, std::size_t);
    static int cb_header_value (joyent::http_parser*, char const*, std::size_t);
    static int cb_headers_done (joyent::http_parser*);
    static int cb_body (joyent::http_parser*, char const*, std::size_t);
    static int cb_message_complete (joyent::http_parser*);
};
} // http
} // beast
#endif

View File

@@ -17,16 +17,15 @@
*/
//==============================================================================
#ifndef BEAST_HTTP_BASIC_MESSAGE_H_INCLUDED
#define BEAST_HTTP_BASIC_MESSAGE_H_INCLUDED
#ifndef BEAST_HTTP_METHOD_H_INCLUDED
#define BEAST_HTTP_METHOD_H_INCLUDED
#include <memory>
namespace beast {
namespace http {
namespace method {
enum methodc_t
enum class method_t
{
http_delete,
http_get,
@@ -65,20 +64,6 @@ enum methodc_t
http_patch,
http_purge
};
} // method
class basic_message
{
private:
public:
};
class basic_request
{
public:
};
}
}