Refactor PeerFinder:

Previously, the PeerFinder manager was constructed with a Callback object,
provided by the owner, which was used to perform operations like connecting,
disconnecting, and sending messages. This made it difficult to change the
overlay code because a single call into the PeerFinder could cause both
OverlayImpl and PeerImp to be re-entered one or more times, sometimes while
holding a recursive mutex. This change eliminates the callback by changing
PeerFinder functions to return values indicating the action the caller should
take.

As a result of this change, the PeerFinder no longer needs its own dedicated
thread. OverlayImpl is changed to call into PeerFinder on a timer to perform
periodic activities. Furthermore, the Checker class used to perform connectivity
checks has been refactored. It no longer uses an abstract base class, in order
to not type-erase the handler passed to async_connect (ensuring compatibility
with coroutines). To allow unit tests that don't need a network, the Logic
class is now templated on the Checker type. Currently the Manager provides its
own io_service. However, this can easily be changed so that the io_service is
provided upon construction.

Summary
* Remove unused SiteFiles dependency injection
* Remove Callback and update signatures for public APIs
* Remove obsolete functions
* Move timer to overlay
* Steps toward a shared io_service
* Templated, simplified Checker
* Tidy up Checker declaration
This commit is contained in:
Vinnie Falco
2014-10-07 18:00:14 -07:00
parent 5f59282ba1
commit 7c0c2419f7
18 changed files with 816 additions and 1120 deletions

View File

@@ -20,7 +20,6 @@
#include <ripple/overlay/impl/OverlayImpl.h>
#include <ripple/overlay/impl/PeerDoor.h>
#include <ripple/overlay/impl/PeerImp.h>
#include <beast/ByteOrder.h>
#if DOXYGEN
@@ -63,15 +62,12 @@ OverlayImpl::OverlayImpl (Stoppable& parent,
, m_child_count (1)
, m_journal (deprecatedLogs().journal("Overlay"))
, m_resourceManager (resourceManager)
, m_peerFinder (add (PeerFinder::Manager::New (
*this,
siteFiles,
pathToDbFileOrDirectory,
*this,
get_seconds_clock (),
deprecatedLogs().journal("PeerFinder"))))
, m_peerFinder (add (PeerFinder::Manager::New (*this,
pathToDbFileOrDirectory, get_seconds_clock (),
deprecatedLogs().journal("PeerFinder"))))
, m_io_service (io_service)
, m_ssl_context (ssl_context)
, timer_(io_service)
, m_resolver (resolver)
, m_nextShortId (0)
{
@@ -222,89 +218,6 @@ OverlayImpl::remove (PeerFinder::Slot::ptr const& slot)
release();
}
//--------------------------------------------------------------------------
//
// PeerFinder::Callback
//
//--------------------------------------------------------------------------
/** Call the single-endpoint connect() overload for each entry of `list`,
    in the order the entries appear.
*/
void
OverlayImpl::connect (std::vector <beast::IP::Endpoint> const& list)
{
    for (auto const& endpoint : list)
        connect (endpoint);
}
/** Look up the peer registered for `slot` and call its activate().

    The lookup and the call are both made while holding m_mutex. The slot
    is expected to be present in m_peers and its peer still alive; both
    expectations are checked with assert only.
*/
void
OverlayImpl::activate (PeerFinder::Slot::ptr const& slot)
{
    m_journal.trace << "Activate " << slot->remote_endpoint();

    std::lock_guard <decltype(m_mutex)> guard (m_mutex);

    auto const found = m_peers.find (slot);
    assert (found != m_peers.end ());

    // m_peers stores weak references, so promote before use.
    PeerImp::ptr const peer = found->second.lock();
    assert (peer != nullptr);

    peer->activate ();
}
/** Build an mtENDPOINTS protocol message from `endpoints` and send it to
    the peer registered for `slot`.

    Each endpoint contributes one TMEndpoint entry carrying its hop count
    and port. The ipv4 field holds the address in network byte order for
    IPv4 addresses and 0 for anything else.
*/
void
OverlayImpl::send (PeerFinder::Slot::ptr const& slot,
    std::vector <PeerFinder::Endpoint> const& endpoints)
{
    protocol::TMEndpoints tm;

    for (auto const& ep : endpoints)
    {
        protocol::TMEndpoint& entry = *tm.add_endpoints();
        auto* const ipv4 = entry.mutable_ipv4();

        if (ep.address.is_v4())
            ipv4->set_ipv4 (
                beast::toNetworkByteOrder (ep.address.to_v4().value));
        else
            ipv4->set_ipv4 (0);

        ipv4->set_ipv4port (ep.address.port());
        entry.set_hops (ep.hops);
    }

    tm.set_version (1);

    Message::pointer const msg =
        std::make_shared <Message> (tm, protocol::mtENDPOINTS);

    // The peer lookup and the send both happen under m_mutex.
    std::lock_guard <decltype(m_mutex)> guard (m_mutex);

    auto const found = m_peers.find (slot);
    assert (found != m_peers.end ());

    PeerImp::ptr const peer = found->second.lock();
    assert (peer != nullptr);

    peer->send (msg);
}
/** Close the connection of the peer registered for `slot`.

    @param slot     Key used to find the peer in m_peers.
    @param graceful Forwarded unchanged to PeerImp::close().

    The lookup and the close call are made while holding m_mutex.
*/
void
OverlayImpl::disconnect (PeerFinder::Slot::ptr const& slot, bool graceful)
{
    if (m_journal.trace)
    {
        char const* const suffix = graceful ? " gracefully" : "";
        m_journal.trace <<
            "Disconnect " << slot->remote_endpoint () << suffix;
    }

    std::lock_guard <decltype(m_mutex)> guard (m_mutex);

    auto const found = m_peers.find (slot);
    assert (found != m_peers.end ());

    PeerImp::ptr const peer = found->second.lock();
    assert (peer != nullptr);

    peer->close (graceful);
    //peer->detach ("disc", false);
}
//--------------------------------------------------------------------------
//
// Stoppable
@@ -405,6 +318,10 @@ OverlayImpl::onPrepare ()
void
OverlayImpl::onStart ()
{
// mutex not needed since we aren't running
++m_child_count;
boost::asio::spawn (m_io_service, std::bind (
&OverlayImpl::do_timer, this, std::placeholders::_1));
}
/** Close all peer connections.
@@ -423,13 +340,16 @@ OverlayImpl::close_all (bool graceful)
// ~PeerImp is pre-empted before it calls m_peers.remove()
//
if (peer != nullptr)
peer->close (graceful);
peer->close();
}
}
void
OverlayImpl::onStop ()
{
error_code ec;
timer_.cancel(ec);
if (m_doorDirect)
m_doorDirect->stop();
if (m_doorProxy)
@@ -467,7 +387,7 @@ OverlayImpl::onWrite (beast::PropertyStream::Map& stream)
are known.
*/
void
OverlayImpl::onPeerActivated (Peer::ptr const& peer)
OverlayImpl::activate (Peer::ptr const& peer)
{
std::lock_guard <decltype(m_mutex)> lock (m_mutex);
@@ -561,6 +481,56 @@ OverlayImpl::findPeerByShortID (Peer::ShortId const& id)
//------------------------------------------------------------------------------
/** Ask the PeerFinder which addresses to connect to and call connect()
    on each of them, in the order returned.
*/
void
OverlayImpl::autoconnect()
{
    auto const result = m_peerFinder->autoconnect();

    // Bind by const reference: the elements are only read, so there is
    // no reason to copy each one for every iteration.
    for (auto const& addr : result)
        connect (addr);
}
/** Ask the PeerFinder for the endpoints to send and forward each set to
    its peer.

    Every entry of the result pairs a slot (`first`) with a container of
    endpoints (`second`). m_mutex is held only while the slot is looked up
    in m_peers; send_endpoints() is called after the lock is released.
    Entries whose slot is not found, or whose peer no longer exists, are
    skipped.
*/
void
OverlayImpl::sendpeers()
{
    auto const batches = m_peerFinder->sendpeers();

    for (auto const& entry : batches)
    {
        auto const& slot = entry.first;
        auto const& endpoints = entry.second;

        // VFALCO TODO Make sure this doesn't race with closing the peer
        PeerImp::ptr peer;
        {
            std::lock_guard <decltype(m_mutex)> guard (m_mutex);
            auto const found = m_peers.find (slot);
            if (found != m_peers.end())
                peer = found->second.lock();
        }

        if (! peer)
            continue;

        peer->send_endpoints (endpoints.begin(), endpoints.end());
    }
}
/** Coroutine body that drives the periodic PeerFinder work.

    Each pass calls PeerFinder's once_per_second(), then sendpeers() and
    autoconnect(), and then suspends on timer_ for one second. The loop
    ends only when the wait completes with operation_aborted; any other
    wait result starts another pass. After the loop, release() is called
    while holding m_mutex.

    @param yield The coroutine context used to suspend on the timer wait.
*/
void
OverlayImpl::do_timer (yield_context yield)
{
    bool aborted = false;
    while (! aborted)
    {
        m_peerFinder->once_per_second();
        sendpeers();
        autoconnect();

        timer_.expires_from_now (std::chrono::seconds(1));

        error_code ec;
        timer_.async_wait (yield[ec]);
        aborted = (ec == boost::asio::error::operation_aborted);
    }

    // Take off a reference
    std::lock_guard <decltype(m_mutex)> guard (m_mutex);
    release();
}
//------------------------------------------------------------------------------
std::unique_ptr <Overlay>
make_Overlay (
beast::Stoppable& parent,

View File

@@ -27,13 +27,14 @@
#include <ripple/common/UnorderedContainers.h>
#include <ripple/peerfinder/Manager.h>
#include <ripple/resource/api/Manager.h>
#include <boost/asio/ip/tcp.hpp>
#include <boost/asio/ssl/context.hpp>
#include <boost/asio/basic_waitable_timer.hpp>
#include <boost/asio/spawn.hpp>
#include <beast/cxx14/memory.h> // <memory>
#include <atomic>
#include <cassert>
#include <chrono>
#include <condition_variable>
#include <mutex>
#include <unordered_map>
@@ -43,12 +44,13 @@ namespace ripple {
class PeerDoor;
class PeerImp;
class OverlayImpl
: public Overlay
, public PeerFinder::Callback
class OverlayImpl : public Overlay
{
private:
typedef boost::asio::ip::tcp::socket socket_type;
using clock_type = std::chrono::steady_clock;
using socket_type = boost::asio::ip::tcp::socket;
using error_code = boost::system::error_code;
using yield_context = boost::asio::yield_context;
typedef hash_map <PeerFinder::Slot::ptr,
std::weak_ptr <PeerImp>> PeersBySlot;
@@ -57,6 +59,7 @@ private:
typedef hash_map <Peer::ShortId, Peer::ptr> PeerByShortId;
// VFALCO TODO Change to regular mutex and eliminate re-entrancy
std::recursive_mutex m_mutex;
// Blocks us until dependent objects have been destroyed
@@ -72,6 +75,7 @@ private:
boost::asio::io_service& m_io_service;
boost::asio::ssl::context& m_ssl_context;
boost::asio::basic_waitable_timer <clock_type> timer_;
/** Associates slots to peers. */
PeersBySlot m_peers;
@@ -149,23 +153,6 @@ public:
void
remove (PeerFinder::Slot::ptr const& slot);
//
// PeerFinder::Callback
//
void
connect (std::vector <beast::IP::Endpoint> const& list);
void
activate (PeerFinder::Slot::ptr const& slot);
void
send (PeerFinder::Slot::ptr const& slot,
std::vector <PeerFinder::Endpoint> const& endpoints);
void
disconnect (PeerFinder::Slot::ptr const& slot, bool graceful);
//
// Stoppable
//
@@ -205,7 +192,7 @@ public:
are known.
*/
void
onPeerActivated (Peer::ptr const& peer);
activate (Peer::ptr const& peer);
/** A peer is being disconnected
This is called during the disconnection of a known, activated peer. It
@@ -216,6 +203,14 @@ public:
onPeerDisconnect (Peer::ptr const& peer);
private:
void
sendpeers();
void
autoconnect();
void
do_timer (yield_context yield);
};
} // ripple

View File

@@ -21,6 +21,7 @@
#include <ripple/overlay/impl/PeerImp.h>
#include <ripple/overlay/impl/Tuning.h>
#include <beast/streams/debug_ostream.h>
#include <functional>
namespace ripple {
@@ -86,20 +87,15 @@ PeerImp::start ()
}
void
PeerImp::activate ()
PeerImp::close()
{
assert (state_ == stateHandshaked);
state_ = stateActive;
assert(shortId_ == 0);
shortId_ = overlay_.next_id();
overlay_.onPeerActivated(shared_from_this ());
}
if (! strand_.running_in_this_thread())
return strand_.post (std::bind (
&PeerImp::close, shared_from_this()));
void
PeerImp::close (bool graceful)
{
was_canceled_ = true;
detach ("stop", graceful);
error_code ec;
timer_.cancel (ec);
socket_->close(ec);
}
//------------------------------------------------------------------------------
@@ -309,10 +305,7 @@ void PeerImp::do_connect ()
usage_ = resourceManager_.newOutboundEndpoint (remote_address_);
if (usage_.disconnect ())
{
detach ("do_connect");
return;
}
return detach ("do_connect");
boost::system::error_code ec;
timer_.expires_from_now (nodeVerifySeconds, ec);
@@ -321,8 +314,7 @@ void PeerImp::do_connect ()
if (ec)
{
journal_.error << "Failed to set verify timer.";
detach ("do_connect");
return;
return detach ("do_connect");
}
socket_->next_layer <NativeSocketType>().async_connect (
@@ -347,15 +339,15 @@ PeerImp::on_connect (error_code ec)
journal_.error <<
"Connect to " << remote_address_ <<
" failed: " << ec.message();
detach ("hc");
return;
return detach ("hc");
}
assert (state_ == stateConnecting);
state_ = stateConnected;
peerFinder_.on_connected (slot_,
beast::IPAddressConversion::from_asio (local_endpoint));
if (! peerFinder_.connected (slot_,
beast::IPAddressConversion::from_asio (local_endpoint)))
return detach("dup");
socket_->set_verify_mode (boost::asio::ssl::verify_none);
socket_->async_handshake (
@@ -895,8 +887,6 @@ PeerImp::on_message (std::shared_ptr <protocol::TMHello> const& m)
{
error_code ec;
bool bDetach (true);
timer_.cancel ();
std::uint32_t const ourTime (getApp().getOPs ().getNetworkTimeNC ());
@@ -915,6 +905,8 @@ PeerImp::on_message (std::shared_ptr <protocol::TMHello> const& m)
auto protocol = BuildInfo::make_protocol(m->protoversion());
// VFALCO TODO Report these failures in the HTTP response
if (m->has_nettime () &&
((m->nettime () < minTime) || (m->nettime () > maxTime)))
{
@@ -974,46 +966,61 @@ PeerImp::on_message (std::shared_ptr <protocol::TMHello> const& m)
"Connected to cluster node " << name_;
assert (state_ == stateConnected);
// VFALCO TODO Remove this needless state
state_ = stateHandshaked;
peerFinder_.on_handshake (slot_, RipplePublicKey (publicKey_),
clusterNode_);
auto const result = peerFinder_.activate (slot_,
RipplePublicKey (publicKey_), clusterNode_);
// XXX Set timer: connection is in grace period to be useful.
// XXX Set timer: connection idle (idle may vary depending on connection type.)
if ((hello_.has_ledgerclosed ()) && (
hello_.ledgerclosed ().size () == (256 / 8)))
if (result == PeerFinder::Result::success)
{
memcpy (closedLedgerHash_.begin (),
hello_.ledgerclosed ().data (), 256 / 8);
state_ = stateActive;
assert(shortId_ == 0);
shortId_ = overlay_.next_id();
overlay_.activate(shared_from_this ());
if ((hello_.has_ledgerprevious ()) &&
(hello_.ledgerprevious ().size () == (256 / 8)))
// XXX Set timer: connection is in grace period to be useful.
// XXX Set timer: connection idle (idle may vary depending on connection type.)
if ((hello_.has_ledgerclosed ()) && (
hello_.ledgerclosed ().size () == (256 / 8)))
{
memcpy (previousLedgerHash_.begin (),
hello_.ledgerprevious ().data (), 256 / 8);
addLedger (previousLedgerHash_);
}
else
{
previousLedgerHash_.zero ();
memcpy (closedLedgerHash_.begin (),
hello_.ledgerclosed ().data (), 256 / 8);
if ((hello_.has_ledgerprevious ()) &&
(hello_.ledgerprevious ().size () == (256 / 8)))
{
memcpy (previousLedgerHash_.begin (),
hello_.ledgerprevious ().data (), 256 / 8);
addLedger (previousLedgerHash_);
}
else
{
previousLedgerHash_.zero();
}
}
sendGetPeers();
return ec;
}
bDetach = false;
if (result == PeerFinder::Result::full)
{
// TODO Provide correct HTTP response
auto const redirects = peerFinder_.redirect (slot_);
send_endpoints (redirects.begin(), redirects.end());
}
else
{
// TODO Duplicate connection
}
}
if (bDetach)
{
//publicKey_.clear ();
//detach ("recvh");
ec = invalid_argument_error();
}
else
{
sendGetPeers ();
}
// VFALCO Commented this out because we return an error code
// to the caller, who calls detach for us.
//publicKey_.clear ();
//detach ("recvh");
ec = invalid_argument_error();
return ec;
}
@@ -2114,10 +2121,7 @@ PeerImp::detach (const char* rsn, bool graceful)
// to have PeerFinder work reliably.
detaching_ = true; // Race is ok.
if (was_canceled_)
peerFinder_.on_cancel (slot_);
else
peerFinder_.on_closed (slot_);
peerFinder_.on_closed (slot_);
if (state_ == stateActive)
overlay_.onPeerDisconnect (shared_from_this ());

View File

@@ -169,9 +169,6 @@ private:
// The slot assigned to us by PeerFinder
PeerFinder::Slot::ptr slot_;
// True if close was called
bool was_canceled_ = false;
boost::asio::streambuf read_buffer_;
boost::optional <beast::http::message> http_message_;
boost::optional <beast::http::parser> http_parser_;
@@ -211,16 +208,9 @@ public:
void
start ();
/** Indicates that the peer must be activated.
A peer is activated after the handshake is completed and if it is not
a second connection from a peer that we already have. Once activated
the peer transitions to `stateActive` and begins operating.
*/
// Cancel all I/O and close the socket
void
activate ();
/** Close the connection. */
void close (bool graceful);
close();
void
getLedger (protocol::TMGetLedger& packet);
@@ -232,6 +222,13 @@ public:
void
send (Message::pointer const& m) override;
/** Send a set of PeerFinder endpoints as a protocol message. */
template <class FwdIt, class = typename std::enable_if_t<std::is_same<
typename std::iterator_traits<FwdIt>::value_type,
PeerFinder::Endpoint>::value>>
void
send_endpoints (FwdIt first, FwdIt last);
beast::IP::Endpoint
getRemoteAddress() const override;
@@ -246,7 +243,7 @@ public:
getShortId () const override;
RippleAddress const&
getNodePublic () const;
getNodePublic () const override;
Json::Value
json() override;
@@ -512,6 +509,31 @@ private:
//------------------------------------------------------------------------------
template <class FwdIt, class>
void
PeerImp::send_endpoints (FwdIt first, FwdIt last)
{
protocol::TMEndpoints tm;
for (;first != last; ++first)
{
auto const& ep = *first;
protocol::TMEndpoint& tme (*tm.add_endpoints());
if (ep.address.is_v4())
tme.mutable_ipv4()->set_ipv4(
beast::toNetworkByteOrder (ep.address.to_v4().value));
else
tme.mutable_ipv4()->set_ipv4(0);
tme.mutable_ipv4()->set_ipv4port (ep.address.port());
tme.set_hops (ep.hops);
}
tm.set_version (1);
send (std::make_shared <Message> (tm, protocol::mtENDPOINTS));
}
//------------------------------------------------------------------------------
// DEPRECATED
// Duration (15 seconds) that do_connect() passes to
// timer_.expires_from_now() when arming the verify timer.
const boost::posix_time::seconds PeerImp::nodeVerifySeconds (15);
//------------------------------------------------------------------------------