Allow server to download cache from another clio server (#246)

* Allow server to download cache from another clio server

* Config takes an array of clio peers. If any of these peers has a
  full cache, clio picks one of those peers at random and downloads the
  cache from it. Otherwise, clio falls back to loading the cache from
  the database.
This commit is contained in:
CJ Cobb
2022-10-04 12:29:29 -04:00
committed by GitHub
parent 4c9c606202
commit 97ef66d130
4 changed files with 250 additions and 11 deletions

View File

@@ -22,6 +22,7 @@
"grpc_port":"50051"
}
],
"peers": [],
"dos_guard":
{
"whitelist":["127.0.0.1"]

View File

@@ -913,6 +913,172 @@ ReportingETL::monitor()
}
}
}
/// Populate the ledger state cache by streaming it from another clio server
/// over a websocket connection, instead of paging it out of the database.
///
/// Repeatedly issues "ledger_data" requests (binary, out_of_order) against
/// the peer and feeds each page of state objects into the local cache. The
/// peer must itself have a full cache; if it reports otherwise the download
/// is aborted.
///
/// @param ledgerIndex sequence of the ledger whose state is downloaded
/// @param ip          peer hostname or address
/// @param port        peer websocket port (as a string, for the resolver)
/// @param yield       coroutine context used for all async operations
/// @return true if the complete cache was downloaded and marked full;
///         false on any resolution, connection, I/O, or protocol error
bool
ReportingETL::loadCacheFromClioPeer(
    uint32_t ledgerIndex,
    std::string const& ip,
    std::string const& port,
    boost::asio::yield_context& yield)
{
    BOOST_LOG_TRIVIAL(info)
        << "Loading cache from peer. ip = " << ip << " . port = " << port;
    namespace beast = boost::beast;          // from <boost/beast.hpp>
    namespace http = beast::http;            // from <boost/beast/http.hpp>
    namespace websocket = beast::websocket;  // from
    namespace net = boost::asio;             // from
    using tcp = boost::asio::ip::tcp;        // from
    try
    {
        boost::beast::error_code ec;

        // These objects perform our I/O
        tcp::resolver resolver{ioContext_};

        BOOST_LOG_TRIVIAL(trace) << __func__ << " Creating websocket";
        auto ws =
            std::make_unique<websocket::stream<beast::tcp_stream>>(ioContext_);

        // Look up the domain name
        auto const results = resolver.async_resolve(ip, port, yield[ec]);
        if (ec)
            return false;  // was `return {};` — spell the bool out for
                           // consistency with every other error path

        BOOST_LOG_TRIVIAL(trace) << __func__ << " Connecting websocket";
        // Make the connection on the IP address we get from a lookup
        ws->next_layer().async_connect(results, yield[ec]);
        if (ec)
            return false;

        BOOST_LOG_TRIVIAL(trace)
            << __func__ << " Performing websocket handshake";
        // Perform the websocket handshake
        ws->async_handshake(ip, "/", yield[ec]);
        if (ec)
            return false;

        std::optional<boost::json::value> marker;
        BOOST_LOG_TRIVIAL(trace) << __func__ << " Sending request";

        // Build one page request; the marker (if any) resumes the iteration
        // where the previous page left off.
        auto getRequest = [&](auto const& marker) {
            boost::json::object request = {
                {"command", "ledger_data"},
                {"ledger_index", ledgerIndex},
                {"binary", true},
                {"out_of_order", true},
                {"limit", 2048}};
            if (marker)
                request["marker"] = *marker;
            return request;
        };

        do
        {
            // Send the message
            ws->async_write(
                net::buffer(boost::json::serialize(getRequest(marker))),
                yield[ec]);
            if (ec)
            {
                BOOST_LOG_TRIVIAL(error)
                    << __func__ << " error writing = " << ec.message();
                return false;
            }

            beast::flat_buffer buffer;
            ws->async_read(buffer, yield[ec]);
            if (ec)
            {
                BOOST_LOG_TRIVIAL(error)
                    << __func__ << " error reading = " << ec.message();
                return false;
            }

            auto raw = beast::buffers_to_string(buffer.data());
            auto parsed = boost::json::parse(raw);
            if (!parsed.is_object())
            {
                BOOST_LOG_TRIVIAL(error)
                    << __func__ << " Error parsing response: " << raw;
                return false;
            }
            BOOST_LOG_TRIVIAL(trace)
                << __func__ << " Successfully parsed response " << parsed;

            if (auto const& response = parsed.as_object();
                response.contains("error"))
            {
                BOOST_LOG_TRIVIAL(error)
                    << __func__ << " Response contains error: " << response;
                auto const& err = response.at("error");
                if (err.is_string() && err.as_string() == "lgrNotFound")
                {
                    // Peer hasn't seen this ledger yet; retry the same page.
                    // TODO(review): this retries immediately with no backoff
                    // — consider an async timer wait to avoid hammering the
                    // peer if the ledger is slow to arrive.
                    BOOST_LOG_TRIVIAL(warning)
                        << __func__
                        << " ledger not found. ledger = " << ledgerIndex
                        << ". trying again";
                    continue;
                }
                return false;
            }

            auto const& response = parsed.as_object()["result"].as_object();
            // The peer's cache must be full, or the pages it serves could be
            // missing objects; bail out and try another source.
            if (!response.contains("cache_full") ||
                !response.at("cache_full").as_bool())
            {
                BOOST_LOG_TRIVIAL(error)
                    << __func__ << " cache not full for clio node. ip = " << ip;
                return false;
            }

            // Absent marker means this was the last page.
            if (response.contains("marker"))
                marker = response.at("marker");
            else
                marker = {};

            // Decode this page of {index, data} hex pairs into cache objects.
            auto const& state = response.at("state").as_array();
            std::vector<Backend::LedgerObject> objects;
            objects.reserve(state.size());
            for (auto const& ledgerObject : state)
            {
                auto const& obj = ledgerObject.as_object();
                Backend::LedgerObject stateObject = {};

                if (!stateObject.key.parseHex(
                        obj.at("index").as_string().c_str()))
                {
                    BOOST_LOG_TRIVIAL(error)
                        << __func__ << " failed to parse object id";
                    return false;
                }
                boost::algorithm::unhex(
                    obj.at("data").as_string().c_str(),
                    std::back_inserter(stateObject.blob));
                objects.push_back(std::move(stateObject));
            }
            backend_->cache().update(objects, ledgerIndex, true);

            if (marker)
                BOOST_LOG_TRIVIAL(debug)
                    << __func__ << " - At marker " << *marker;
        } while (marker);

        BOOST_LOG_TRIVIAL(info)
            << __func__
            << " Finished downloading ledger from clio node. ip = " << ip;

        backend_->cache().setFull();
        return true;
    }
    catch (std::exception const& e)
    {
        BOOST_LOG_TRIVIAL(error)
            << __func__ << " Encountered exception : " << e.what()
            << " - ip = " << ip;
        return false;
    }
}
void
ReportingETL::loadCache(uint32_t seq)
@@ -936,6 +1102,51 @@ ReportingETL::loadCache(uint32_t seq)
assert(false);
return;
}
if (clioPeers.size() > 0)
{
boost::asio::spawn(
ioContext_, [this, seq](boost::asio::yield_context yield) {
for (auto const& peer : clioPeers)
{
// returns true on success
if (loadCacheFromClioPeer(
seq, peer.ip, std::to_string(peer.port), yield))
return;
}
// if we couldn't successfully load from any peers, load from db
loadCacheFromDb(seq);
});
return;
}
else
{
loadCacheFromDb(seq);
}
// If loading synchronously, poll cache until full
while (cacheLoadStyle_ == CacheLoadStyle::SYNC &&
!backend_->cache().isFull())
{
BOOST_LOG_TRIVIAL(debug)
<< "Cache not full. Cache size = " << backend_->cache().size()
<< ". Sleeping ...";
std::this_thread::sleep_for(std::chrono::seconds(10));
BOOST_LOG_TRIVIAL(info)
<< "Cache is full. Cache size = " << backend_->cache().size();
}
}
void
ReportingETL::loadCacheFromDb(uint32_t seq)
{
// sanity check to make sure we are not calling this multiple times
static std::atomic_bool loading = false;
if (loading)
{
assert(false);
return;
}
loading = true;
std::vector<Backend::LedgerObject> diff;
auto append = [](auto&& a, auto&& b) {
a.insert(std::end(a), std::begin(b), std::end(b));
@@ -1044,17 +1255,6 @@ ReportingETL::loadCache(uint32_t seq)
});
}
}};
// If loading synchronously, poll cache until full
while (cacheLoadStyle_ == CacheLoadStyle::SYNC &&
!backend_->cache().isFull())
{
BOOST_LOG_TRIVIAL(debug)
<< "Cache not full. Cache size = " << backend_->cache().size()
<< ". Sleeping ...";
std::this_thread::sleep_for(std::chrono::seconds(10));
BOOST_LOG_TRIVIAL(info)
<< "Cache is full. Cache size = " << backend_->cache().size();
}
}
void
@@ -1160,5 +1360,23 @@ ReportingETL::ReportingETL(
if (cache.contains("page_fetch_size") &&
cache.at("page_fetch_size").is_int64())
cachePageFetchSize_ = cache.at("page_fetch_size").as_int64();
if (cache.contains("peers") && cache.at("peers").is_array())
{
auto const& peers = cache.at("peers").as_array();
for (auto const& peer : peers)
{
auto const& clio = peer.as_object();
auto ip = clio.at("ip").as_string().c_str();
auto port = clio.at("port").as_int64();
clioPeers.emplace_back(ip, port);
}
unsigned seed =
std::chrono::system_clock::now().time_since_epoch().count();
std::shuffle(
clioPeers.begin(),
clioPeers.end(),
std::default_random_engine(seed));
}
}
}

View File

@@ -77,6 +77,14 @@ private:
// thread responsible for syncing the cache on startup
std::thread cacheDownloader_;
// Address of another clio server that this node may download its ledger
// state cache from on startup (tried in order; see loadCacheFromClioPeer).
// NOTE(review): populated via emplace_back(ip, port), which for an aggregate
// without constructors requires C++20 parenthesized aggregate init — confirm
// the project's language standard.
struct ClioPeer
{
std::string ip;
int port;
};
std::vector<ClioPeer> clioPeers;
std::thread worker_;
boost::asio::io_context& ioContext_;
@@ -177,6 +185,16 @@ private:
void
loadCache(uint32_t seq);
void
loadCacheFromDb(uint32_t seq);
bool
loadCacheFromClioPeer(
uint32_t ledgerSequence,
std::string const& ip,
std::string const& port,
boost::asio::yield_context& yield);
/// Run ETL. Extracts ledgers and writes them to the database, until a
/// write conflict occurs (or the server shuts down).
/// @note database must already be populated when this function is

View File

@@ -184,6 +184,8 @@ doLedgerData(Context const& context)
objects.push_back(toJson(sle));
}
response[JS(state)] = std::move(objects);
if (outOfOrder)
response["cache_full"] = context.backend->cache().isFull();
auto end2 = std::chrono::system_clock::now();
time = std::chrono::duration_cast<std::chrono::microseconds>(end2 - end)