Add cassandra

CJ Cobb
2020-12-15 15:48:00 -05:00
parent 1d737014e9
commit 4e801db9a3
6 changed files with 1452 additions and 162 deletions

CMakeLists.txt View File

@@ -7,6 +7,7 @@
# Official repository: https://github.com/boostorg/beast
#
set(CMAKE_VERBOSE_MAKEFILE TRUE)
project(reporting)
cmake_minimum_required(VERSION 3.17)
set (CMAKE_CXX_STANDARD 17)
@@ -15,11 +16,16 @@ set(Boost_USE_STATIC_LIBS ON)
set(Boost_USE_MULTITHREADED ON)
set(Boost_USE_STATIC_RUNTIME ON)
FIND_PACKAGE( Boost 1.75 COMPONENTS log log_setup thread system REQUIRED )
FIND_PACKAGE( Boost 1.75 COMPONENTS filesystem log log_setup thread system REQUIRED )
add_executable (reporting
websocket_server_async.cpp
)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/deps")
include(ExternalProject)
message(${CMAKE_CURRENT_BINARY_DIR})
message(${CMAKE_MODULE_PATH})
include(cassandra)
@@ -83,7 +89,7 @@ target_include_directories (grpc_pbufs SYSTEM PUBLIC ${GRPC_GEN_DIR})
target_link_libraries (grpc_pbufs ${_REFLECTION} ${_PROTOBUF_LIBPROTOBUF} ${_GRPC_GRPCPP})
target_sources(reporting PRIVATE reporting/ETLSource.cpp)
target_sources(reporting PRIVATE reporting/ETLSource.cpp reporting/ReportingBackend.cpp)
message(${Boost_LIBRARIES})

reporting/ETLSource.cpp View File

@@ -25,47 +25,50 @@
#include <boost/log/trivial.hpp>
#include <reporting/ETLSource.h>
namespace ripple {
// Create ETL source without grpc endpoint
// Fetch ledger and load initial ledger will fail for this source
// Primarily used in read-only mode, to monitor when ledgers are validated
ETLSource::ETLSource(std::string ip, std::string wsPort)
: ip_(ip)
, wsPort_(wsPort)
, ws_(std::make_unique<
ETLSource::ETLSource(
boost::json::object const& config,
CassandraFlatMapBackend& backend)
: ws_(std::make_unique<
boost::beast::websocket::stream<boost::beast::tcp_stream>>(
boost::asio::make_strand(ioc_)))
, resolver_(boost::asio::make_strand(ioc_))
, timer_(ioc_)
, backend_(backend)
{
}
ETLSource::ETLSource(std::string ip, std::string wsPort, std::string grpcPort)
: ip_(ip)
, wsPort_(wsPort)
, grpcPort_(grpcPort)
, ws_(std::make_unique<
boost::beast::websocket::stream<boost::beast::tcp_stream>>(
boost::asio::make_strand(ioc_)))
, resolver_(boost::asio::make_strand(ioc_))
, timer_(ioc_)
{
try
if (config.contains("ip"))
{
boost::asio::ip::tcp::endpoint endpoint{
boost::asio::ip::make_address(ip_), std::stoi(grpcPort_)};
std::stringstream ss;
ss << endpoint;
stub_ = org::xrpl::rpc::v1::XRPLedgerAPIService::NewStub(
grpc::CreateChannel(ss.str(), grpc::InsecureChannelCredentials()));
BOOST_LOG_TRIVIAL(debug) << "Made stub for remote = " << toString();
auto ipJs = config.at("ip").as_string();
ip_ = {ipJs.c_str(), ipJs.size()};
}
catch (std::exception const& e)
if (config.contains("ws_port"))
{
BOOST_LOG_TRIVIAL(debug)
<< "Exception while creating stub = " << e.what()
<< " . Remote = " << toString();
auto portjs = config.at("ws_port").as_string();
wsPort_ = {portjs.c_str(), portjs.size()};
}
if (config.contains("grpc_port"))
{
auto portjs = config.at("grpc_port").as_string();
grpcPort_ = {portjs.c_str(), portjs.size()};
try
{
boost::asio::ip::tcp::endpoint endpoint{
boost::asio::ip::make_address(ip_), std::stoi(grpcPort_)};
std::stringstream ss;
ss << endpoint;
stub_ = org::xrpl::rpc::v1::XRPLedgerAPIService::NewStub(
grpc::CreateChannel(
ss.str(), grpc::InsecureChannelCredentials()));
BOOST_LOG_TRIVIAL(debug) << "Made stub for remote = " << toString();
}
catch (std::exception const& e)
{
BOOST_LOG_TRIVIAL(debug)
<< "Exception while creating stub = " << e.what()
<< " . Remote = " << toString();
}
}
}
@@ -388,6 +391,7 @@ public:
process(
std::unique_ptr<org::xrpl::rpc::v1::XRPLedgerAPIService::Stub>& stub,
grpc::CompletionQueue& cq,
CassandraFlatMapBackend& backend,
bool abort = false)
{
std::cout << "Processing calldata" << std::endl;
@@ -429,12 +433,10 @@ public:
for (auto& obj : *(cur_->mutable_ledger_objects()->mutable_objects()))
{
/*
flatMapBackend.store(
backend.store(
std::move(*obj.mutable_key()),
request_.ledger().sequence(),
std::move(*obj.mutable_data()));
*/
}
return more ? CallStatus::MORE : CallStatus::DONE;
@@ -505,7 +507,7 @@ ETLSource::loadInitialLedger(uint32_t sequence)
{
BOOST_LOG_TRIVIAL(debug)
<< "Marker prefix = " << ptr->getMarkerPrefix();
auto result = ptr->process(stub_, cq, abort);
auto result = ptr->process(stub_, cq, backend_, abort);
if (result != AsyncCallData::CallStatus::MORE)
{
numFinished++;
@@ -550,34 +552,18 @@ ETLSource::fetchLedger(uint32_t ledgerSequence, bool getObjects)
}
return {status, std::move(response)};
}
/*
ETLLoadBalancer::ETLLoadBalancer(ReportingETL& etl)
: etl_(etl)
, journal_(etl_.getApplication().journal("ReportingETL::LoadBalancer"))
ETLLoadBalancer::ETLLoadBalancer(
boost::json::array const& config,
CassandraFlatMapBackend& backend)
{
}
void
ETLLoadBalancer::add(
std::string& host,
std::string& websocketPort,
std::string& grpcPort)
{
std::unique_ptr<ETLSource> ptr =
std::make_unique<ETLSource>(host, websocketPort, grpcPort, etl_);
sources_.push_back(std::move(ptr));
BOOST_LOG_TRIVIAL(info) << __func__ << " : added etl source - "
<< sources_.back()->toString();
}
void
ETLLoadBalancer::add(std::string& host, std::string& websocketPort)
{
std::unique_ptr<ETLSource> ptr =
std::make_unique<ETLSource>(host, websocketPort, etl_);
sources_.push_back(std::move(ptr));
BOOST_LOG_TRIVIAL(info) << __func__ << " : added etl source - "
<< sources_.back()->toString();
for (auto& entry : config)
{
std::unique_ptr<ETLSource> source =
std::make_unique<ETLSource>(entry.as_object(), backend);
sources_.push_back(std::move(source));
BOOST_LOG_TRIVIAL(info) << __func__ << " : added etl source - "
<< sources_.back()->toString();
}
}
void
@@ -588,12 +574,9 @@ ETLLoadBalancer::loadInitialLedger(uint32_t sequence)
bool res = source->loadInitialLedger(sequence);
if (!res)
{
BOOST_LOG_TRIVIAL(error) << "Failed to download initial
ledger.
"
<< " Sequence = " << sequence
<< " source = " <<
source->toString();
BOOST_LOG_TRIVIAL(error) << "Failed to download initial ledger."
<< " Sequence = " << sequence
<< " source = " << source->toString();
}
return res;
},
@@ -634,6 +617,7 @@ ETLLoadBalancer::fetchLedger(uint32_t ledgerSequence, bool getObjects)
return {};
}
/*
std::unique_ptr<org::xrpl::rpc::v1::XRPLedgerAPIService::Stub>
ETLLoadBalancer::getP2pForwardingStub() const
{
@@ -691,8 +675,9 @@ ETLSource::getP2pForwardingStub() const
return org::xrpl::rpc::v1::XRPLedgerAPIService::NewStub(
grpc::CreateChannel(
beast::IP::Endpoint(
boost::asio::ip::make_address(ip_),
std::stoi(grpcPort_)) .to_string(), grpc::InsecureChannelCredentials()));
boost::asio::ip::make_address(ip_), std::stoi(grpcPort_))
.to_string(),
grpc::InsecureChannelCredentials()));
}
catch (std::exception const&)
{
@@ -705,8 +690,7 @@ Json::Value
ETLSource::forwardToP2p(RPC::JsonContext& context) const
{
BOOST_LOG_TRIVIAL(debug) << "Attempting to forward request to tx. "
<< "request = " <<
context.params.toStyledString();
<< "request = " << context.params.toStyledString();
Json::Value response;
if (!connected_)
@@ -718,9 +702,10 @@ context.params.toStyledString();
namespace beast = boost::beast; // from <boost/beast.hpp>
namespace http = beast::http; // from <boost/beast/http.hpp>
namespace websocket = beast::websocket; // from
<boost/beast/websocket.hpp> namespace net = boost::asio; // from
<boost/asio.hpp> using tcp = boost::asio::ip::tcp; // from
<boost/asio/ip/tcp.hpp> Json::Value& request = context.params; try
<boost / beast / websocket.hpp> namespace net = boost::asio; // from
<boost / asio.hpp> using tcp = boost::asio::ip::tcp; // from
<boost / asio / ip / tcp.hpp> Json::Value& request = context.params;
try
{
// The io_context is required for all I/O
net::io_context ioc;
@@ -742,7 +727,7 @@ context.params.toStyledString();
// and to tell rippled to charge the client IP for RPC
// resources. See "secure_gateway" in
//
https://github.com/ripple/rippled/blob/develop/cfg/rippled-example.cfg
https: // github.com/ripple/rippled/blob/develop/cfg/rippled-example.cfg
ws->set_option(websocket::stream_base::decorator(
[&context](websocket::request_type& req) {
req.set(
@@ -753,8 +738,8 @@ https://github.com/ripple/rippled/blob/develop/cfg/rippled-example.cfg
http::field::forwarded,
"for=" + context.consumer.to_string());
}));
BOOST_LOG_TRIVIAL(debug) << "client ip: " <<
context.consumer.to_string();
BOOST_LOG_TRIVIAL(debug)
<< "client ip: " << context.consumer.to_string();
BOOST_LOG_TRIVIAL(debug) << "Performing websocket handshake";
// Perform the websocket handshake
@@ -787,7 +772,7 @@ context.consumer.to_string();
return response;
}
}
*/
template <class Func>
bool
ETLLoadBalancer::execute(Func f, uint32_t ledgerSequence)
@@ -796,7 +781,7 @@ ETLLoadBalancer::execute(Func f, uint32_t ledgerSequence)
auto sourceIdx = rand() % sources_.size();
auto numAttempts = 0;
while (!etl_.isStopping())
while (true)
{
auto& source = sources_[sourceIdx];
@@ -836,13 +821,9 @@ ETLLoadBalancer::execute(Func f, uint32_t ledgerSequence)
numAttempts++;
if (numAttempts % sources_.size() == 0)
{
// If another process loaded the ledger into the database, we
can
// abort trying to fetch the ledger from a transaction
processing
// process
if (etl_.getApplication().getLedgerMaster().getLedgerBySeq(
ledgerSequence))
/*
if (etl_.getApplication().getLedgerMaster().getLedgerBySeq(
ledgerSequence))
{
BOOST_LOG_TRIVIAL(warning)
<< __func__ << " : "
@@ -851,6 +832,7 @@ processing
<< " Sequence = " << ledgerSequence;
break;
}
*/
BOOST_LOG_TRIVIAL(error)
<< __func__ << " : "
<< "Error executing function "
@@ -859,7 +841,7 @@ processing
std::this_thread::sleep_for(std::chrono::seconds(2));
}
}
return !etl_.isStopping();
return false;
}
void
@@ -875,5 +857,3 @@ ETLLoadBalancer::stop()
for (auto& source : sources_)
source->stop();
}
*/
} // namespace ripple

reporting/ETLSource.h View File

@@ -25,14 +25,11 @@
#include <boost/beast/core.hpp>
#include <boost/beast/core/string.hpp>
#include <boost/beast/websocket.hpp>
#include <reporting/ReportingBackend.h>
#include "org/xrpl/rpc/v1/xrp_ledger.grpc.pb.h"
#include <grpcpp/grpcpp.h>
namespace ripple {
class ReportingETL;
/// This class manages a connection to a single ETL source. This is almost
/// always a p2p node, but really could be another reporting node. This class
/// subscribes to the ledgers and transactions_proposed streams of the
@@ -88,6 +85,8 @@ class ETLSource
// used for retrying connections
boost::asio::steady_timer timer_;
CassandraFlatMapBackend& backend_;
public:
bool
isConnected() const
@@ -112,10 +111,9 @@ public:
/// Create ETL source without gRPC endpoint
/// Fetch ledger and load initial ledger will fail for this source
/// Primarily used in read-only mode, to monitor when ledgers are validated
ETLSource(std::string ip, std::string wsPort);
/// Create ETL source with gRPC endpoint
ETLSource(std::string ip, std::string wsPort, std::string grpcPort);
ETLSource(
boost::json::object const& config,
CassandraFlatMapBackend& backend);
/// @param sequence ledger sequence to check for
/// @return true if this source has the desired ledger
@@ -269,8 +267,6 @@ public:
getP2pForwardingStub() const;
*/
};
/*
*
/// This class is used to manage connections to transaction processing processes
/// This class spawns a listener for each etl source, which listens to messages
/// on the ledgers stream (to keep track of which ledgers have been validated by
@@ -280,29 +276,14 @@ public:
class ETLLoadBalancer
{
private:
ReportingETL& etl_;
beast::Journal journal_;
// ReportingETL& etl_;
std::vector<std::unique_ptr<ETLSource>> sources_;
public:
ETLLoadBalancer(ReportingETL& etl);
/// Add an ETL source
/// @param host host or ip of ETL source
/// @param websocketPort port where ETL source accepts websocket connections
/// @param grpcPort port where ETL source accepts gRPC requests
void
add(std::string& host, std::string& websocketPort, std::string& grpcPort);
/// Add an ETL source without gRPC support. This source will send messages
/// on the ledgers and transactions_proposed streams, but will not be able
/// to handle the gRPC requests that are used for ETL
/// @param host host or ip of ETL source
/// @param websocketPort port where ETL source accepts websocket connections
void
add(std::string& host, std::string& websocketPort);
ETLLoadBalancer(
boost::json::array const& config,
CassandraFlatMapBackend& backend);
/// Load the initial ledger, writing data to the queue
/// @param sequence sequence of ledger to download
@@ -336,47 +317,47 @@ public:
/// to clients).
/// @param in ETLSource in question
/// @return true if messages should be forwarded
bool
shouldPropagateTxnStream(ETLSource* in) const
{
for (auto& src : sources_)
{
assert(src);
// We pick the first ETLSource encountered that is connected
if (src->isConnected())
{
if (src.get() == in)
return true;
else
return false;
}
}
// bool
// shouldPropagateTxnStream(ETLSource* in) const
// {
// for (auto& src : sources_)
// {
// assert(src);
// // We pick the first ETLSource encountered that is connected
// if (src->isConnected())
// {
// if (src.get() == in)
// return true;
// else
// return false;
// }
// }
//
// // If no sources connected, then this stream has not been
// forwarded. return true;
// }
// If no sources connected, then this stream has not been forwarded.
return true;
}
Json::Value
toJson() const
{
Json::Value ret(Json::arrayValue);
for (auto& src : sources_)
{
ret.append(src->toJson());
}
return ret;
}
/// Randomly select a p2p node to forward a gRPC request to
/// @return gRPC stub to forward requests to p2p node
std::unique_ptr<org::xrpl::rpc::v1::XRPLedgerAPIService::Stub>
getP2pForwardingStub() const;
/// Forward a JSON RPC request to a randomly selected p2p node
/// @param context context of the request
/// @return response received from p2p node
Json::Value
forwardToP2p(RPC::JsonContext& context) const;
// Json::Value
// toJson() const
// {
// Json::Value ret(Json::arrayValue);
// for (auto& src : sources_)
// {
// ret.append(src->toJson());
// }
// return ret;
// }
//
// /// Randomly select a p2p node to forward a gRPC request to
// /// @return gRPC stub to forward requests to p2p node
// std::unique_ptr<org::xrpl::rpc::v1::XRPLedgerAPIService::Stub>
// getP2pForwardingStub() const;
//
// /// Forward a JSON RPC request to a randomly selected p2p node
// /// @param context context of the request
// /// @return response received from p2p node
// Json::Value
// forwardToP2p(RPC::JsonContext& context) const;
private:
/// f is a function that takes an ETLSource as an argument and returns a
@@ -393,6 +374,4 @@ private:
bool
execute(Func f, uint32_t ledgerSequence);
};
*/
} // namespace ripple
#endif
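For orientation, here is a rough usage sketch of the reworked ETLLoadBalancer declared above. It is not part of this commit (main() below only constructs a single ETLSource); backend, config, and startSequence are placeholder names.

// Hypothetical sketch: the load balancer is now built straight from the
// "etl_sources" array of the parsed JSON config plus a Cassandra backend,
// rather than from a ReportingETL reference as before.
ETLLoadBalancer balancer{config.at("etl_sources").as_array(), backend};

// Ask the configured sources to download the ledger at startSequence,
// retrying across sources until one succeeds.
balancer.loadInitialLedger(startSequence);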

reporting/ReportingBackend.cpp View File

@@ -0,0 +1,147 @@
#include <reporting/ReportingBackend.h>
// Process the result of an asynchronous write. Retry on error
// @param fut cassandra future associated with the write
// @param cbData struct that holds the request parameters
void
flatMapWriteCallback(CassFuture* fut, void* cbData)
{
CassandraFlatMapBackend::WriteCallbackData& requestParams =
*static_cast<CassandraFlatMapBackend::WriteCallbackData*>(cbData);
CassandraFlatMapBackend& backend = *requestParams.backend;
auto rc = cass_future_error_code(fut);
if (rc != CASS_OK)
{
BOOST_LOG_TRIVIAL(error)
<< "ERROR!!! Cassandra insert error: " << rc << ", "
<< cass_error_desc(rc) << ", retrying ";
// exponential backoff with a max wait of 2^10 ms (about 1 second)
auto wait = std::chrono::milliseconds(
lround(std::pow(2, std::min(10u, requestParams.currentRetries))));
++requestParams.currentRetries;
std::shared_ptr<boost::asio::steady_timer> timer =
std::make_shared<boost::asio::steady_timer>(
backend.ioContext_, std::chrono::steady_clock::now() + wait);
timer->async_wait([timer, &requestParams, &backend](
const boost::system::error_code& error) {
backend.write(requestParams, true);
});
}
else
{
--(backend.numRequestsOutstanding_);
backend.throttleCv_.notify_all();
if (backend.numRequestsOutstanding_ == 0)
backend.syncCv_.notify_all();
delete &requestParams;
}
}
void
flatMapWriteTransactionCallback(CassFuture* fut, void* cbData)
{
CassandraFlatMapBackend::WriteTransactionCallbackData& requestParams =
*static_cast<CassandraFlatMapBackend::WriteTransactionCallbackData*>(
cbData);
CassandraFlatMapBackend& backend = *requestParams.backend;
auto rc = cass_future_error_code(fut);
if (rc != CASS_OK)
{
BOOST_LOG_TRIVIAL(error)
<< "ERROR!!! Cassandra insert error: " << rc << ", "
<< cass_error_desc(rc) << ", retrying ";
// exponential backoff with a max wait of 2^10 ms (about 1 second)
auto wait = std::chrono::milliseconds(
lround(std::pow(2, std::min(10u, requestParams.currentRetries))));
++requestParams.currentRetries;
std::shared_ptr<boost::asio::steady_timer> timer =
std::make_shared<boost::asio::steady_timer>(
backend.ioContext_, std::chrono::steady_clock::now() + wait);
timer->async_wait([timer, &requestParams, &backend](
const boost::system::error_code& error) {
backend.writeTransaction(requestParams, true);
});
}
else
{
--(backend.numRequestsOutstanding_);
backend.throttleCv_.notify_all();
if (backend.numRequestsOutstanding_ == 0)
backend.syncCv_.notify_all();
delete &requestParams;
}
}
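Both write callbacks above use the same retry arithmetic. Factored out purely for illustration (no such helper exists in this commit), the schedule looks like this:

#include <algorithm>
#include <chrono>
#include <cmath>

// Illustration of the backoff used by flatMapWriteCallback and
// flatMapWriteTransactionCallback: the n-th consecutive failure waits
// 2^n milliseconds, capped at 2^10 ms (about one second).
std::chrono::milliseconds
retryWait(unsigned currentRetries)
{
    return std::chrono::milliseconds(
        std::lround(std::pow(2, std::min(10u, currentRetries))));
}

// Successive failures therefore wait 1, 2, 4, ..., 512, 1024, 1024, ... ms.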
// Process the result of an asynchronous read. Retry on error
// @param fut cassandra future associated with the read
// @param cbData struct that holds the request parameters
void
flatMapReadCallback(CassFuture* fut, void* cbData)
{
CassandraFlatMapBackend::ReadCallbackData& requestParams =
*static_cast<CassandraFlatMapBackend::ReadCallbackData*>(cbData);
CassError rc = cass_future_error_code(fut);
if (rc != CASS_OK)
{
BOOST_LOG_TRIVIAL(warning) << "Cassandra fetch error : " << rc << " : "
<< cass_error_desc(rc) << " - retrying";
// Retry right away. The only time the cluster should ever be overloaded
// is when the very first ledger is being written in full (millions of
// writes at once), during which no reads should be occurring. If reads
// are timing out, the code/architecture should be modified to handle
// greater read load, as opposed to just exponential backoff
requestParams.backend.read(requestParams);
}
else
{
auto finish = [&requestParams]() {
size_t batchSize = requestParams.batchSize;
if (++(requestParams.numFinished) == batchSize)
requestParams.cv.notify_all();
};
CassResult const* res = cass_future_get_result(fut);
CassRow const* row = cass_result_first_row(res);
if (!row)
{
cass_result_free(res);
BOOST_LOG_TRIVIAL(error) << "Cassandra fetch get row error : " << rc
<< ", " << cass_error_desc(rc);
finish();
return;
}
cass_byte_t const* buf;
std::size_t bufSize;
rc = cass_value_get_bytes(cass_row_get_column(row, 0), &buf, &bufSize);
if (rc != CASS_OK)
{
cass_result_free(res);
BOOST_LOG_TRIVIAL(error)
<< "Cassandra fetch get bytes error : " << rc << ", "
<< cass_error_desc(rc);
finish();
return;
}
std::vector<unsigned char> txn{buf, buf + bufSize};
cass_byte_t const* buf2;
std::size_t buf2Size;
rc =
cass_value_get_bytes(cass_row_get_column(row, 1), &buf2, &buf2Size);
if (rc != CASS_OK)
{
cass_result_free(res);
BOOST_LOG_TRIVIAL(error)
<< "Cassandra fetch get bytes error : " << rc << ", "
<< cass_error_desc(rc);
finish();
return;
}
std::vector<unsigned char> meta{buf2, buf2 + buf2Size};
requestParams.result = std::make_pair(std::move(txn), std::move(meta));
cass_result_free(res);
finish();
}
}

reporting/ReportingBackend.h Normal file (1156 lines)

File diff suppressed because it is too large.

websocket_server_async.cpp View File

@@ -24,6 +24,8 @@
#include <functional>
#include <iostream>
#include <memory>
#include <reporting/ETLSource.h>
#include <reporting/ReportingBackend.h>
#include <sstream>
#include <string>
#include <thread>
@@ -267,6 +269,26 @@ main(int argc, char* argv[])
auto const port = static_cast<unsigned short>(std::atoi(argv[2]));
auto const threads = std::max<int>(1, std::atoi(argv[3]));
auto const config = parse_config(argv[4]);
if (!config)
{
std::cerr << "couldnt parse config. Exiting..." << std::endl;
return EXIT_FAILURE;
}
auto cassConfig =
(*config).at("database").as_object().at("cassandra").as_object();
std::cout << cassConfig << std::endl;
CassandraFlatMapBackend backend{cassConfig};
backend.open();
boost::json::array sources = (*config).at("etl_sources").as_array();
if (!sources.size())
{
std::cerr << "no etl sources listed in config. exiting..." << std::endl;
return EXIT_FAILURE;
}
ETLSource source{sources[0].as_object(), backend};
source.start();
// source.loadInitialLedger(60000000);
// The io_context is required for all I/O
net::io_context ioc{threads};
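For reference, the config lookups in main() above imply at least the following shape. This is only a sketch: the values are placeholders, the port fields are JSON strings because ETLSource reads them with as_string(), and the fields inside the cassandra object are defined by CassandraFlatMapBackend in ReportingBackend.h (whose diff is suppressed above), so they are left empty here.

#include <boost/json.hpp>

// Hypothetical config shape consumed by main() and ETLSource above;
// all values are placeholders.
auto exampleConfig = boost::json::parse(R"({
    "database": {
        "cassandra": { }
    },
    "etl_sources": [
        { "ip": "127.0.0.1", "ws_port": "6006", "grpc_port": "50051" }
    ]
})");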