index during ETL. not tested

This commit is contained in:
CJ Cobb
2021-04-08 20:07:05 +00:00
parent 5f9e5d03f4
commit d9a8ff5399
8 changed files with 289 additions and 54 deletions

View File

@@ -58,6 +58,7 @@ target_sources(reporting PRIVATE
reporting/ETLSource.cpp
reporting/CassandraBackend.cpp
reporting/PostgresBackend.cpp
reporting/BackendIndexer.cpp
reporting/Pg.cpp
reporting/DBHelpers.cpp
reporting/ReportingETL.cpp

View File

@@ -0,0 +1,76 @@
#include <reporting/BackendInterface.h>
namespace Backend {
/// Construct the indexer and start its background worker thread.
///
/// @param config JSON configuration. The optional integer entries "keyshift"
///               and "bookshift" override the in-class defaults of keyShift_
///               and bookShift_; when an entry is absent the default is kept
///               instead of throwing from `at()`.
BackendIndexer::BackendIndexer(boost::json::object const& config)
{
    if (config.contains("keyshift"))
        keyShift_ = static_cast<uint32_t>(config.at("keyshift").as_int64());
    if (config.contains("bookshift"))
        bookShift_ = static_cast<uint32_t>(config.at("bookshift").as_int64());

    // The work guard must exist before the thread starts so that
    // io_context::run() does not return immediately for lack of handlers.
    work_.emplace(ioc_);
    ioThread_ = std::thread{[this]() { ioc_.run(); }};
}
/// Shut the worker down: release the work guard, then block until the
/// thread running ioc_ has returned.
BackendIndexer::~BackendIndexer()
{
    // NOTE(review): mutex_ stays locked for the whole join; nothing else in
    // this file locks it, so confirm no posted task is meant to take it.
    std::lock_guard<std::mutex> guard(mutex_);
    work_.reset();
    ioThread_.join();
}
/// Add `key` to the in-memory key set (no effect if it is already there).
void
BackendIndexer::addKey(ripple::uint256 const& key)
{
    keys.emplace(key);
}
/// Remove `key` from the in-memory key set (no effect if it is not there).
void
BackendIndexer::deleteKey(ripple::uint256 const& key)
{
    if (auto const pos = keys.find(key); pos != keys.end())
        keys.erase(pos);
}
/// Record `offerKey` as an offer belonging to `book`, creating the book's
/// entry if this is its first offer.
void
BackendIndexer::addBookOffer(
    ripple::uint256 const& book,
    ripple::uint256 const& offerKey)
{
    auto& offersForBook = booksToOffers[book];
    offersForBook.insert(offerKey);
}
/// Remove `offerKey` from the live offers of `book` and remember it as
/// deleted until the next book write performed by finish().
void
BackendIndexer::deleteBookOffer(
    ripple::uint256 const& book,
    ripple::uint256 const& offerKey)
{
    // Look the book up instead of using operator[]: operator[] would insert
    // an empty offer set for every book that was never seen, and those empty
    // entries would then be kept (and copied by finish()) indefinitely.
    if (auto const bookIt = booksToOffers.find(book);
        bookIt != booksToOffers.end())
    {
        bookIt->second.erase(offerKey);
    }
    booksToDeletedOffers[book].insert(offerKey);
}
void
BackendIndexer::finish(uint32_t ledgerSequence, BackendInterface const& backend)
{
if (ledgerSequence >> keyShift_ << keyShift_ == ledgerSequence)
{
std::unordered_set<ripple::uint256> keysCopy = keys;
boost::asio::post(ioc_, [=, &backend]() {
BOOST_LOG_TRIVIAL(info) << "Indexer - writing keys. Ledger = "
<< std::to_string(ledgerSequence);
backend.writeKeys(keysCopy, ledgerSequence);
BOOST_LOG_TRIVIAL(info) << "Indexer - wrote keys. Ledger = "
<< std::to_string(ledgerSequence);
});
}
if (ledgerSequence >> bookShift_ << bookShift_ == ledgerSequence)
{
std::unordered_map<ripple::uint256, std::unordered_set<ripple::uint256>>
booksToOffersCopy = booksToOffers;
std::unordered_map<ripple::uint256, std::unordered_set<ripple::uint256>>
booksToDeletedOffersCopy = booksToDeletedOffers;
boost::asio::post(ioc_, [=, &backend]() {
BOOST_LOG_TRIVIAL(info) << "Indexer - writing books. Ledger = "
<< std::to_string(ledgerSequence);
backend.writeBooks(booksToOffersCopy, ledgerSequence);
backend.writeBooks(booksToDeletedOffersCopy, ledgerSequence);
BOOST_LOG_TRIVIAL(info) << "Indexer - wrote books. Ledger = "
<< std::to_string(ledgerSequence);
});
booksToDeletedOffers = {};
}
}
} // namespace Backend

View File

@@ -1,7 +1,19 @@
#ifndef RIPPLE_APP_REPORTING_BACKENDINTERFACE_H_INCLUDED
#define RIPPLE_APP_REPORTING_BACKENDINTERFACE_H_INCLUDED
#include <ripple/ledger/ReadView.h>
#include <boost/asio.hpp>
#include <reporting/DBHelpers.h>
namespace std {
/// Hash support so ripple::uint256 can be used as a key in the standard
/// unordered containers; the hash is computed over the elements in
/// [begin(), end()) with boost::hash_range.
template <>
struct hash<ripple::uint256>
{
    std::size_t
    operator()(ripple::uint256 const& value) const noexcept
    {
        auto const first = value.begin();
        auto const last = value.end();
        return boost::hash_range(first, last);
    }
};
}  // namespace std
namespace Backend {
using Blob = std::vector<unsigned char>;
struct LedgerObject
@@ -42,11 +54,51 @@ class DatabaseTimeout : public std::exception
return "Database read timed out. Please retry the request";
}
};
class BackendInterface;
// Accumulates, in memory, the set of ledger object keys and the offers of
// each order book while ledgers are written, and periodically hands
// snapshots of that state to a BackendInterface from a dedicated worker
// thread (see finish()).
class BackendIndexer
{
// io_context whose run() loop executes on ioThread_; finish() posts the
// index writes to it.
boost::asio::io_context ioc_;
// Locked by the destructor while it shuts the worker down.
std::mutex mutex_;
// Work guard created in the constructor and reset in the destructor.
std::optional<boost::asio::io_context::work> work_;
// Thread started by the constructor to run ioc_; joined by the destructor.
std::thread ioThread_;
// finish() writes keys when the low keyShift_ bits of the ledger sequence
// are all zero. The constructor sets this from the "keyshift" config entry.
uint32_t keyShift_ = 16;
// finish() writes books when the low bookShift_ bits of the ledger sequence
// are all zero. The constructor sets this from the "bookshift" config entry.
uint32_t bookShift_ = 16;
// Keys recorded by addKey() and not since removed by deleteKey().
std::unordered_set<ripple::uint256> keys;
// For each book, the offer keys added by addBookOffer() and not since
// removed by deleteBookOffer().
std::unordered_map<ripple::uint256, std::unordered_set<ripple::uint256>>
booksToOffers;
// For each book, the offer keys passed to deleteBookOffer() since the last
// book write; finish() empties this map after scheduling a book write.
std::unordered_map<ripple::uint256, std::unordered_set<ripple::uint256>>
booksToDeletedOffers;
public:
// Reads the shifts from config and starts the worker thread.
BackendIndexer(boost::json::object const& config);
// Releases the work guard and joins the worker thread.
~BackendIndexer();
// Records key in the key set.
void
addKey(ripple::uint256 const& key);
// Removes key from the key set.
void
deleteKey(ripple::uint256 const& key);
// Records offerKey as an offer of book.
void
addBookOffer(ripple::uint256 const& book, ripple::uint256 const& offerKey);
// Removes offerKey from book's offers and records it as deleted.
void
deleteBookOffer(
ripple::uint256 const& book,
ripple::uint256 const& offerKey);
// Posts key and/or book index writes for ledgerSequence to the worker
// thread when the sequence is aligned to keyShift_ / bookShift_. The posted
// tasks call backend.writeKeys / backend.writeBooks through the reference
// passed here.
void
finish(uint32_t ledgerSequence, BackendInterface const& backend);
};
class BackendInterface
{
private:
mutable BackendIndexer indexer_;
public:
// read methods
// Builds the embedded indexer from the same JSON configuration object that
// is handed to the backend.
BackendInterface(boost::json::object const& config) : indexer_(config)
{
}
virtual std::optional<uint32_t>
fetchLatestLedgerSequence() const = 0;
@@ -107,8 +159,37 @@ public:
std::string&& ledgerHeader,
bool isFirst = false) const = 0;
virtual void
// Updates the in-memory indexer for one ledger object and then forwards all
// arguments, unchanged, to the backend-specific doWriteLedgerObject().
//
// key       raw key bytes; converted with ripple::uint256::fromVoid.
//           NOTE(review): the length of key is not checked before the
//           conversion - presumably callers always pass a full 256-bit key.
// isCreated when true, the key (and the offer, if book is set) is added to
//           the indexer.
// isDeleted when true, the key (and the offer, if book is set) is removed
//           from the indexer.
// book      when set, the order book the object is indexed under.
void
writeLedgerObject(
    std::string&& key,
    uint32_t seq,
    std::string&& blob,
    bool isCreated,
    bool isDeleted,
    std::optional<ripple::uint256>&& book) const
{
    auto const objectKey = ripple::uint256::fromVoid(key.data());
    bool const hasBook = book.has_value();

    // The key set and the book maps are independent, so the bookkeeping is
    // grouped by event rather than by container.
    if (isCreated)
    {
        indexer_.addKey(objectKey);
        if (hasBook)
            indexer_.addBookOffer(*book, objectKey);
    }
    if (isDeleted)
    {
        indexer_.deleteKey(objectKey);
        if (hasBook)
            indexer_.deleteBookOffer(*book, objectKey);
    }

    doWriteLedgerObject(
        std::move(key),
        seq,
        std::move(blob),
        isCreated,
        isDeleted,
        std::move(book));
}
virtual void
doWriteLedgerObject(
std::string&& key,
uint32_t seq,
std::string&& blob,
@@ -141,11 +222,27 @@ public:
virtual void
startWrites() const = 0;
// Finishes the writes for ledgerSequence: first lets the indexer schedule
// any index writes due at this sequence, then runs the backend-specific
// doFinishWrites() and returns its result.
bool
finishWrites(uint32_t ledgerSequence) const
{
    indexer_.finish(ledgerSequence, *this);
    bool const finished = doFinishWrites();
    return finished;
}
virtual bool
finishWrites() const = 0;
doFinishWrites() const = 0;
virtual bool
doOnlineDelete(uint32_t minLedgerToKeep) const = 0;
// Writes the given set of ledger object keys for ledgerSequence.
// BackendIndexer::finish() calls this from the indexer's worker thread with
// a snapshot of the indexer's key set.
// NOTE(review): the meaning of the bool result is not defined here and
// BackendIndexer::finish() ignores it - confirm the intended contract.
virtual bool
writeKeys(
std::unordered_set<ripple::uint256> const& keys,
uint32_t ledgerSequence) const = 0;
// Writes, for ledgerSequence, every (book, offer key) pair contained in
// books. BackendIndexer::finish() calls this from the indexer's worker
// thread, once with the live offers and once with the deleted offers.
// NOTE(review): the meaning of the bool result is not defined here and
// BackendIndexer::finish() ignores it - confirm the intended contract.
virtual bool
writeBooks(
std::unordered_map<
ripple::uint256,
std::unordered_set<ripple::uint256>> const& books,
uint32_t ledgerSequence) const = 0;
virtual ~BackendInterface()
{

View File

@@ -2,6 +2,7 @@
#include <reporting/CassandraBackend.h>
#include <reporting/DBHelpers.h>
#include <unordered_map>
/*
namespace std {
template <>
struct hash<ripple::uint256>
@@ -13,6 +14,7 @@ struct hash<ripple::uint256>
}
};
} // namespace std
*/
namespace Backend {
template <class T, class F>
void
@@ -842,7 +844,7 @@ writeKeyCallback(CassFuture* fut, void* cbData)
bool
CassandraBackend::writeKeys(
std::unordered_set<ripple::uint256>& keys,
std::unordered_set<ripple::uint256> const& keys,
uint32_t ledgerSequence) const
{
BOOST_LOG_TRIVIAL(info)
@@ -889,26 +891,27 @@ CassandraBackend::writeKeys(
bool
CassandraBackend::writeBooks(
std::unordered_map<ripple::uint256, std::unordered_set<ripple::uint256>>&
books,
uint32_t ledgerSequence,
uint32_t numOffers) const
std::unordered_map<
ripple::uint256,
std::unordered_set<ripple::uint256>> const& books,
uint32_t ledgerSequence) const
{
BOOST_LOG_TRIVIAL(info)
<< __func__ << " Ledger = " << std::to_string(ledgerSequence)
<< " . num books = " << std::to_string(books.size())
<< " . num offers = " << std::to_string(numOffers);
std::atomic_uint32_t numRemaining = numOffers;
<< " . num books = " << std::to_string(books.size());
std::condition_variable cv;
std::mutex mtx;
std::vector<std::shared_ptr<WriteBookCallbackData>> cbs;
uint32_t concurrentLimit = maxRequestsOutstanding / 2;
uint32_t numSubmitted = 0;
std::atomic_uint32_t numOutstanding = 0;
size_t count = 0;
auto start = std::chrono::system_clock::now();
for (auto& book : books)
{
for (auto& offer : book.second)
{
++numOutstanding;
++count;
cbs.push_back(std::make_shared<WriteBookCallbackData>(
*this,
book.first,
@@ -916,40 +919,25 @@ CassandraBackend::writeBooks(
ledgerSequence,
cv,
mtx,
numRemaining));
numOutstanding));
writeBook2(*cbs.back());
++numSubmitted;
BOOST_LOG_TRIVIAL(trace) << __func__ << "Submitted a write request";
std::unique_lock<std::mutex> lck(mtx);
BOOST_LOG_TRIVIAL(trace) << __func__ << "Got the mutex";
cv.wait(
lck,
[&numRemaining, numSubmitted, concurrentLimit, numOffers]() {
BOOST_LOG_TRIVIAL(trace)
<< std::to_string(numSubmitted) << " "
<< std::to_string(numRemaining) << " "
<< std::to_string(numOffers) << " "
<< std::to_string(concurrentLimit);
return (numSubmitted - (numOffers - numRemaining)) <
concurrentLimit;
});
if (numSubmitted % 1000 == 0)
BOOST_LOG_TRIVIAL(debug)
<< __func__ << " Submitted " << std::to_string(numSubmitted)
<< " write requests. Completed "
<< (numOffers - numRemaining);
cv.wait(lck, [&numOutstanding, concurrentLimit]() {
return numOutstanding < concurrentLimit;
});
}
}
BOOST_LOG_TRIVIAL(info) << __func__
<< "Submitted all book writes. Waiting for them to "
"finish. num submitted = "
<< std::to_string(numSubmitted);
<< std::to_string(count);
std::unique_lock<std::mutex> lck(mtx);
cv.wait(lck, [&numRemaining]() { return numRemaining == 0; });
cv.wait(lck, [&numOutstanding]() { return numOutstanding == 0; });
BOOST_LOG_TRIVIAL(info) << __func__ << "Finished writing books";
return true;
}
bool
CassandraBackend::isIndexed(uint32_t ledgerSequence) const
{
@@ -986,6 +974,8 @@ CassandraBackend::getNextToIndex() const
bool
CassandraBackend::runIndexer(uint32_t ledgerSequence) const
{
return false;
/*
auto start = std::chrono::system_clock::now();
constexpr uint32_t limit = 2048;
std::unordered_set<ripple::uint256> keys;
@@ -1091,7 +1081,6 @@ CassandraBackend::runIndexer(uint32_t ledgerSequence) const
size_t numOffersDeleted = 0;
// Get the diff and update keys
std::vector<LedgerObject> objs;
std::unordered_set<ripple::uint256> deleted;
std::vector<uint32_t> sequences(256, 0);
std::iota(sequences.begin(), sequences.end(), i + 1);
@@ -1104,7 +1093,6 @@ CassandraBackend::runIndexer(uint32_t ledgerSequence) const
if (obj.blob.size() == 0)
{
keys.erase(obj.key);
deleted.insert(obj.key);
if (offers.count(obj.key) > 0)
{
auto book = offers[obj.key];
@@ -1115,8 +1103,8 @@ CassandraBackend::runIndexer(uint32_t ledgerSequence) const
}
else
{
// insert other keys. keys is a set, so this is a noop
// if obj.key is already in keys
// insert other keys. keys is a set, so this is a
// noop if obj.key is already in keys
keys.insert(obj.key);
// if the object is an offer, add to books
if (isOffer(obj.blob))
@@ -1167,8 +1155,8 @@ CassandraBackend::runIndexer(uint32_t ledgerSequence) const
nextLedgerSequence = prevLedgerSequence + (1 << indexerShift_);
}
return true;
*/
}
bool
CassandraBackend::doOnlineDelete(uint32_t minLedgerToKeep) const
{
@@ -1469,7 +1457,8 @@ CassandraBackend::open()
query = {};
query << "CREATE TABLE IF NOT EXISTS " << tablePrefix << "books2"
<< " ( book blob, sequence bigint, key blob, PRIMARY KEY "
"((book, sequence), key)) WITH CLUSTERING ORDER BY (key ASC)";
"((book, sequence), key)) WITH CLUSTERING ORDER BY (key "
"ASC)";
if (!executeSimpleStatement(query.str()))
continue;
query = {};
@@ -1633,7 +1622,7 @@ CassandraBackend::open()
<< " ALLOW FILTERING";
if (!upperBound2_.prepareStatement(query, session_.get()))
continue;
*/
*/
query = {};
query << "SELECT TOKEN(key) FROM " << tablePrefix << "objects "
<< " WHERE key = ? LIMIT 1";
@@ -1783,5 +1772,5 @@ CassandraBackend::open()
open_ = true;
BOOST_LOG_TRIVIAL(info) << "Opened database successfully";
}
} // namespace Backend
} // namespace Backend

View File

@@ -655,7 +655,8 @@ private:
mutable bool isFirstLedger_ = false;
public:
CassandraBackend(boost::json::object const& config) : config_(config)
// Passes config to the BackendInterface base (which builds the indexer from
// it) and also stores it in config_.
CassandraBackend(boost::json::object const& config)
: BackendInterface(config), config_(config)
{
}
@@ -798,7 +799,7 @@ public:
};
bool
finishWrites() const override
doFinishWrites() const override
{
// wait for all other writes to finish
sync();
@@ -973,15 +974,14 @@ public:
bool
writeKeys(
std::unordered_set<ripple::uint256>& keys,
std::unordered_set<ripple::uint256> const& keys,
uint32_t ledgerSequence) const;
bool
writeBooks(
std::unordered_map<
ripple::uint256,
std::unordered_set<ripple::uint256>>& books,
uint32_t ledgerSequence,
uint32_t numOffers) const;
std::unordered_set<ripple::uint256>> const& books,
uint32_t ledgerSequence) const override;
std::pair<std::vector<LedgerObject>, std::optional<ripple::uint256>>
fetchBookOffers(
ripple::uint256 const& book,
@@ -1270,7 +1270,7 @@ public:
executeAsyncWrite(statement, flatMapWriteBookCallback, data, isRetry);
}
void
writeLedgerObject(
doWriteLedgerObject(
std::string&& key,
uint32_t seq,
std::string&& blob,

View File

@@ -3,7 +3,9 @@
namespace Backend {
PostgresBackend::PostgresBackend(boost::json::object const& config)
: pgPool_(make_PgPool(config)), writeConnection_(pgPool_)
: BackendInterface(config)
, pgPool_(make_PgPool(config))
, writeConnection_(pgPool_)
{
}
void
@@ -50,7 +52,7 @@ PostgresBackend::writeAccountTransactions(
}
}
void
PostgresBackend::writeLedgerObject(
PostgresBackend::doWriteLedgerObject(
std::string&& key,
uint32_t seq,
std::string&& blob,
@@ -553,7 +555,7 @@ PostgresBackend::startWrites() const
}
bool
PostgresBackend::finishWrites() const
PostgresBackend::doFinishWrites() const
{
if (!abortWrite_)
{
@@ -584,6 +586,66 @@ PostgresBackend::finishWrites() const
return !abortWrite_;
}
/// Bulk-insert one row per key into the "keys" table.
///
/// Each row is `<ledgerSequence> TAB \\x<hex key>` followed by a newline.
///
/// @param keys           keys to write
/// @param ledgerSequence sequence stored alongside every key
/// @return true once every batch has been handed to bulkInsert. The original
///         fell off the end of this bool function without returning, which
///         is undefined behaviour.
bool
PostgresBackend::writeKeys(
    std::unordered_set<ripple::uint256> const& keys,
    uint32_t ledgerSequence) const
{
    // If the buffer gets too large the insert fails (cause unknown), so the
    // rows are flushed in batches of at most this many records.
    constexpr size_t maxRowsPerInsert = 1000000;

    PgQuery pgQuery(pgPool_);
    // The sequence is the same for every row; format it once.
    std::string const sequenceField = std::to_string(ledgerSequence);
    std::stringstream keysBuffer;
    size_t numRows = 0;
    for (auto const& key : keys)
    {
        keysBuffer << sequenceField << '\t' << "\\\\x" << ripple::strHex(key)
                   << '\n';
        ++numRows;
        if (numRows == maxRowsPerInsert)
        {
            pgQuery.bulkInsert("keys", keysBuffer.str());
            keysBuffer = {};
            numRows = 0;
        }
    }
    // Flush whatever is left over from the last, partially filled batch.
    if (numRows > 0)
    {
        pgQuery.bulkInsert("keys", keysBuffer.str());
    }
    return true;
}
/// Bulk-insert one row per (book, offer) pair into the "books" table.
///
/// Each row is `\\x<hex book> TAB <ledgerSequence> TAB \\x<hex offer key>`
/// followed by a newline.
///
/// @param books          map from book to the offer keys to write for it
/// @param ledgerSequence sequence stored alongside every pair
/// @return true once every batch has been handed to bulkInsert. The original
///         fell off the end of this bool function without returning, which
///         is undefined behaviour.
bool
PostgresBackend::writeBooks(
    std::unordered_map<
        ripple::uint256,
        std::unordered_set<ripple::uint256>> const& books,
    uint32_t ledgerSequence) const
{
    // If the buffer gets too large the insert fails (cause unknown), so the
    // rows are flushed in batches of at most this many records.
    constexpr size_t maxRowsPerInsert = 1000000;

    PgQuery pgQuery(pgPool_);
    // The sequence is the same for every row; format it once.
    std::string const sequenceField = std::to_string(ledgerSequence);
    std::stringstream booksBuffer;
    size_t numRows = 0;
    for (auto const& book : books)
    {
        // The book column is identical for all of this book's offers.
        std::string const bookField = ripple::strHex(book.first);
        for (auto const& offer : book.second)
        {
            booksBuffer << "\\\\x" << bookField << '\t' << sequenceField
                        << '\t' << "\\\\x" << ripple::strHex(offer) << '\n';
            ++numRows;
            if (numRows == maxRowsPerInsert)
            {
                pgQuery.bulkInsert("books", booksBuffer.str());
                booksBuffer = {};
                numRows = 0;
            }
        }
    }
    // Flush whatever is left over from the last, partially filled batch.
    if (numRows > 0)
    {
        pgQuery.bulkInsert("books", booksBuffer.str());
    }
    return true;
}
bool
PostgresBackend::doOnlineDelete(uint32_t minLedgerToKeep) const
{
uint32_t limit = 2048;

View File

@@ -79,7 +79,7 @@ public:
bool isFirst) const override;
void
writeLedgerObject(
doWriteLedgerObject(
std::string&& key,
uint32_t seq,
std::string&& blob,
@@ -108,10 +108,20 @@ public:
startWrites() const override;
bool
finishWrites() const override;
doFinishWrites() const override;
bool
doOnlineDelete(uint32_t minLedgerToKeep) const override;
bool
writeKeys(
std::unordered_set<ripple::uint256> const& keys,
uint32_t ledgerSequence) const override;
bool
writeBooks(
std::unordered_map<
ripple::uint256,
std::unordered_set<ripple::uint256>> const& books,
uint32_t ledgerSequence) const override;
};
} // namespace Backend
#endif

View File

@@ -131,7 +131,7 @@ ReportingETL::loadInitialLedger(uint32_t startingSequence)
{
flatMapBackend_->writeAccountTransactions(std::move(accountTxData));
}
flatMapBackend_->finishWrites();
flatMapBackend_->finishWrites(startingSequence);
auto end = std::chrono::system_clock::now();
BOOST_LOG_TRIVIAL(debug) << "Time to download and store ledger = "
<< ((end - start).count()) / 1000000000.0;
@@ -298,7 +298,7 @@ ReportingETL::buildNextLedger(org::xrpl::rpc::v1::GetLedgerResponse& rawData)
std::move(bookDir));
}
flatMapBackend_->writeAccountTransactions(std::move(accountTxData));
bool success = flatMapBackend_->finishWrites();
bool success = flatMapBackend_->finishWrites(lgrInfo.seq);
BOOST_LOG_TRIVIAL(debug)
<< __func__ << " : "
<< "Inserted/modified/deleted all objects. Number of objects = "