From ca886fe2c8ca9c5f182ade22659f20c16bce60cc Mon Sep 17 00:00:00 2001 From: CJ Cobb Date: Tue, 11 May 2021 19:06:02 +0000 Subject: [PATCH 01/25] add types to make it harder to mix up book index and key index --- reporting/BackendIndexer.cpp | 33 ++++++----- reporting/BackendInterface.h | 54 ++++++++++++------ reporting/CassandraBackend.cpp | 100 +++++++++++++++++---------------- reporting/CassandraBackend.h | 4 +- reporting/PostgresBackend.cpp | 95 +++++++++++++++---------------- reporting/PostgresBackend.h | 4 +- 6 files changed, 156 insertions(+), 134 deletions(-) diff --git a/reporting/BackendIndexer.cpp b/reporting/BackendIndexer.cpp index 6979310f..d7ca33fd 100644 --- a/reporting/BackendIndexer.cpp +++ b/reporting/BackendIndexer.cpp @@ -105,8 +105,8 @@ writeKeyFlagLedger( } auto start = std::chrono::system_clock::now(); - backend.writeKeys(keys, nextFlag, true); - backend.writeKeys({zero}, nextFlag, true); + backend.writeKeys(keys, KeyIndex{nextFlag}, true); + backend.writeKeys({zero}, KeyIndex{nextFlag}, true); auto end = std::chrono::system_clock::now(); BOOST_LOG_TRIVIAL(info) << __func__ @@ -134,8 +134,9 @@ writeBookFlagLedger( << " books.size() = " << std::to_string(books.size()); auto start = std::chrono::system_clock::now(); - backend.writeBooks(books, nextFlag, true); - backend.writeBooks({{zero, {zero}}}, nextFlag, true); + backend.writeBooks(books, BookIndex{nextFlag}, true); + backend.writeBooks({{zero, {zero}}}, BookIndex{nextFlag}, true); + auto end = std::chrono::system_clock::now(); BOOST_LOG_TRIVIAL(info) @@ -167,12 +168,12 @@ BackendIndexer::doBooksRepair( if (!sequence) sequence = rng->maxSequence; - if(sequence < rng->minSequence) + if (sequence < rng->minSequence) sequence = rng->minSequence; BOOST_LOG_TRIVIAL(info) << __func__ << " sequence = " << std::to_string(*sequence); - + ripple::uint256 zero = {}; while (true) { @@ -245,7 +246,7 @@ BackendIndexer::doKeysRepair( if (!sequence) sequence = rng->maxSequence; - if(sequence < rng->minSequence) + if (sequence < rng->minSequence) sequence = rng->minSequence; BOOST_LOG_TRIVIAL(info) @@ -435,30 +436,28 @@ BackendIndexer::finish(uint32_t ledgerSequence, BackendInterface const& backend) << __func__ << " starting. sequence = " << std::to_string(ledgerSequence); bool isFirst = false; - uint32_t keyIndex = getKeyIndexOfSeq(ledgerSequence); - uint32_t bookIndex = getBookIndexOfSeq(ledgerSequence); + auto keyIndex = getKeyIndexOfSeq(ledgerSequence); + auto bookIndex = getBookIndexOfSeq(ledgerSequence); auto rng = backend.fetchLedgerRangeNoThrow(); if (!rng || rng->minSequence == ledgerSequence) { isFirst = true; - keyIndex = bookIndex = ledgerSequence; + keyIndex = KeyIndex{ledgerSequence}; + bookIndex = BookIndex{ledgerSequence}; } backend.writeKeys(keys, keyIndex); backend.writeBooks(books, bookIndex); if (isFirst) { + // write completion record ripple::uint256 zero = {}; - backend.writeBooks({{zero, {zero}}}, ledgerSequence); - backend.writeKeys({zero}, ledgerSequence); - writeBookFlagLedgerAsync(ledgerSequence, backend); - writeKeyFlagLedgerAsync(ledgerSequence, backend); - + backend.writeBooks({{zero, {zero}}}, bookIndex); + backend.writeKeys({zero}, keyIndex); } keys = {}; books = {}; BOOST_LOG_TRIVIAL(info) << __func__ << " finished. sequence = " << std::to_string(ledgerSequence); - -} +} } // namespace Backend diff --git a/reporting/BackendInterface.h b/reporting/BackendInterface.h index 0cc7c3ea..17b68520 100644 --- a/reporting/BackendInterface.h +++ b/reporting/BackendInterface.h @@ -53,6 +53,19 @@ struct LedgerRange uint32_t maxSequence; }; +// The below two structs exist to prevent developers from accidentally mixing up +// the two indexes. +struct BookIndex +{ + uint32_t bookIndex; + explicit BookIndex(uint32_t v) : bookIndex(v){}; +}; +struct KeyIndex +{ + uint32_t keyIndex; + explicit KeyIndex(uint32_t v) : keyIndex(v){}; +}; + class DatabaseTimeout : public std::exception { const char* @@ -148,26 +161,33 @@ public: { return keyShift_; } - uint32_t + KeyIndex getKeyIndexOfSeq(uint32_t seq) const { if (isKeyFlagLedger(seq)) - return seq; + return KeyIndex{seq}; auto incr = (1 << keyShift_); - return (seq >> keyShift_ << keyShift_) + incr; + KeyIndex index{(seq >> keyShift_ << keyShift_) + incr}; + assert(isKeyFlagLedger(index.keyIndex)); + return index; } bool isKeyFlagLedger(uint32_t ledgerSequence) const { return (ledgerSequence % (1 << keyShift_)) == 0; } - uint32_t + BookIndex getBookIndexOfSeq(uint32_t seq) const { if (isBookFlagLedger(seq)) - return seq; + return BookIndex{seq}; auto incr = (1 << bookShift_); - return (seq >> bookShift_ << bookShift_) + incr; + BookIndex index{(seq >> bookShift_ << bookShift_) + incr}; + assert(isBookFlagLedger(index.bookIndex)); + assert( + bookShift_ == keyShift_ || !isKeyFlagLedger(index.bookIndex) || + !isKeyFlagLedger(index.bookIndex + incr)); + return index; } bool isBookFlagLedger(uint32_t ledgerSequence) const @@ -193,28 +213,28 @@ public: return indexer_; } - std::optional + std::optional getKeyIndexOfSeq(uint32_t seq) const { if (indexer_.isKeyFlagLedger(seq)) - return seq; + return KeyIndex{seq}; auto rng = fetchLedgerRange(); if (!rng) return {}; if (rng->minSequence == seq) - return seq; + return KeyIndex{seq}; return indexer_.getKeyIndexOfSeq(seq); } - std::optional + std::optional getBookIndexOfSeq(uint32_t seq) const { if (indexer_.isBookFlagLedger(seq)) - return seq; + return BookIndex{seq}; auto rng = fetchLedgerRange(); if (!rng) return {}; if (rng->minSequence == seq) - return seq; + return BookIndex{seq}; return indexer_.getBookIndexOfSeq(seq); } @@ -225,9 +245,11 @@ public: auto commitRes = doFinishWrites(); if (commitRes) { - if (indexer_.isBookFlagLedger(ledgerSequence)) + bool isFirst = + fetchLedgerRangeNoThrow()->minSequence == ledgerSequence; + if (indexer_.isBookFlagLedger(ledgerSequence) || isFirst) indexer_.writeBookFlagLedgerAsync(ledgerSequence, *this); - if (indexer_.isKeyFlagLedger(ledgerSequence)) + if (indexer_.isKeyFlagLedger(ledgerSequence) || isFirst) indexer_.writeKeyFlagLedgerAsync(ledgerSequence, *this); } return commitRes; @@ -381,14 +403,14 @@ public: virtual bool writeKeys( std::unordered_set const& keys, - uint32_t ledgerSequence, + KeyIndex const& index, bool isAsync = false) const = 0; virtual bool writeBooks( std::unordered_map< ripple::uint256, std::unordered_set> const& books, - uint32_t ledgerSequence, + BookIndex const& index, bool isAsync = false) const = 0; virtual ~BackendInterface() diff --git a/reporting/CassandraBackend.cpp b/reporting/CassandraBackend.cpp index e4f79e37..fd812271 100644 --- a/reporting/CassandraBackend.cpp +++ b/reporting/CassandraBackend.cpp @@ -405,12 +405,12 @@ CassandraBackend::fetchLedgerPage( LedgerPage page; BOOST_LOG_TRIVIAL(debug) << __func__ << " ledgerSequence = " << std::to_string(ledgerSequence) - << " index = " << std::to_string(*index); + << " index = " << std::to_string(index->keyIndex); if (cursor) BOOST_LOG_TRIVIAL(debug) << __func__ << " - Cursor = " << ripple::strHex(*cursor); CassandraStatement statement{selectKeys_}; - statement.bindInt(*index); + statement.bindInt(index->keyIndex); if (cursor) statement.bindBytes(*cursor); else @@ -503,15 +503,15 @@ CassandraBackend::fetchBookOffers( { auto rng = fetchLedgerRange(); auto limitTuningFactor = 50; - - if(!rng) - return {{},{}}; - auto readBooks = - [this, &book, &limit, &limitTuningFactor] - (std::uint32_t sequence) - -> std::pair>> - { + if (!rng) + return {{}, {}}; + + auto readBooks = + [this, &book, &limit, &limitTuningFactor](std::uint32_t sequence) + -> std::pair< + bool, + std::vector>> { CassandraStatement completeQuery{completeBook_}; completeQuery.bindInt(sequence); CassandraResult completeResult = executeSyncRead(completeQuery); @@ -519,12 +519,13 @@ CassandraBackend::fetchBookOffers( CassandraStatement statement{selectBook_}; std::vector> keys = {}; - + statement.bindBytes(book.data(), 24); statement.bindInt(sequence); - BOOST_LOG_TRIVIAL(info) << __func__ << " upper = " << std::to_string(sequence) - << " book = " << ripple::strHex(std::string((char*)book.data(), 24)); + BOOST_LOG_TRIVIAL(info) + << __func__ << " upper = " << std::to_string(sequence) << " book = " + << ripple::strHex(std::string((char*)book.data(), 24)); ripple::uint256 zero = beast::zero; statement.bindBytes(zero.data(), 8); @@ -560,8 +561,8 @@ CassandraBackend::fetchBookOffers( return {complete, keys}; }; - auto upper = indexer_.getBookIndexOfSeq(ledgerSequence); - auto [complete, quality_keys] = readBooks(upper); + auto upper = getBookIndexOfSeq(ledgerSequence); + auto [complete, quality_keys] = readBooks(upper->bookIndex); BOOST_LOG_TRIVIAL(debug) << __func__ << " - populated keys. num keys = " << quality_keys.size(); @@ -573,7 +574,7 @@ CassandraBackend::fetchBookOffers( BOOST_LOG_TRIVIAL(info) << "May be incomplete. Fetching other page"; auto bookShift = indexer_.getBookShift(); - std::uint32_t lower = upper - (1 << bookShift); + std::uint32_t lower = upper->bookIndex - (1 << bookShift); auto originalKeys = std::move(quality_keys); auto [lowerComplete, otherKeys] = readBooks(lower); @@ -581,32 +582,34 @@ CassandraBackend::fetchBookOffers( std::vector> merged_keys; merged_keys.reserve(originalKeys.size() + otherKeys.size()); - std::merge(originalKeys.begin(), originalKeys.end(), - otherKeys.begin(), otherKeys.end(), - std::back_inserter(merged_keys), - [](auto pair1, auto pair2) - { - return pair1.first < pair2.first; - }); + std::merge( + originalKeys.begin(), + originalKeys.end(), + otherKeys.begin(), + otherKeys.end(), + std::back_inserter(merged_keys), + [](auto pair1, auto pair2) { return pair1.first < pair2.first; }); } std::vector merged(quality_keys.size()); - std::transform(quality_keys.begin(), quality_keys.end(), - std::back_inserter(merged), - [](auto pair) { return pair.second; }); - + std::transform( + quality_keys.begin(), + quality_keys.end(), + std::back_inserter(merged), + [](auto pair) { return pair.second; }); + auto uniqEnd = std::unique(merged.begin(), merged.end()); std::vector keys{merged.begin(), uniqEnd}; std::cout << keys.size() << std::endl; - + auto start = std::chrono::system_clock::now(); std::vector objs = fetchLedgerObjects(keys, ledgerSequence); auto end = std::chrono::system_clock::now(); auto duration = ((end - start).count()) / 1000000000.0; - BOOST_LOG_TRIVIAL(info) << "Book object fetch took " - << std::to_string(duration) << " seconds."; + BOOST_LOG_TRIVIAL(info) + << "Book object fetch took " << std::to_string(duration) << " seconds."; std::vector results; for (size_t i = 0; i < objs.size(); ++i) @@ -615,8 +618,8 @@ CassandraBackend::fetchBookOffers( results.push_back({keys[i], objs[i]}); } - return {results, {}, warning}; -} + return {results, {}, warning}; +} // namespace Backend struct WriteBookCallbackData { CassandraBackend const& backend; @@ -654,7 +657,7 @@ writeBook(WriteBookCallbackData& cb) CassandraStatement statement{cb.backend.getInsertBookPreparedStatement()}; statement.bindBytes(cb.book.data(), 24); statement.bindInt(cb.ledgerSequence); - statement.bindBytes(cb.book.data()+24, 8); + statement.bindBytes(cb.book.data() + 24, 8); statement.bindBytes(cb.offerKey); // Passing isRetry as true bypasses incrementing numOutstanding cb.backend.executeAsyncWrite(statement, writeBookCallback, cb, true); @@ -775,14 +778,9 @@ writeKeyCallback(CassFuture* fut, void* cbData) bool CassandraBackend::writeKeys( std::unordered_set const& keys, - uint32_t ledgerSequence, + KeyIndex const& index, bool isAsync) const { - BOOST_LOG_TRIVIAL(info) - << __func__ << " Ledger = " << std::to_string(ledgerSequence) - << " . num keys = " << std::to_string(keys.size()) - << " . concurrentLimit = " - << std::to_string(indexerMaxRequestsOutstanding); std::atomic_uint32_t numRemaining = keys.size(); std::condition_variable cv; std::mutex mtx; @@ -790,11 +788,16 @@ CassandraBackend::writeKeys( cbs.reserve(keys.size()); uint32_t concurrentLimit = isAsync ? indexerMaxRequestsOutstanding : keys.size(); + BOOST_LOG_TRIVIAL(info) + << __func__ << " Ledger = " << std::to_string(index.keyIndex) + << " . num keys = " << std::to_string(keys.size()) + << " . concurrentLimit = " + << std::to_string(indexerMaxRequestsOutstanding); uint32_t numSubmitted = 0; for (auto& key : keys) { cbs.push_back(std::make_shared( - *this, key, ledgerSequence, cv, mtx, numRemaining)); + *this, key, index.keyIndex, cv, mtx, numRemaining)); writeKey(*cbs.back()); ++numSubmitted; BOOST_LOG_TRIVIAL(trace) << __func__ << "Submitted a write request"; @@ -828,11 +831,11 @@ CassandraBackend::writeBooks( std::unordered_map< ripple::uint256, std::unordered_set> const& books, - uint32_t ledgerSequence, + BookIndex const& index, bool isAsync) const { BOOST_LOG_TRIVIAL(info) - << __func__ << " Ledger = " << std::to_string(ledgerSequence) + << __func__ << " Ledger = " << std::to_string(index.bookIndex) << " . num books = " << std::to_string(books.size()); std::condition_variable cv; std::mutex mtx; @@ -852,7 +855,7 @@ CassandraBackend::writeBooks( *this, book.first, offer, - ledgerSequence, + index.bookIndex, cv, mtx, numOutstanding)); @@ -1100,7 +1103,7 @@ CassandraBackend::runIndexer(uint32_t ledgerSequence) const */ } bool -CassandraBackend::doOnlineDelete(uint32_t minLedgerToKeep) const +CassandraBackend::doOnlineDelete(uint32_t numLedgersToKeep) const { throw std::runtime_error("doOnlineDelete : unimplemented"); return false; @@ -1386,8 +1389,10 @@ CassandraBackend::open(bool readOnly) query.str(""); query << "CREATE TABLE IF NOT EXISTS " << tablePrefix << "books" - << " ( book blob, sequence bigint, quality_key tuple, PRIMARY KEY " - "((book, sequence), quality_key)) WITH CLUSTERING ORDER BY (quality_key " + << " ( book blob, sequence bigint, quality_key tuple, PRIMARY KEY " + "((book, sequence), quality_key)) WITH CLUSTERING ORDER BY " + "(quality_key " "ASC)"; if (!executeSimpleStatement(query.str())) continue; @@ -1564,11 +1569,10 @@ CassandraBackend::open(bool readOnly) query << "SELECT * FROM " << tablePrefix << "books " << "WHERE book = " << "0x000000000000000000000000000000000000000000000000" - << " AND sequence = ?"; + << " AND sequence = ?"; if (!completeBook_.prepareStatement(query, session_.get())) continue; - query.str(""); query << " INSERT INTO " << tablePrefix << "account_tx" << " (account, seq_idx, hash) " diff --git a/reporting/CassandraBackend.h b/reporting/CassandraBackend.h index 3a0e9c77..925e1570 100644 --- a/reporting/CassandraBackend.h +++ b/reporting/CassandraBackend.h @@ -1014,14 +1014,14 @@ public: bool writeKeys( std::unordered_set const& keys, - uint32_t ledgerSequence, + KeyIndex const& index, bool isAsync = false) const; bool writeBooks( std::unordered_map< ripple::uint256, std::unordered_set> const& books, - uint32_t ledgerSequence, + BookIndex const& index, bool isAsync = false) const override; BookOffersPage fetchBookOffers( diff --git a/reporting/PostgresBackend.cpp b/reporting/PostgresBackend.cpp index d97c7d80..f20cc8d6 100644 --- a/reporting/PostgresBackend.cpp +++ b/reporting/PostgresBackend.cpp @@ -335,7 +335,8 @@ PostgresBackend::fetchLedgerPage( PgQuery pgQuery(pgPool_); pgQuery("SET statement_timeout TO 10000"); std::stringstream sql; - sql << "SELECT key FROM keys WHERE ledger_seq = " << std::to_string(*index); + sql << "SELECT key FROM keys WHERE ledger_seq = " + << std::to_string(index->keyIndex); if (cursor) sql << " AND key > \'\\x" << ripple::strHex(*cursor) << "\'"; sql << " ORDER BY key ASC LIMIT " << std::to_string(limit); @@ -381,29 +382,27 @@ PostgresBackend::fetchBookOffers( auto rng = fetchLedgerRange(); auto limitTuningFactor = 50; - if(!rng) - return {{},{}}; + if (!rng) + return {{}, {}}; - ripple::uint256 bookBase = + ripple::uint256 bookBase = ripple::keylet::quality({ripple::ltDIR_NODE, book}, 0).key; ripple::uint256 bookEnd = ripple::getQualityNext(bookBase); using bookKeyPair = std::pair; - auto getBooks = - [this, &bookBase, &bookEnd, &limit, &limitTuningFactor] - (std::uint32_t sequence) - -> std::pair> - { + auto getBooks = [this, &bookBase, &bookEnd, &limit, &limitTuningFactor]( + std::uint32_t sequence) + -> std::pair> { BOOST_LOG_TRIVIAL(info) << __func__ << ": Fetching books between " - << "0x" << ripple::strHex(bookBase) << " and " - << "0x" << ripple::strHex(bookEnd) << "at ledger " - << std::to_string(sequence); + << "0x" << ripple::strHex(bookBase) << " and " + << "0x" << ripple::strHex(bookEnd) + << "at ledger " << std::to_string(sequence); auto start = std::chrono::system_clock::now(); std::stringstream sql; sql << "SELECT COUNT(*) FROM books WHERE " - << "book = \'\\x" << ripple::strHex(ripple::uint256(beast::zero)) + << "book = \'\\x" << ripple::strHex(ripple::uint256(beast::zero)) << "\' AND ledger_seq = " << std::to_string(sequence); bool complete; @@ -411,7 +410,7 @@ PostgresBackend::fetchBookOffers( auto res = pgQuery(sql.str().data()); if (size_t numRows = checkResult(res, 1)) complete = res.asInt(0, 0) != 0; - else + else return {false, {}}; sql.str(""); @@ -432,8 +431,7 @@ PostgresBackend::fetchBookOffers( auto duration = ((end - start).count()) / 1000000000.0; BOOST_LOG_TRIVIAL(info) << "Postgres book key fetch took " - << std::to_string(duration) - << " seconds"; + << std::to_string(duration) << " seconds"; if (size_t numRows = checkResult(res, 2)) { @@ -452,18 +450,16 @@ PostgresBackend::fetchBookOffers( return {complete, {}}; }; - auto fetchObjects = - [this] - (std::vector const& pairs, - std::uint32_t sequence, - std::uint32_t limit, - std::optional warning) - -> BookOffersPage - { + auto fetchObjects = + [this]( + std::vector const& pairs, + std::uint32_t sequence, + std::uint32_t limit, + std::optional warning) -> BookOffersPage { std::vector allKeys(pairs.size()); for (auto const& pair : pairs) allKeys.push_back(pair.second); - + auto uniqEnd = std::unique(allKeys.begin(), allKeys.end()); std::vector keys{allKeys.begin(), uniqEnd}; @@ -474,29 +470,28 @@ PostgresBackend::fetchBookOffers( auto end = std::chrono::system_clock::now(); auto duration = ((end - start).count()) / 1000000000.0; - BOOST_LOG_TRIVIAL(info) << "Postgres book objects fetch took " - << std::to_string(duration) - << " seconds. " - << "Fetched " - << std::to_string(ledgerEntries.size()) - << " ledger entries"; + BOOST_LOG_TRIVIAL(info) + << "Postgres book objects fetch took " << std::to_string(duration) + << " seconds. " + << "Fetched " << std::to_string(ledgerEntries.size()) + << " ledger entries"; std::vector objects; for (auto i = 0; i < ledgerEntries.size(); ++i) { - if(ledgerEntries[i].size() != 0) - objects.push_back(LedgerObject{keys[i], ledgerEntries[i]}); + if (ledgerEntries[i].size() != 0) + objects.push_back(LedgerObject{keys[i], ledgerEntries[i]}); } return {objects, {}, warning}; }; std::uint32_t bookShift = indexer_.getBookShift(); - auto upper = indexer_.getBookIndexOfSeq(ledgerSequence); + auto upper = getBookIndexOfSeq(ledgerSequence); - auto [upperComplete, upperResults] = getBooks(upper); + auto [upperComplete, upperResults] = getBooks(upper->bookIndex); - BOOST_LOG_TRIVIAL(info) << __func__ << ": Upper results found " + BOOST_LOG_TRIVIAL(info) << __func__ << ": Upper results found " << upperResults.size() << " books."; if (upperComplete) @@ -508,26 +503,28 @@ PostgresBackend::fetchBookOffers( BOOST_LOG_TRIVIAL(info) << "Upper book page is not complete " << "fetching again"; - auto lower = upper - (1 << bookShift); + auto lower = upper->bookIndex - (1 << bookShift); if (lower < rng->minSequence) lower = rng->minSequence; auto [lowerComplete, lowerResults] = getBooks(lower); - BOOST_LOG_TRIVIAL(info) << __func__ << ": Lower results found " + BOOST_LOG_TRIVIAL(info) << __func__ << ": Lower results found " << lowerResults.size() << " books."; assert(lowerComplete); std::vector pairs; pairs.reserve(upperResults.size() + lowerResults.size()); - std::merge(upperResults.begin(), upperResults.end(), - lowerResults.begin(), lowerResults.end(), - std::back_inserter(pairs), - [](bookKeyPair pair1, bookKeyPair pair2) -> bool - { - return pair1.first < pair2.first; - }); + std::merge( + upperResults.begin(), + upperResults.end(), + lowerResults.begin(), + lowerResults.end(), + std::back_inserter(pairs), + [](bookKeyPair pair1, bookKeyPair pair2) -> bool { + return pair1.first < pair2.first; + }); std::optional warning = "book data may be incomplete"; return fetchObjects(pairs, ledgerSequence, limit, warning); @@ -806,7 +803,7 @@ PostgresBackend::doFinishWrites() const bool PostgresBackend::writeKeys( std::unordered_set const& keys, - uint32_t ledgerSequence, + KeyIndex const& index, bool isAsync) const { BOOST_LOG_TRIVIAL(debug) << __func__; @@ -816,7 +813,7 @@ PostgresBackend::writeKeys( size_t numRows = 0; for (auto& key : keys) { - keysBuffer << std::to_string(ledgerSequence) << '\t' << "\\\\x" + keysBuffer << std::to_string(index.keyIndex) << '\t' << "\\\\x" << ripple::strHex(key) << '\n'; numRows++; // If the buffer gets too large, the insert fails. Not sure why. So we @@ -841,7 +838,7 @@ PostgresBackend::writeBooks( std::unordered_map< ripple::uint256, std::unordered_set> const& books, - uint32_t ledgerSequence, + BookIndex const& index, bool isAsync) const { BOOST_LOG_TRIVIAL(debug) << __func__; @@ -854,7 +851,7 @@ PostgresBackend::writeBooks( { for (auto& offer : book.second) { - booksBuffer << std::to_string(ledgerSequence) << '\t' << "\\\\x" + booksBuffer << std::to_string(index.bookIndex) << '\t' << "\\\\x" << ripple::strHex(book.first) << '\t' << "\\\\x" << ripple::strHex(offer) << '\n'; numRows++; diff --git a/reporting/PostgresBackend.h b/reporting/PostgresBackend.h index a50d37c6..27dbcaa9 100644 --- a/reporting/PostgresBackend.h +++ b/reporting/PostgresBackend.h @@ -117,14 +117,14 @@ public: bool writeKeys( std::unordered_set const& keys, - uint32_t ledgerSequence, + KeyIndex const& index, bool isAsync = false) const override; bool writeBooks( std::unordered_map< ripple::uint256, std::unordered_set> const& books, - uint32_t ledgerSequence, + BookIndex const& index, bool isAsync = false) const override; }; } // namespace Backend From 24816c021df1e8971b41fa31aa7e55f9c8db74fa Mon Sep 17 00:00:00 2001 From: CJ Cobb Date: Tue, 11 May 2021 21:14:47 +0000 Subject: [PATCH 02/25] online delete for cassandra. doesn't crash, but not sure it works --- reporting/BackendFactory.h | 5 +- reporting/BackendInterface.h | 2 +- reporting/CassandraBackend.cpp | 176 +++++++++++++++++++++++++++++++-- reporting/CassandraBackend.h | 13 ++- reporting/PostgresBackend.cpp | 12 ++- reporting/PostgresBackend.h | 2 +- reporting/ReportingETL.cpp | 3 +- 7 files changed, 197 insertions(+), 16 deletions(-) diff --git a/reporting/BackendFactory.h b/reporting/BackendFactory.h index c4630b0e..5ee48e79 100644 --- a/reporting/BackendFactory.h +++ b/reporting/BackendFactory.h @@ -8,12 +8,15 @@ namespace Backend { std::unique_ptr makeBackend(boost::json::object const& config) { - boost::json::object const& dbConfig = config.at("database").as_object(); + boost::json::object dbConfig = config.at("database").as_object(); auto type = dbConfig.at("type").as_string(); if (boost::iequals(type, "cassandra")) { + if (config.contains("online_delete")) + dbConfig.at(type).as_object()["ttl"] = + config.at("online_delete").as_int64() * 4; auto backend = std::make_unique(dbConfig.at(type).as_object()); return std::move(backend); diff --git a/reporting/BackendInterface.h b/reporting/BackendInterface.h index 17b68520..3fa3a6a5 100644 --- a/reporting/BackendInterface.h +++ b/reporting/BackendInterface.h @@ -399,7 +399,7 @@ public: doFinishWrites() const = 0; virtual bool - doOnlineDelete(uint32_t minLedgerToKeep) const = 0; + doOnlineDelete(uint32_t numLedgersToKeep) const = 0; virtual bool writeKeys( std::unordered_set const& keys, diff --git a/reporting/CassandraBackend.cpp b/reporting/CassandraBackend.cpp index fd812271..56f5a7d1 100644 --- a/reporting/CassandraBackend.cpp +++ b/reporting/CassandraBackend.cpp @@ -726,6 +726,87 @@ struct WriteKeyCallbackData { } }; +struct OnlineDeleteCallbackData +{ + CassandraBackend const& backend; + ripple::uint256 key; + uint32_t ledgerSequence; + std::vector object; + std::condition_variable& cv; + std::atomic_uint32_t& numOutstanding; + std::mutex& mtx; + uint32_t currentRetries = 0; + OnlineDeleteCallbackData( + CassandraBackend const& backend, + ripple::uint256&& key, + uint32_t ledgerSequence, + std::vector&& object, + std::condition_variable& cv, + std::mutex& mtx, + std::atomic_uint32_t& numOutstanding) + : backend(backend) + , key(std::move(key)) + , ledgerSequence(ledgerSequence) + , object(std::move(object)) + , cv(cv) + , mtx(mtx) + , numOutstanding(numOutstanding) + + { + } +}; +void +onlineDeleteCallback(CassFuture* fut, void* cbData); +void +onlineDelete(OnlineDeleteCallbackData& cb) +{ + { + CassandraStatement statement{ + cb.backend.getInsertObjectPreparedStatement()}; + statement.bindBytes(cb.key); + statement.bindInt(cb.ledgerSequence); + statement.bindBytes(cb.object); + + cb.backend.executeAsyncWrite(statement, onlineDeleteCallback, cb, true); + } +} +void +onlineDeleteCallback(CassFuture* fut, void* cbData) +{ + OnlineDeleteCallbackData& requestParams = + *static_cast(cbData); + + CassandraBackend const& backend = requestParams.backend; + auto rc = cass_future_error_code(fut); + if (rc != CASS_OK) + { + // exponential backoff with a max wait of 2^10 ms (about 1 second) + auto wait = std::chrono::milliseconds( + lround(std::pow(2, std::min(10u, requestParams.currentRetries)))); + BOOST_LOG_TRIVIAL(error) + << "ERROR!!! Cassandra insert book error: " << rc << ", " + << cass_error_desc(rc) << ", retrying in " << wait.count() + << " milliseconds"; + ++requestParams.currentRetries; + std::shared_ptr timer = + std::make_shared( + backend.getIOContext(), + std::chrono::steady_clock::now() + wait); + timer->async_wait( + [timer, &requestParams](const boost::system::error_code& error) { + onlineDelete(requestParams); + }); + } + else + { + BOOST_LOG_TRIVIAL(trace) << __func__ << " Successfully inserted a book"; + { + std::lock_guard lck(requestParams.mtx); + --requestParams.numOutstanding; + requestParams.cv.notify_one(); + } + } +} void writeKeyCallback(CassFuture* fut, void* cbData); void @@ -1105,8 +1186,77 @@ CassandraBackend::runIndexer(uint32_t ledgerSequence) const bool CassandraBackend::doOnlineDelete(uint32_t numLedgersToKeep) const { - throw std::runtime_error("doOnlineDelete : unimplemented"); - return false; + // calculate TTL + // ledgers close roughly every 4 seconds. We double the TTL so that way + // there is a window of time to update the database, to prevent unchanging + // records from being deleted. + auto rng = fetchLedgerRangeNoThrow(); + if (!rng) + return false; + uint32_t minLedger = rng->maxSequence - numLedgersToKeep; + if (minLedger <= rng->minSequence) + return false; + std::condition_variable cv; + std::mutex mtx; + std::vector> cbs; + uint32_t concurrentLimit = 10; + std::atomic_uint32_t numOutstanding = 0; + + // iterate through latest ledger, updating TTL + std::optional cursor; + while (true) + { + try + { + auto [objects, curCursor, warning] = + fetchLedgerPage(cursor, minLedger, 256); + if (warning) + { + BOOST_LOG_TRIVIAL(warning) + << __func__ + << " online delete running but flag ledger is not complete"; + std::this_thread::sleep_for(std::chrono::seconds(10)); + continue; + } + + for (auto& obj : objects) + { + ++numOutstanding; + cbs.push_back(std::make_shared( + *this, + std::move(obj.key), + minLedger, + std::move(obj.blob), + cv, + mtx, + numOutstanding)); + + onlineDelete(*cbs.back()); + std::unique_lock lck(mtx); + BOOST_LOG_TRIVIAL(trace) << __func__ << "Got the mutex"; + cv.wait(lck, [&numOutstanding, concurrentLimit]() { + return numOutstanding < concurrentLimit; + }); + } + BOOST_LOG_TRIVIAL(debug) << __func__ << " fetched a page"; + cursor = curCursor; + if (!cursor) + break; + } + catch (DatabaseTimeout const& e) + { + BOOST_LOG_TRIVIAL(warning) + << __func__ << " Database timeout fetching keys"; + std::this_thread::sleep_for(std::chrono::seconds(2)); + } + } + std::unique_lock lck(mtx); + cv.wait(lck, [&numOutstanding]() { return numOutstanding == 0; }); + CassandraStatement statement{deleteLedgerRange_}; + statement.bindInt(minLedger); + executeSyncWrite(statement); + // update ledger_range + return true; } void @@ -1208,6 +1358,7 @@ CassandraBackend::open(bool readOnly) int threads = config_.contains("threads") ? config_["threads"].as_int64() : std::thread::hardware_concurrency(); + int ttl = config_.contains("ttl") ? config_["ttl"].as_int64() * 2 : 0; rc = cass_cluster_set_num_threads_io(cluster, threads); if (rc != CASS_OK) @@ -1327,7 +1478,8 @@ CassandraBackend::open(bool readOnly) query << "CREATE TABLE IF NOT EXISTS " << tablePrefix << "objects" << " ( key blob, sequence bigint, object blob, PRIMARY " "KEY(key, " - "sequence)) WITH CLUSTERING ORDER BY (sequence DESC)"; + "sequence)) WITH CLUSTERING ORDER BY (sequence DESC) AND" + << " default_time_to_live = " << ttl; if (!executeSimpleStatement(query.str())) continue; @@ -1352,7 +1504,8 @@ CassandraBackend::open(bool readOnly) query << "CREATE TABLE IF NOT EXISTS " << tablePrefix << "transactions" << " ( hash blob PRIMARY KEY, ledger_sequence bigint, " "transaction " - "blob, metadata blob)"; + "blob, metadata blob)" + << " WITH default_time_to_live = " << ttl; if (!executeSimpleStatement(query.str())) continue; @@ -1407,7 +1560,9 @@ CassandraBackend::open(bool readOnly) " hash blob, " "PRIMARY KEY " "(account, seq_idx)) WITH " - "CLUSTERING ORDER BY (seq_idx desc)"; + "CLUSTERING ORDER BY (seq_idx desc)" + << " AND default_time_to_live = " << ttl; + if (!executeSimpleStatement(query.str())) continue; @@ -1419,7 +1574,8 @@ CassandraBackend::open(bool readOnly) query.str(""); query << "CREATE TABLE IF NOT EXISTS " << tablePrefix << "ledgers" - << " ( sequence bigint PRIMARY KEY, header blob )"; + << " ( sequence bigint PRIMARY KEY, header blob )" + << " WITH default_time_to_live = " << ttl; if (!executeSimpleStatement(query.str())) continue; @@ -1431,7 +1587,8 @@ CassandraBackend::open(bool readOnly) query.str(""); query << "CREATE TABLE IF NOT EXISTS " << tablePrefix << "ledger_hashes" - << " (hash blob PRIMARY KEY, sequence bigint)"; + << " (hash blob PRIMARY KEY, sequence bigint)" + << " WITH default_time_to_live = " << ttl; if (!executeSimpleStatement(query.str())) continue; @@ -1605,6 +1762,11 @@ CassandraBackend::open(bool readOnly) "(?,null)"; if (!updateLedgerRange_.prepareStatement(query, session_.get())) continue; + query = {}; + query << " update " << tablePrefix << "ledger_range" + << " set sequence = ? where is_latest = false"; + if (!deleteLedgerRange_.prepareStatement(query, session_.get())) + continue; query.str(""); query << " select header from " << tablePrefix diff --git a/reporting/CassandraBackend.h b/reporting/CassandraBackend.h index 925e1570..f9d66acd 100644 --- a/reporting/CassandraBackend.h +++ b/reporting/CassandraBackend.h @@ -166,6 +166,11 @@ public: bindBytes(data.data(), data.size()); } void + bindBytes(std::vector const& data) + { + bindBytes(data.data(), data.size()); + } + void bindBytes(ripple::AccountID const& data) { bindBytes(data.data(), data.size()); @@ -649,6 +654,7 @@ private: CassandraPreparedStatement insertLedgerHeader_; CassandraPreparedStatement insertLedgerHash_; CassandraPreparedStatement updateLedgerRange_; + CassandraPreparedStatement deleteLedgerRange_; CassandraPreparedStatement updateLedgerHeader_; CassandraPreparedStatement selectLedgerBySeq_; CassandraPreparedStatement selectLatestLedger_; @@ -735,6 +741,11 @@ public: { return insertBook2_; } + CassandraPreparedStatement const& + getInsertObjectPreparedStatement() const + { + return insertObject_; + } CassandraPreparedStatement const& getSelectLedgerDiffPreparedStatement() const @@ -1353,7 +1364,7 @@ public: syncCv_.wait(lck, [this]() { return finishedAllRequests(); }); } bool - doOnlineDelete(uint32_t minLedgerToKeep) const override; + doOnlineDelete(uint32_t numLedgersToKeep) const override; boost::asio::io_context& getIOContext() const diff --git a/reporting/PostgresBackend.cpp b/reporting/PostgresBackend.cpp index f20cc8d6..9d9c003c 100644 --- a/reporting/PostgresBackend.cpp +++ b/reporting/PostgresBackend.cpp @@ -874,14 +874,20 @@ PostgresBackend::writeBooks( return true; } bool -PostgresBackend::doOnlineDelete(uint32_t minLedgerToKeep) const +PostgresBackend::doOnlineDelete(uint32_t numLedgersToKeep) const { + auto rng = fetchLedgerRangeNoThrow(); + if (!rng) + return false; + uint32_t minLedger = rng->maxSequence - numLedgersToKeep; + if (minLedger <= rng->minSequence) + return false; uint32_t limit = 2048; PgQuery pgQuery(pgPool_); { std::stringstream sql; sql << "DELETE FROM ledgers WHERE ledger_seq < " - << std::to_string(minLedgerToKeep); + << std::to_string(minLedger); auto res = pgQuery(sql.str().data()); if (res.msg() != "ok") throw std::runtime_error("Error deleting from ledgers table"); @@ -892,7 +898,7 @@ PostgresBackend::doOnlineDelete(uint32_t minLedgerToKeep) const { std::stringstream sql; sql << "SELECT DISTINCT ON (key) key,ledger_seq,object FROM objects" - << " WHERE ledger_seq <= " << std::to_string(minLedgerToKeep); + << " WHERE ledger_seq <= " << std::to_string(minLedger); if (cursor.size()) sql << " AND key < \'\\x" << cursor << "\'"; sql << " ORDER BY key DESC, ledger_seq DESC" diff --git a/reporting/PostgresBackend.h b/reporting/PostgresBackend.h index 27dbcaa9..806a95da 100644 --- a/reporting/PostgresBackend.h +++ b/reporting/PostgresBackend.h @@ -113,7 +113,7 @@ public: doFinishWrites() const override; bool - doOnlineDelete(uint32_t minLedgerToKeep) const override; + doOnlineDelete(uint32_t numLedgersToKeep) const override; bool writeKeys( std::unordered_set const& keys, diff --git a/reporting/ReportingETL.cpp b/reporting/ReportingETL.cpp index 44dd1448..d238a369 100644 --- a/reporting/ReportingETL.cpp +++ b/reporting/ReportingETL.cpp @@ -507,8 +507,7 @@ ReportingETL::runETLPipeline(uint32_t startSequence, int numExtractors) deleting_ = true; ioContext_.post([this, &range]() { BOOST_LOG_TRIVIAL(info) << "Running online delete"; - flatMapBackend_->doOnlineDelete( - range->maxSequence - *onlineDeleteInterval_); + flatMapBackend_->doOnlineDelete(*onlineDeleteInterval_); BOOST_LOG_TRIVIAL(info) << "Finished online delete"; deleting_ = false; }); From 0babf5bc444075f90c2e0a45dcec5cfa6d5d950c Mon Sep 17 00:00:00 2001 From: CJ Cobb Date: Thu, 13 May 2021 18:51:34 +0000 Subject: [PATCH 03/25] online delete for postgres working --- reporting/CassandraBackend.cpp | 17 ++++- reporting/Pg.cpp | 22 +++--- reporting/PostgresBackend.cpp | 132 +++++++++++++-------------------- 3 files changed, 79 insertions(+), 92 deletions(-) diff --git a/reporting/CassandraBackend.cpp b/reporting/CassandraBackend.cpp index 56f5a7d1..865e6799 100644 --- a/reporting/CassandraBackend.cpp +++ b/reporting/CassandraBackend.cpp @@ -1359,6 +1359,16 @@ CassandraBackend::open(bool readOnly) ? config_["threads"].as_int64() : std::thread::hardware_concurrency(); int ttl = config_.contains("ttl") ? config_["ttl"].as_int64() * 2 : 0; + int keysTtl, keysIncr = pow(2, indexer_.getKeyShift()) * 4 * 2; + while (keysTtl < ttl) + { + keysTtl += keysIncr; + } + int booksTtl, booksIncr = pow(2, indexer_.getBookShift()) * 4 * 2; + while (booksTtl < ttl) + { + booksTtl += booksIncr; + } rc = cass_cluster_set_num_threads_io(cluster, threads); if (rc != CASS_OK) @@ -1530,7 +1540,9 @@ CassandraBackend::open(bool readOnly) query.str(""); query << "CREATE TABLE IF NOT EXISTS " << tablePrefix << "keys" << " ( sequence bigint, key blob, PRIMARY KEY " - "(sequence, key))"; + "(sequence, key))" + " WITH default_time_to_live = " + << keysTtl; if (!executeSimpleStatement(query.str())) continue; @@ -1546,7 +1558,8 @@ CassandraBackend::open(bool readOnly) "blob>, PRIMARY KEY " "((book, sequence), quality_key)) WITH CLUSTERING ORDER BY " "(quality_key " - "ASC)"; + "ASC) AND default_time_to_live = " + << booksTtl; if (!executeSimpleStatement(query.str())) continue; query.str(""); diff --git a/reporting/Pg.cpp b/reporting/Pg.cpp index fc7bfe40..7f082b5d 100644 --- a/reporting/Pg.cpp +++ b/reporting/Pg.cpp @@ -242,8 +242,10 @@ Pg::bulkInsert(char const* table, std::string const& records) { // https://www.postgresql.org/docs/12/libpq-copy.html#LIBPQ-COPY-SEND assert(conn_.get()); - static auto copyCmd = boost::format(R"(COPY %s FROM stdin)"); - auto res = query(boost::str(copyCmd % table).c_str()); + auto copyCmd = boost::format(R"(COPY %s FROM stdin)"); + auto formattedCmd = boost::str(copyCmd % table); + BOOST_LOG_TRIVIAL(info) << __func__ << " " << formattedCmd; + auto res = query(formattedCmd.c_str()); if (!res || res.status() != PGRES_COPY_IN) { std::stringstream ss; @@ -284,7 +286,8 @@ Pg::bulkInsert(char const* table, std::string const& records) { std::stringstream ss; ss << "bulkInsert to " << table - << ". PQputCopyEnd status not PGRES_COMMAND_OK: " << status; + << ". PQputCopyEnd status not PGRES_COMMAND_OK: " << status + << " message = " << PQerrorMessage(conn_.get()); disconnect(); BOOST_LOG_TRIVIAL(error) << __func__ << " " << records; throw std::runtime_error(ss.str()); @@ -750,11 +753,12 @@ CREATE TABLE IF NOT EXISTS ledgers ( CREATE TABLE IF NOT EXISTS objects ( key bytea NOT NULL, - ledger_seq bigint NOT NULL, - object bytea, - PRIMARY KEY(key, ledger_seq) + ledger_seq bigint NOT NULL REFERENCES ledgers ON DELETE CASCADE, + object bytea ) PARTITION BY RANGE (ledger_seq); +CREATE INDEX objects_idx ON objects USING btree(key, ledger_seq); + create table if not exists objects1 partition of objects for values from (0) to (10000000); create table if not exists objects2 partition of objects for values from (10000000) to (20000000); create table if not exists objects3 partition of objects for values from (20000000) to (30000000); @@ -772,7 +776,7 @@ CREATE INDEX IF NOT EXISTS ledgers_ledger_hash_idx ON ledgers -- cascade here based on ledger_seq. CREATE TABLE IF NOT EXISTS transactions ( hash bytea NOT NULL, - ledger_seq bigint NOT NULL , + ledger_seq bigint NOT NULL REFERENCES ledgers ON DELETE CASCADE, transaction bytea NOT NULL, metadata bytea NOT NULL ) PARTITION BY RANGE(ledger_seq); @@ -791,7 +795,7 @@ create index if not exists tx_by_lgr_seq on transactions using hash (ledger_seq) -- ledger table cascade here based on ledger_seq. CREATE TABLE IF NOT EXISTS account_transactions ( account bytea NOT NULL, - ledger_seq bigint NOT NULL , + ledger_seq bigint NOT NULL REFERENCES ledgers ON DELETE CASCADE, transaction_index bigint NOT NULL, hash bytea NOT NULL, PRIMARY KEY (account, ledger_seq, transaction_index, hash) @@ -815,7 +819,7 @@ CREATE TABLE IF NOT EXISTS books ( CREATE INDEX book_idx ON books using btree(ledger_seq, book, offer_key); CREATE TABLE IF NOT EXISTS keys ( - ledger_seq bigint NOT NULL, + ledger_seq bigint NOT NULL, key bytea NOT NULL ); diff --git a/reporting/PostgresBackend.cpp b/reporting/PostgresBackend.cpp index 9d9c003c..87385324 100644 --- a/reporting/PostgresBackend.cpp +++ b/reporting/PostgresBackend.cpp @@ -884,6 +884,44 @@ PostgresBackend::doOnlineDelete(uint32_t numLedgersToKeep) const return false; uint32_t limit = 2048; PgQuery pgQuery(pgPool_); + pgQuery("SET statement_timeout TO 0"); + std::optional cursor; + while (true) + { + try + { + auto [objects, curCursor, warning] = + fetchLedgerPage(cursor, minLedger, 256); + if (warning) + { + BOOST_LOG_TRIVIAL(warning) << __func__ + << " online delete running but " + "flag ledger is not complete"; + std::this_thread::sleep_for(std::chrono::seconds(10)); + continue; + } + BOOST_LOG_TRIVIAL(debug) << __func__ << " fetched a page"; + std::stringstream objectsBuffer; + + for (auto& obj : objects) + { + objectsBuffer << "\\\\x" << ripple::strHex(obj.key) << '\t' + << std::to_string(minLedger) << '\t' << "\\\\x" + << ripple::strHex(obj.blob) << '\n'; + } + pgQuery.bulkInsert("objects", objectsBuffer.str()); + cursor = curCursor; + if (!cursor) + break; + } + catch (DatabaseTimeout const& e) + { + BOOST_LOG_TRIVIAL(warning) + << __func__ << " Database timeout fetching keys"; + std::this_thread::sleep_for(std::chrono::seconds(2)); + } + } + BOOST_LOG_TRIVIAL(info) << __func__ << " finished inserting into objects"; { std::stringstream sql; sql << "DELETE FROM ledgers WHERE ledger_seq < " @@ -892,90 +930,22 @@ PostgresBackend::doOnlineDelete(uint32_t numLedgersToKeep) const if (res.msg() != "ok") throw std::runtime_error("Error deleting from ledgers table"); } - - std::string cursor; - do { std::stringstream sql; - sql << "SELECT DISTINCT ON (key) key,ledger_seq,object FROM objects" - << " WHERE ledger_seq <= " << std::to_string(minLedger); - if (cursor.size()) - sql << " AND key < \'\\x" << cursor << "\'"; - sql << " ORDER BY key DESC, ledger_seq DESC" - << " LIMIT " << std::to_string(limit); - BOOST_LOG_TRIVIAL(trace) << __func__ << sql.str(); + sql << "DELETE FROM keys WHERE ledger_seq < " + << std::to_string(minLedger); auto res = pgQuery(sql.str().data()); - BOOST_LOG_TRIVIAL(debug) << __func__ << "Fetched a page"; - if (size_t numRows = checkResult(res, 3)) - { - std::stringstream deleteSql; - std::stringstream deleteOffersSql; - deleteSql << "DELETE FROM objects WHERE ("; - deleteOffersSql << "DELETE FROM books WHERE ("; - bool firstOffer = true; - for (size_t i = 0; i < numRows; ++i) - { - std::string_view keyView{res.c_str(i, 0) + 2}; - int64_t sequence = res.asBigInt(i, 1); - std::string_view objView{res.c_str(i, 2) + 2}; - if (i != 0) - deleteSql << " OR "; - - deleteSql << "(key = " - << "\'\\x" << keyView << "\'"; - if (objView.size() == 0) - deleteSql << " AND ledger_seq <= " - << std::to_string(sequence); - else - deleteSql << " AND ledger_seq < " - << std::to_string(sequence); - deleteSql << ")"; - bool deleteOffer = false; - if (objView.size()) - { - deleteOffer = isOfferHex(objView); - } - else - { - // This is rather unelegant. For a deleted object, we - // don't know its type just from the key (or do we?). - // So, we just assume it is an offer and try to delete - // it. The alternative is to read the actual object out - // of the db from before it was deleted. This could - // result in a lot of individual reads though, so we - // chose to just delete - deleteOffer = true; - } - if (deleteOffer) - { - if (!firstOffer) - deleteOffersSql << " OR "; - deleteOffersSql << "( offer_key = " - << "\'\\x" << keyView << "\')"; - firstOffer = false; - } - } - if (numRows == limit) - cursor = res.c_str(numRows - 1, 0) + 2; - else - cursor = {}; - deleteSql << ")"; - deleteOffersSql << ")"; - BOOST_LOG_TRIVIAL(trace) << __func__ << deleteSql.str(); - res = pgQuery(deleteSql.str().data()); - if (res.msg() != "ok") - throw std::runtime_error("Error deleting from objects table"); - if (!firstOffer) - { - BOOST_LOG_TRIVIAL(trace) << __func__ << deleteOffersSql.str(); - res = pgQuery(deleteOffersSql.str().data()); - if (res.msg() != "ok") - throw std::runtime_error("Error deleting from books table"); - } - BOOST_LOG_TRIVIAL(debug) - << __func__ << "Deleted a page. Cursor = " << cursor; - } - } while (cursor.size()); + if (res.msg() != "ok") + throw std::runtime_error("Error deleting from keys table"); + } + { + std::stringstream sql; + sql << "DELETE FROM books WHERE ledger_seq < " + << std::to_string(minLedger); + auto res = pgQuery(sql.str().data()); + if (res.msg() != "ok") + throw std::runtime_error("Error deleting from books table"); + } return true; } From 1bcfe3aa8b818d3deec261a96142e48bfd591b90 Mon Sep 17 00:00:00 2001 From: CJ Cobb Date: Thu, 13 May 2021 21:53:29 +0000 Subject: [PATCH 04/25] add dos guard --- CMakeLists.txt | 2 +- handlers/AccountTx.cpp | 103 +------- server/DOSGuard.h | 86 +++++++ websocket_server_async.cpp | 470 ------------------------------------- 4 files changed, 92 insertions(+), 569 deletions(-) create mode 100644 server/DOSGuard.h delete mode 100644 websocket_server_async.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 28b74856..aea127de 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,7 +23,7 @@ file (TO_CMAKE_PATH "${BOOST_ROOT}" BOOST_ROOT) FIND_PACKAGE( Boost 1.75 COMPONENTS filesystem log log_setup thread system REQUIRED ) add_executable (reporting - websocket_server_async.cpp + server/websocket_server_async.cpp ) list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/deps") include(ExternalProject) diff --git a/handlers/AccountTx.cpp b/handlers/AccountTx.cpp index c4d4430d..c4eae77b 100644 --- a/handlers/AccountTx.cpp +++ b/handlers/AccountTx.cpp @@ -19,102 +19,6 @@ #include #include -#include - -std::vector, - std::shared_ptr>> -doAccountTxStoredProcedure( - ripple::AccountID const& account, - std::shared_ptr& pgPool, - BackendInterface const& backend) -{ - pg_params dbParams; - - char const*& command = dbParams.first; - std::vector>& values = dbParams.second; - command = - "SELECT account_tx($1::bytea, $2::bool, " - "$3::bigint, $4::bigint, $5::bigint, $6::bytea, " - "$7::bigint, $8::bool, $9::bigint, $10::bigint)"; - values.resize(10); - values[0] = "\\x" + ripple::strHex(account); - values[1] = "true"; - static std::uint32_t const page_length(200); - values[2] = std::to_string(page_length); - - auto res = PgQuery(pgPool)(dbParams); - if (!res) - { - BOOST_LOG_TRIVIAL(error) - << __func__ << " : Postgres response is null - account = " - << ripple::strHex(account); - assert(false); - return {}; - } - else if (res.status() != PGRES_TUPLES_OK) - { - assert(false); - return {}; - } - - if (res.isNull() || res.ntuples() == 0) - { - BOOST_LOG_TRIVIAL(error) - << __func__ << " : No data returned from Postgres : account = " - << ripple::strHex(account); - - assert(false); - return {}; - } - - char const* resultStr = res.c_str(); - - boost::json::object result = boost::json::parse(resultStr).as_object(); - if (result.contains("transactions")) - { - std::vector nodestoreHashes; - for (auto& t : result.at("transactions").as_array()) - { - boost::json::object obj = t.as_object(); - if (obj.contains("ledger_seq") && obj.contains("nodestore_hash")) - { - std::string nodestoreHashHex = - obj.at("nodestore_hash").as_string().c_str(); - nodestoreHashHex.erase(0, 2); - ripple::uint256 nodestoreHash; - if (!nodestoreHash.parseHex(nodestoreHashHex)) - assert(false); - - if (nodestoreHash.isNonZero()) - { - nodestoreHashes.push_back(nodestoreHash); - } - else - { - assert(false); - } - } - else - { - assert(false); - } - } - - std::vector, - std::shared_ptr>> - results; - auto dbResults = backend.fetchTransactions(nodestoreHashes); - for (auto const& res : dbResults) - { - if (res.transaction.size() && res.metadata.size()) - results.push_back(deserializeTxPlusMeta(res)); - } - return results; - } - return {}; -} // { // account: account, @@ -190,7 +94,9 @@ doAccountTx(boost::json::object const& request, BackendInterface const& backend) auto [blobs, retCursor] = backend.fetchAccountTransactions(*account, limit, cursor); auto end = std::chrono::system_clock::now(); - BOOST_LOG_TRIVIAL(info) << __func__ << " db fetch took " << ((end - start).count() / 1000000000.0) << " num blobs = " << blobs.size(); + BOOST_LOG_TRIVIAL(info) << __func__ << " db fetch took " + << ((end - start).count() / 1000000000.0) + << " num blobs = " << blobs.size(); for (auto const& txnPlusMeta : blobs) { if (txnPlusMeta.ledgerSequence > ledgerSequence) @@ -224,7 +130,8 @@ doAccountTx(boost::json::object const& request, BackendInterface const& backend) response["cursor"] = cursorJson; } auto end2 = std::chrono::system_clock::now(); - BOOST_LOG_TRIVIAL(info) << __func__ << " serialization took " << ((end2 - end).count() / 1000000000.0); + BOOST_LOG_TRIVIAL(info) << __func__ << " serialization took " + << ((end2 - end).count() / 1000000000.0); return response; } diff --git a/server/DOSGuard.h b/server/DOSGuard.h new file mode 100644 index 00000000..7d494955 --- /dev/null +++ b/server/DOSGuard.h @@ -0,0 +1,86 @@ +#include +#include +#include +#include + +class DOSGuard +{ + std::unordered_map ipFetchCount_; + uint32_t maxFetches_ = 100; + uint32_t sweepInterval_ = 1; + std::unordered_set whitelist_; + boost::asio::io_context& ctx_; + std::mutex mtx_; + +public: + DOSGuard(boost::json::object const& config, boost::asio::io_context& ctx) + : ctx_(ctx) + { + if (config.contains("dos_guard")) + { + auto dosGuardConfig = config.at("dos_guard").as_object(); + if (dosGuardConfig.contains("max_fetches") && + dosGuardConfig.contains("sweep_interval")) + { + maxFetches_ = dosGuardConfig.at("max_fetches").as_int64(); + sweepInterval_ = dosGuardConfig.at("sweep_interval").as_int64(); + } + if (dosGuardConfig.contains("whitelist")) + { + auto whitelist = dosGuardConfig.at("whitelist").as_array(); + for (auto& ip : whitelist) + whitelist_.insert(ip.as_string().c_str()); + } + } + createTimer(); + } + + void + createTimer() + { + auto wait = std::chrono::seconds(sweepInterval_); + std::shared_ptr timer = + std::make_shared( + ctx_, std::chrono::steady_clock::now() + wait); + timer->async_wait( + [timer, this](const boost::system::error_code& error) { + clear(); + createTimer(); + }); + } + + bool + isOk(std::string const& ip) + { + if (whitelist_.count(ip) > 0) + return true; + std::unique_lock lck(mtx_); + auto it = ipFetchCount_.find(ip); + if (it == ipFetchCount_.end()) + return true; + return it->second < maxFetches_; + } + + bool + add(std::string const& ip, uint32_t numObjects) + { + if (whitelist_.count(ip) > 0) + return true; + { + std::unique_lock lck(mtx_); + auto it = ipFetchCount_.find(ip); + if (it == ipFetchCount_.end()) + ipFetchCount_[ip] = numObjects; + else + it->second += numObjects; + } + return isOk(ip); + } + + void + clear() + { + std::unique_lock lck(mtx_); + ipFetchCount_.clear(); + } +}; diff --git a/websocket_server_async.cpp b/websocket_server_async.cpp deleted file mode 100644 index 1b4d65e3..00000000 --- a/websocket_server_async.cpp +++ /dev/null @@ -1,470 +0,0 @@ -// -// Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com) -// -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -// -// Official repository: https://github.com/boostorg/beast -// - -//------------------------------------------------------------------------------ -// -// Example: WebSocket server, asynchronous -// -//------------------------------------------------------------------------------ - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -//------------------------------------------------------------------------------ -enum RPCCommand { - tx, - account_tx, - ledger, - account_info, - ledger_data, - book_offers, - ledger_range, - ledger_entry -}; -std::unordered_map commandMap{ - {"tx", tx}, - {"account_tx", account_tx}, - {"ledger", ledger}, - {"ledger_range", ledger_range}, - {"ledger_entry", ledger_entry}, - {"account_info", account_info}, - {"ledger_data", ledger_data}, - {"book_offers", book_offers}}; - -boost::json::object -doAccountInfo( - boost::json::object const& request, - BackendInterface const& backend); -boost::json::object -doTx(boost::json::object const& request, BackendInterface const& backend); -boost::json::object -doAccountTx( - boost::json::object const& request, - BackendInterface const& backend); -boost::json::object -doLedgerData( - boost::json::object const& request, - BackendInterface const& backend); -boost::json::object -doLedgerEntry( - boost::json::object const& request, - BackendInterface const& backend); -boost::json::object -doBookOffers( - boost::json::object const& request, - BackendInterface const& backend); -boost::json::object -doLedger(boost::json::object const& request, BackendInterface const& backend); -boost::json::object -doLedgerRange( - boost::json::object const& request, - BackendInterface const& backend); - -boost::json::object -buildResponse( - boost::json::object const& request, - BackendInterface const& backend) -{ - std::string command = request.at("command").as_string().c_str(); - BOOST_LOG_TRIVIAL(info) << "Received rpc command : " << request; - boost::json::object response; - switch (commandMap[command]) - { - case tx: - return doTx(request, backend); - break; - case account_tx: - return doAccountTx(request, backend); - break; - case ledger: - return doLedger(request, backend); - break; - case ledger_entry: - return doLedgerEntry(request, backend); - break; - case ledger_range: - return doLedgerRange(request, backend); - break; - case ledger_data: - return doLedgerData(request, backend); - break; - case account_info: - return doAccountInfo(request, backend); - break; - case book_offers: - return doBookOffers(request, backend); - break; - default: - BOOST_LOG_TRIVIAL(error) << "Unknown command: " << command; - } - return response; -} -// Report a failure -void -fail(boost::beast::error_code ec, char const* what) -{ - std::cerr << what << ": " << ec.message() << "\n"; -} - -// Echoes back all received WebSocket messages -class session : public std::enable_shared_from_this -{ - boost::beast::websocket::stream ws_; - boost::beast::flat_buffer buffer_; - std::string response_; - BackendInterface const& backend_; - -public: - // Take ownership of the socket - explicit session( - boost::asio::ip::tcp::socket&& socket, - BackendInterface const& backend) - : ws_(std::move(socket)), backend_(backend) - { - } - - // Get on the correct executor - void - run() - { - // We need to be executing within a strand to perform async operations - // on the I/O objects in this session. Although not strictly necessary - // for single-threaded contexts, this example code is written to be - // thread-safe by default. - boost::asio::dispatch( - ws_.get_executor(), - boost::beast::bind_front_handler( - &session::on_run, shared_from_this())); - } - - // Start the asynchronous operation - void - on_run() - { - // Set suggested timeout settings for the websocket - ws_.set_option(boost::beast::websocket::stream_base::timeout::suggested( - boost::beast::role_type::server)); - - // Set a decorator to change the Server of the handshake - ws_.set_option(boost::beast::websocket::stream_base::decorator( - [](boost::beast::websocket::response_type& res) { - res.set( - boost::beast::http::field::server, - std::string(BOOST_BEAST_VERSION_STRING) + - " websocket-server-async"); - })); - // Accept the websocket handshake - ws_.async_accept(boost::beast::bind_front_handler( - &session::on_accept, shared_from_this())); - } - - void - on_accept(boost::beast::error_code ec) - { - if (ec) - return fail(ec, "accept"); - - // Read a message - do_read(); - } - - void - do_read() - { - // Read a message into our buffer - ws_.async_read( - buffer_, - boost::beast::bind_front_handler( - &session::on_read, shared_from_this())); - } - - void - on_read(boost::beast::error_code ec, std::size_t bytes_transferred) - { - boost::ignore_unused(bytes_transferred); - - // This indicates that the session was closed - if (ec == boost::beast::websocket::error::closed) - return; - - if (ec) - fail(ec, "read"); - std::string msg{ - static_cast(buffer_.data().data()), buffer_.size()}; - // BOOST_LOG_TRIVIAL(debug) << __func__ << msg; - boost::json::object response; - try - { - boost::json::value raw = boost::json::parse(msg); - boost::json::object request = raw.as_object(); - BOOST_LOG_TRIVIAL(debug) << " received request : " << request; - try - { - auto start = std::chrono::system_clock::now(); - response = buildResponse(request, backend_); - auto end = std::chrono::system_clock::now(); - BOOST_LOG_TRIVIAL(info) << __func__ << " RPC call took " << ((end - start).count() / 1000000000.0) << " . request = " << request; - } - catch (Backend::DatabaseTimeout const& t) - { - BOOST_LOG_TRIVIAL(error) << __func__ << " Database timeout"; - response["error"] = - "Database read timeout. Please retry the request"; - } - } - catch (std::exception const& e) - { - BOOST_LOG_TRIVIAL(error) - << __func__ << "caught exception : " << e.what(); - } - BOOST_LOG_TRIVIAL(trace) << __func__ << response; - response_ = boost::json::serialize(response); - - // Echo the message - ws_.text(ws_.got_text()); - ws_.async_write( - boost::asio::buffer(response_), - boost::beast::bind_front_handler( - &session::on_write, shared_from_this())); - } - - void - on_write(boost::beast::error_code ec, std::size_t bytes_transferred) - { - boost::ignore_unused(bytes_transferred); - - if (ec) - return fail(ec, "write"); - - // Clear the buffer - buffer_.consume(buffer_.size()); - - // Do another read - do_read(); - } -}; - -//------------------------------------------------------------------------------ - -// Accepts incoming connections and launches the sessions -class listener : public std::enable_shared_from_this -{ - boost::asio::io_context& ioc_; - boost::asio::ip::tcp::acceptor acceptor_; - BackendInterface const& backend_; - -public: - listener( - boost::asio::io_context& ioc, - boost::asio::ip::tcp::endpoint endpoint, - BackendInterface const& backend) - : ioc_(ioc), acceptor_(ioc), backend_(backend) - { - boost::beast::error_code ec; - - // Open the acceptor - acceptor_.open(endpoint.protocol(), ec); - if (ec) - { - fail(ec, "open"); - return; - } - - // Allow address reuse - acceptor_.set_option(boost::asio::socket_base::reuse_address(true), ec); - if (ec) - { - fail(ec, "set_option"); - return; - } - - // Bind to the server address - acceptor_.bind(endpoint, ec); - if (ec) - { - fail(ec, "bind"); - return; - } - - // Start listening for connections - acceptor_.listen(boost::asio::socket_base::max_listen_connections, ec); - if (ec) - { - fail(ec, "listen"); - return; - } - } - - // Start accepting incoming connections - void - run() - { - do_accept(); - } - -private: - void - do_accept() - { - // The new connection gets its own strand - acceptor_.async_accept( - boost::asio::make_strand(ioc_), - boost::beast::bind_front_handler( - &listener::on_accept, shared_from_this())); - } - - void - on_accept(boost::beast::error_code ec, boost::asio::ip::tcp::socket socket) - { - if (ec) - { - fail(ec, "accept"); - } - else - { - // Create the session and run it - std::make_shared(std::move(socket), backend_)->run(); - } - - // Accept another connection - do_accept(); - } -}; - -std::optional -parse_config(const char* filename) -{ - try - { - std::ifstream in(filename, std::ios::in | std::ios::binary); - if (in) - { - std::stringstream contents; - contents << in.rdbuf(); - in.close(); - std::cout << contents.str() << std::endl; - boost::json::value value = boost::json::parse(contents.str()); - return value.as_object(); - } - } - catch (std::exception const& e) - { - std::cout << e.what() << std::endl; - } - return {}; -} -//------------------------------------------------------------------------------ -// -void -initLogLevel(int level) -{ - switch (level) - { - case 0: - boost::log::core::get()->set_filter( - boost::log::trivial::severity >= boost::log::trivial::trace); - break; - case 1: - boost::log::core::get()->set_filter( - boost::log::trivial::severity >= boost::log::trivial::debug); - break; - case 2: - boost::log::core::get()->set_filter( - boost::log::trivial::severity >= boost::log::trivial::info); - break; - case 3: - boost::log::core::get()->set_filter( - boost::log::trivial::severity >= boost::log::trivial::warning); - break; - case 4: - boost::log::core::get()->set_filter( - boost::log::trivial::severity >= boost::log::trivial::error); - break; - case 5: - boost::log::core::get()->set_filter( - boost::log::trivial::severity >= boost::log::trivial::fatal); - break; - default: - boost::log::core::get()->set_filter( - boost::log::trivial::severity >= boost::log::trivial::info); - } -} - -int -main(int argc, char* argv[]) -{ - // Check command line arguments. - if (argc != 5 and argc != 6) - { - std::cerr - << "Usage: websocket-server-async
" - " \n" - << "Example:\n" - << " websocket-server-async 0.0.0.0 8080 1 config.json 2\n"; - return EXIT_FAILURE; - } - auto const address = boost::asio::ip::make_address(argv[1]); - auto const port = static_cast(std::atoi(argv[2])); - auto const threads = std::max(1, std::atoi(argv[3])); - auto const config = parse_config(argv[4]); - if (argc > 5) - { - initLogLevel(std::atoi(argv[5])); - } - else - { - initLogLevel(2); - } - if (!config) - { - std::cerr << "couldnt parse config. Exiting..." << std::endl; - return EXIT_FAILURE; - } - - // The io_context is required for all I/O - boost::asio::io_context ioc{threads}; - ReportingETL etl{config.value(), ioc}; - - // Create and launch a listening port - std::make_shared( - ioc, - boost::asio::ip::tcp::endpoint{address, port}, - etl.getFlatMapBackend()) - ->run(); - - // Run the I/O service on the requested number of threads - std::vector v; - v.reserve(threads - 1); - for (auto i = threads - 1; i > 0; --i) - v.emplace_back([&ioc] { ioc.run(); }); - std::cout << "created ETL" << std::endl; - etl.run(); - std::cout << "running ETL" << std::endl; - ioc.run(); - - return EXIT_SUCCESS; -} From fa8eb67ca6b3f0303a9db74a04578079f2d4990d Mon Sep 17 00:00:00 2001 From: CJ Cobb Date: Mon, 17 May 2021 19:44:52 +0000 Subject: [PATCH 05/25] move websocket_server_async.cpp --- reporting/ReportingETL.cpp | 14 +- server/websocket_server_async.cpp | 513 ++++++++++++++++++++++++++++++ 2 files changed, 526 insertions(+), 1 deletion(-) create mode 100644 server/websocket_server_async.cpp diff --git a/reporting/ReportingETL.cpp b/reporting/ReportingETL.cpp index d238a369..f6a5d3f3 100644 --- a/reporting/ReportingETL.cpp +++ b/reporting/ReportingETL.cpp @@ -719,7 +719,19 @@ ReportingETL::ReportingETL( if (config.contains("read_only")) readOnly_ = config.at("read_only").as_bool(); if (config.contains("online_delete")) - onlineDeleteInterval_ = config.at("online_delete").as_int64(); + { + int64_t interval = config.at("online_delete").as_int64(); + uint32_t max = std::numeric_limits::max(); + if (interval > max) + { + std::stringstream msg; + msg << "online_delete cannot be greater than " + << std::to_string(max); + throw std::runtime_error(msg.str()); + } + if (interval > 0) + onlineDeleteInterval_ = static_cast(interval); + } if (config.contains("extractor_threads")) extractorThreads_ = config.at("extractor_threads").as_int64(); if (config.contains("txn_threshold")) diff --git a/server/websocket_server_async.cpp b/server/websocket_server_async.cpp new file mode 100644 index 00000000..1e848d55 --- /dev/null +++ b/server/websocket_server_async.cpp @@ -0,0 +1,513 @@ +// +// Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/boostorg/beast +// + +//------------------------------------------------------------------------------ +// +// Example: WebSocket server, asynchronous +// +//------------------------------------------------------------------------------ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +//------------------------------------------------------------------------------ +enum RPCCommand { + tx, + account_tx, + ledger, + account_info, + ledger_data, + book_offers, + ledger_range, + ledger_entry +}; +std::unordered_map commandMap{ + {"tx", tx}, + {"account_tx", account_tx}, + {"ledger", ledger}, + {"ledger_range", ledger_range}, + {"ledger_entry", ledger_entry}, + {"account_info", account_info}, + {"ledger_data", ledger_data}, + {"book_offers", book_offers}}; + +boost::json::object +doAccountInfo( + boost::json::object const& request, + BackendInterface const& backend); +boost::json::object +doTx(boost::json::object const& request, BackendInterface const& backend); +boost::json::object +doAccountTx( + boost::json::object const& request, + BackendInterface const& backend); +boost::json::object +doLedgerData( + boost::json::object const& request, + BackendInterface const& backend); +boost::json::object +doLedgerEntry( + boost::json::object const& request, + BackendInterface const& backend); +boost::json::object +doBookOffers( + boost::json::object const& request, + BackendInterface const& backend); +boost::json::object +doLedger(boost::json::object const& request, BackendInterface const& backend); +boost::json::object +doLedgerRange( + boost::json::object const& request, + BackendInterface const& backend); + +std::pair +buildResponse( + boost::json::object const& request, + BackendInterface const& backend) +{ + std::string command = request.at("command").as_string().c_str(); + BOOST_LOG_TRIVIAL(info) << "Received rpc command : " << request; + boost::json::object response; + switch (commandMap[command]) + { + case tx: + return {doTx(request, backend), 1}; + break; + case account_tx: { + auto res = doAccountTx(request, backend); + if (res.contains("transactions")) + return {res, res["transactions"].as_array().size()}; + return {res, 1}; + } + break; + case ledger: { + auto res = doLedger(request, backend); + if (res.contains("transactions")) + return {res, res["transactions"].as_array().size()}; + return {res, 1}; + } + break; + case ledger_entry: + return {doLedgerEntry(request, backend), 1}; + break; + case ledger_range: + return {doLedgerRange(request, backend), 1}; + break; + case ledger_data: { + auto res = doLedgerData(request, backend); + if (res.contains("objects")) + return {res, res["objects"].as_array().size()}; + return {res, 1}; + } + break; + case account_info: + return {doAccountInfo(request, backend), 1}; + break; + case book_offers: { + auto res = doBookOffers(request, backend); + if (res.contains("offers")) + return {res, res["offers"].as_array().size()}; + return {res, 1}; + } + break; + default: + BOOST_LOG_TRIVIAL(error) << "Unknown command: " << command; + } + return {response, 1}; +} +// Report a failure +void +fail(boost::beast::error_code ec, char const* what) +{ + std::cerr << what << ": " << ec.message() << "\n"; +} + +// Echoes back all received WebSocket messages +class session : public std::enable_shared_from_this +{ + boost::beast::websocket::stream ws_; + boost::beast::flat_buffer buffer_; + std::string response_; + BackendInterface const& backend_; + DOSGuard& dosGuard_; + +public: + // Take ownership of the socket + explicit session( + boost::asio::ip::tcp::socket&& socket, + BackendInterface const& backend, + DOSGuard& dosGuard) + : ws_(std::move(socket)), backend_(backend), dosGuard_(dosGuard) + { + } + + // Get on the correct executor + void + run() + { + // We need to be executing within a strand to perform async operations + // on the I/O objects in this session. Although not strictly necessary + // for single-threaded contexts, this example code is written to be + // thread-safe by default. + boost::asio::dispatch( + ws_.get_executor(), + boost::beast::bind_front_handler( + &session::on_run, shared_from_this())); + } + + // Start the asynchronous operation + void + on_run() + { + // Set suggested timeout settings for the websocket + ws_.set_option(boost::beast::websocket::stream_base::timeout::suggested( + boost::beast::role_type::server)); + + // Set a decorator to change the Server of the handshake + ws_.set_option(boost::beast::websocket::stream_base::decorator( + [](boost::beast::websocket::response_type& res) { + res.set( + boost::beast::http::field::server, + std::string(BOOST_BEAST_VERSION_STRING) + + " websocket-server-async"); + })); + // Accept the websocket handshake + ws_.async_accept(boost::beast::bind_front_handler( + &session::on_accept, shared_from_this())); + } + + void + on_accept(boost::beast::error_code ec) + { + if (ec) + return fail(ec, "accept"); + + // Read a message + do_read(); + } + + void + do_read() + { + // Read a message into our buffer + ws_.async_read( + buffer_, + boost::beast::bind_front_handler( + &session::on_read, shared_from_this())); + } + + void + on_read(boost::beast::error_code ec, std::size_t bytes_transferred) + { + boost::ignore_unused(bytes_transferred); + + // This indicates that the session was closed + if (ec == boost::beast::websocket::error::closed) + return; + + if (ec) + fail(ec, "read"); + std::string msg{ + static_cast(buffer_.data().data()), buffer_.size()}; + // BOOST_LOG_TRIVIAL(debug) << __func__ << msg; + boost::json::object response; + auto ip = + ws_.next_layer().socket().remote_endpoint().address().to_string(); + BOOST_LOG_TRIVIAL(debug) + << __func__ << " received request from ip = " << ip; + if (!dosGuard_.isOk(ip)) + response["error"] = "Too many requests. Slow down"; + else + { + try + { + boost::json::value raw = boost::json::parse(msg); + boost::json::object request = raw.as_object(); + BOOST_LOG_TRIVIAL(debug) << " received request : " << request; + try + { + auto start = std::chrono::system_clock::now(); + auto [res, cost] = buildResponse(request, backend_); + response = std::move(res); + if (!dosGuard_.add(ip, cost)) + { + response["warning"] = "Too many requests"; + } + + auto end = std::chrono::system_clock::now(); + BOOST_LOG_TRIVIAL(info) + << __func__ << " RPC call took " + << ((end - start).count() / 1000000000.0) + << " . request = " << request; + } + catch (Backend::DatabaseTimeout const& t) + { + BOOST_LOG_TRIVIAL(error) << __func__ << " Database timeout"; + response["error"] = + "Database read timeout. Please retry the request"; + } + } + catch (std::exception const& e) + { + BOOST_LOG_TRIVIAL(error) + << __func__ << "caught exception : " << e.what(); + response["error"] = "Unknown exception"; + } + } + BOOST_LOG_TRIVIAL(trace) << __func__ << response; + response_ = boost::json::serialize(response); + + // Echo the message + ws_.text(ws_.got_text()); + ws_.async_write( + boost::asio::buffer(response_), + boost::beast::bind_front_handler( + &session::on_write, shared_from_this())); + } + + void + on_write(boost::beast::error_code ec, std::size_t bytes_transferred) + { + boost::ignore_unused(bytes_transferred); + + if (ec) + return fail(ec, "write"); + + // Clear the buffer + buffer_.consume(buffer_.size()); + + // Do another read + do_read(); + } +}; + +//------------------------------------------------------------------------------ + +// Accepts incoming connections and launches the sessions +class listener : public std::enable_shared_from_this +{ + boost::asio::io_context& ioc_; + boost::asio::ip::tcp::acceptor acceptor_; + BackendInterface const& backend_; + DOSGuard& dosGuard_; + +public: + listener( + boost::asio::io_context& ioc, + boost::asio::ip::tcp::endpoint endpoint, + BackendInterface const& backend, + DOSGuard& dosGuard) + : ioc_(ioc), acceptor_(ioc), backend_(backend), dosGuard_(dosGuard) + { + boost::beast::error_code ec; + + // Open the acceptor + acceptor_.open(endpoint.protocol(), ec); + if (ec) + { + fail(ec, "open"); + return; + } + + // Allow address reuse + acceptor_.set_option(boost::asio::socket_base::reuse_address(true), ec); + if (ec) + { + fail(ec, "set_option"); + return; + } + + // Bind to the server address + acceptor_.bind(endpoint, ec); + if (ec) + { + fail(ec, "bind"); + return; + } + + // Start listening for connections + acceptor_.listen(boost::asio::socket_base::max_listen_connections, ec); + if (ec) + { + fail(ec, "listen"); + return; + } + } + + // Start accepting incoming connections + void + run() + { + do_accept(); + } + +private: + void + do_accept() + { + // The new connection gets its own strand + acceptor_.async_accept( + boost::asio::make_strand(ioc_), + boost::beast::bind_front_handler( + &listener::on_accept, shared_from_this())); + } + + void + on_accept(boost::beast::error_code ec, boost::asio::ip::tcp::socket socket) + { + if (ec) + { + fail(ec, "accept"); + } + else + { + // Create the session and run it + std::make_shared(std::move(socket), backend_, dosGuard_) + ->run(); + } + + // Accept another connection + do_accept(); + } +}; + +std::optional +parse_config(const char* filename) +{ + try + { + std::ifstream in(filename, std::ios::in | std::ios::binary); + if (in) + { + std::stringstream contents; + contents << in.rdbuf(); + in.close(); + std::cout << contents.str() << std::endl; + boost::json::value value = boost::json::parse(contents.str()); + return value.as_object(); + } + } + catch (std::exception const& e) + { + std::cout << e.what() << std::endl; + } + return {}; +} +//------------------------------------------------------------------------------ +// +void +initLogLevel(int level) +{ + switch (level) + { + case 0: + boost::log::core::get()->set_filter( + boost::log::trivial::severity >= boost::log::trivial::trace); + break; + case 1: + boost::log::core::get()->set_filter( + boost::log::trivial::severity >= boost::log::trivial::debug); + break; + case 2: + boost::log::core::get()->set_filter( + boost::log::trivial::severity >= boost::log::trivial::info); + break; + case 3: + boost::log::core::get()->set_filter( + boost::log::trivial::severity >= boost::log::trivial::warning); + break; + case 4: + boost::log::core::get()->set_filter( + boost::log::trivial::severity >= boost::log::trivial::error); + break; + case 5: + boost::log::core::get()->set_filter( + boost::log::trivial::severity >= boost::log::trivial::fatal); + break; + default: + boost::log::core::get()->set_filter( + boost::log::trivial::severity >= boost::log::trivial::info); + } +} + +int +main(int argc, char* argv[]) +{ + // Check command line arguments. + if (argc != 5 and argc != 6) + { + std::cerr + << "Usage: websocket-server-async
" + " \n" + << "Example:\n" + << " websocket-server-async 0.0.0.0 8080 1 config.json 2\n"; + return EXIT_FAILURE; + } + auto const address = boost::asio::ip::make_address(argv[1]); + auto const port = static_cast(std::atoi(argv[2])); + auto const threads = std::max(1, std::atoi(argv[3])); + auto const config = parse_config(argv[4]); + if (argc > 5) + { + initLogLevel(std::atoi(argv[5])); + } + else + { + initLogLevel(2); + } + if (!config) + { + std::cerr << "couldnt parse config. Exiting..." << std::endl; + return EXIT_FAILURE; + } + + // The io_context is required for all I/O + boost::asio::io_context ioc{threads}; + ReportingETL etl{config.value(), ioc}; + DOSGuard dosGuard{config.value(), ioc}; + + // Create and launch a listening port + std::make_shared( + ioc, + boost::asio::ip::tcp::endpoint{address, port}, + etl.getFlatMapBackend(), + dosGuard) + ->run(); + + // Run the I/O service on the requested number of threads + std::vector v; + v.reserve(threads - 1); + for (auto i = threads - 1; i > 0; --i) + v.emplace_back([&ioc] { ioc.run(); }); + std::cout << "created ETL" << std::endl; + etl.run(); + std::cout << "running ETL" << std::endl; + ioc.run(); + + return EXIT_SUCCESS; +} From 0e52ebef48fdf1d03c0fb42e83d762d5a2881c5f Mon Sep 17 00:00:00 2001 From: CJ Cobb Date: Wed, 19 May 2021 17:49:22 +0000 Subject: [PATCH 06/25] create cassandra keyspace if it doesn't exist --- reporting/CassandraBackend.cpp | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/reporting/CassandraBackend.cpp b/reporting/CassandraBackend.cpp index 865e6799..f54ba75a 100644 --- a/reporting/CassandraBackend.cpp +++ b/reporting/CassandraBackend.cpp @@ -1479,8 +1479,36 @@ CassandraBackend::open(bool readOnly) { std::stringstream ss; ss << "nodestore: Error connecting Cassandra session keyspace: " - << rc << ", " << cass_error_desc(rc); + << rc << ", " << cass_error_desc(rc) + << ", trying to create it ourselves"; BOOST_LOG_TRIVIAL(error) << ss.str(); + // if the keyspace doesn't exist, try to create it + session_.reset(cass_session_new()); + fut = cass_session_connect(session_.get(), cluster); + rc = cass_future_error_code(fut); + cass_future_free(fut); + if (rc != CASS_OK) + { + std::stringstream ss; + ss << "nodestore: Error connecting Cassandra session at all: " + << rc << ", " << cass_error_desc(rc); + BOOST_LOG_TRIVIAL(error) << ss.str(); + } + else + { + std::stringstream query; + query + << "CREATE KEYSPACE IF NOT EXISTS " << keyspace + << " WITH replication = {'class': 'SimpleStrategy', " + "'replication_factor': '3'} AND durable_writes = true"; + if (!executeSimpleStatement(query.str())) + continue; + query = {}; + query << "USE " << keyspace; + if (!executeSimpleStatement(query.str())) + continue; + } + continue; } From f513438a955fac3c666ec5123cad8f2abc7b09db Mon Sep 17 00:00:00 2001 From: CJ Cobb Date: Thu, 20 May 2021 18:36:20 +0000 Subject: [PATCH 07/25] create postgres database if it doesn't exist --- reporting/CassandraBackend.cpp | 5 ---- reporting/Pg.cpp | 45 +++++++++++++++++++++++++++++----- reporting/PostgresBackend.cpp | 16 +++++++----- 3 files changed, 49 insertions(+), 17 deletions(-) diff --git a/reporting/CassandraBackend.cpp b/reporting/CassandraBackend.cpp index f54ba75a..f2c85317 100644 --- a/reporting/CassandraBackend.cpp +++ b/reporting/CassandraBackend.cpp @@ -1527,11 +1527,6 @@ CassandraBackend::open(bool readOnly) if (!executeSimpleStatement(query.str())) continue; - query.str(""); - query << "CREATE INDEX ON " << tablePrefix << "objects(sequence)"; - if (!executeSimpleStatement(query.str())) - continue; - query.str(""); query << "SELECT * FROM " << tablePrefix << "objects WHERE sequence=1" << " LIMIT 1"; diff --git a/reporting/Pg.cpp b/reporting/Pg.cpp index 7f082b5d..4ae7788d 100644 --- a/reporting/Pg.cpp +++ b/reporting/Pg.cpp @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -47,7 +48,6 @@ #include #include #include -#include static void noticeReceiver(void* arg, PGresult const* res) @@ -350,11 +350,27 @@ PgPool::PgPool(boost::json::object const& config) */ constexpr std::size_t maxFieldSize = 1024; constexpr std::size_t maxFields = 1000; + std::string conninfo = "postgres://"; + auto getFieldAsString = [&config](auto field) { + if (!config.contains(field)) + throw std::runtime_error( + field + std::string{" missing from postgres config"}); + if (!config.at(field).is_string()) + throw std::runtime_error( + field + std::string{" in postgres config is not a string"}); + return std::string{config.at(field).as_string().c_str()}; + }; + conninfo += getFieldAsString("username"); + conninfo += ":"; + conninfo += getFieldAsString("password"); + conninfo += "@"; + conninfo += getFieldAsString("contact_point"); + conninfo += "/"; + conninfo += getFieldAsString("database"); // The connection object must be freed using the libpq API PQfinish() call. pg_connection_type conn( - PQconnectdb(config.at("conninfo").as_string().c_str()), - [](PGconn* conn) { PQfinish(conn); }); + PQconnectdb(conninfo.c_str()), [](PGconn* conn) { PQfinish(conn); }); if (!conn) throw std::runtime_error("Can't create DB connection."); if (PQstatus(conn.get()) != CONNECTION_OK) @@ -605,9 +621,26 @@ PgPool::checkin(std::unique_ptr& pg) std::shared_ptr make_PgPool(boost::json::object const& config) { - auto ret = std::make_shared(config); - ret->setup(); - return ret; + try + { + auto ret = std::make_shared(config); + ret->setup(); + return ret; + } + catch (std::runtime_error& e) + { + boost::json::object configCopy = config; + configCopy["database"] = "postgres"; + auto ret = std::make_shared(configCopy); + ret->setup(); + PgQuery pgQuery{ret}; + std::string query = "CREATE DATABASE " + + std::string{config.at("database").as_string().c_str()}; + pgQuery(query.c_str()); + ret = std::make_shared(config); + ret->setup(); + return ret; + } } //----------------------------------------------------------------------------- diff --git a/reporting/PostgresBackend.cpp b/reporting/PostgresBackend.cpp index 87385324..249d305a 100644 --- a/reporting/PostgresBackend.cpp +++ b/reporting/PostgresBackend.cpp @@ -816,14 +816,16 @@ PostgresBackend::writeKeys( keysBuffer << std::to_string(index.keyIndex) << '\t' << "\\\\x" << ripple::strHex(key) << '\n'; numRows++; - // If the buffer gets too large, the insert fails. Not sure why. So we - // insert after 1 million records - if (numRows == 100000) + // If the buffer gets too large, the insert fails. Not sure why. + // When writing in the background, we insert after every 10000 rows + if ((isAsync && numRows == 10000) || numRows == 100000) { pgQuery.bulkInsert("keys", keysBuffer.str()); std::stringstream temp; keysBuffer.swap(temp); numRows = 0; + if (isAsync) + std::this_thread::sleep_for(std::chrono::seconds(1)); } } if (numRows > 0) @@ -855,14 +857,16 @@ PostgresBackend::writeBooks( << ripple::strHex(book.first) << '\t' << "\\\\x" << ripple::strHex(offer) << '\n'; numRows++; - // If the buffer gets too large, the insert fails. Not sure why. So - // we insert after 1 million records - if (numRows == 1000000) + // If the buffer gets too large, the insert fails. Not sure why. + // When writing in the background, we insert after every 10 rows + if ((isAsync && numRows == 1000) || numRows == 100000) { pgQuery.bulkInsert("books", booksBuffer.str()); std::stringstream temp; booksBuffer.swap(temp); numRows = 0; + if (isAsync) + std::this_thread::sleep_for(std::chrono::seconds(1)); } } } From cadf2fa972f757b1bcbee1d5c248b407572dfcb9 Mon Sep 17 00:00:00 2001 From: CJ Cobb Date: Mon, 24 May 2021 18:23:36 +0000 Subject: [PATCH 08/25] checkpoint --- reporting/BackendIndexer.cpp | 17 +++++---- reporting/BackendInterface.h | 17 ++++++--- reporting/CassandraBackend.cpp | 9 ++--- reporting/CassandraBackend.h | 8 ----- reporting/Pg.cpp | 2 +- reporting/PostgresBackend.cpp | 66 +++++++++++++++++++++------------- reporting/PostgresBackend.h | 3 +- reporting/ReportingETL.cpp | 24 +++++++++---- 8 files changed, 89 insertions(+), 57 deletions(-) diff --git a/reporting/BackendIndexer.cpp b/reporting/BackendIndexer.cpp index d7ca33fd..0320d973 100644 --- a/reporting/BackendIndexer.cpp +++ b/reporting/BackendIndexer.cpp @@ -438,22 +438,27 @@ BackendIndexer::finish(uint32_t ledgerSequence, BackendInterface const& backend) bool isFirst = false; auto keyIndex = getKeyIndexOfSeq(ledgerSequence); auto bookIndex = getBookIndexOfSeq(ledgerSequence); - auto rng = backend.fetchLedgerRangeNoThrow(); - if (!rng || rng->minSequence == ledgerSequence) + if (isFirst_) { - isFirst = true; - keyIndex = KeyIndex{ledgerSequence}; - bookIndex = BookIndex{ledgerSequence}; + auto rng = backend.fetchLedgerRangeNoThrow(); + if (rng && rng->minSequence != ledgerSequence) + isFirst_ = false; + else + { + keyIndex = KeyIndex{ledgerSequence}; + bookIndex = BookIndex{ledgerSequence}; + } } backend.writeKeys(keys, keyIndex); backend.writeBooks(books, bookIndex); - if (isFirst) + if (isFirst_) { // write completion record ripple::uint256 zero = {}; backend.writeBooks({{zero, {zero}}}, bookIndex); backend.writeKeys({zero}, keyIndex); } + isFirst_ = false; keys = {}; books = {}; BOOST_LOG_TRIVIAL(info) diff --git a/reporting/BackendInterface.h b/reporting/BackendInterface.h index 3fa3a6a5..b6451445 100644 --- a/reporting/BackendInterface.h +++ b/reporting/BackendInterface.h @@ -100,6 +100,8 @@ class BackendIndexer std::mutex mtx; std::condition_variable cv_; + mutable bool isFirst_ = true; + void addKeyAsync(ripple::uint256 const& key); void @@ -200,6 +202,7 @@ class BackendInterface { protected: mutable BackendIndexer indexer_; + mutable bool isFirst_ = true; public: // read methods @@ -245,12 +248,17 @@ public: auto commitRes = doFinishWrites(); if (commitRes) { - bool isFirst = - fetchLedgerRangeNoThrow()->minSequence == ledgerSequence; - if (indexer_.isBookFlagLedger(ledgerSequence) || isFirst) + if (isFirst_) + { + auto rng = fetchLedgerRangeNoThrow(); + if (rng && rng->minSequence != ledgerSequence) + isFirst_ = false; + } + if (indexer_.isBookFlagLedger(ledgerSequence) || isFirst_) indexer_.writeBookFlagLedgerAsync(ledgerSequence, *this); - if (indexer_.isKeyFlagLedger(ledgerSequence) || isFirst) + if (indexer_.isKeyFlagLedger(ledgerSequence) || isFirst_) indexer_.writeKeyFlagLedgerAsync(ledgerSequence, *this); + isFirst_ = false; } return commitRes; } @@ -267,6 +275,7 @@ public: std::optional fetchLedgerRangeNoThrow() const { + BOOST_LOG_TRIVIAL(warning) << __func__; while (true) { try diff --git a/reporting/CassandraBackend.cpp b/reporting/CassandraBackend.cpp index f2c85317..b447efc9 100644 --- a/reporting/CassandraBackend.cpp +++ b/reporting/CassandraBackend.cpp @@ -1527,12 +1527,6 @@ CassandraBackend::open(bool readOnly) if (!executeSimpleStatement(query.str())) continue; - query.str(""); - query << "SELECT * FROM " << tablePrefix << "objects WHERE sequence=1" - << " LIMIT 1"; - if (!executeSimpleStatement(query.str())) - continue; - query.str(""); query << "CREATE TABLE IF NOT EXISTS " << tablePrefix << "transactions" << " ( hash blob PRIMARY KEY, ledger_sequence bigint, " @@ -1822,12 +1816,13 @@ CassandraBackend::open(bool readOnly) << " is_latest IN (true, false)"; if (!selectLedgerRange_.prepareStatement(query, session_.get())) continue; + /* query.str(""); query << " SELECT key,object FROM " << tablePrefix << "objects WHERE sequence = ?"; if (!selectLedgerDiff_.prepareStatement(query, session_.get())) continue; - + */ setupPreparedStatements = true; } diff --git a/reporting/CassandraBackend.h b/reporting/CassandraBackend.h index f9d66acd..66dbe6c7 100644 --- a/reporting/CassandraBackend.h +++ b/reporting/CassandraBackend.h @@ -841,14 +841,6 @@ public: { // wait for all other writes to finish sync(); - auto rng = fetchLedgerRangeNoThrow(); - if (rng && rng->maxSequence >= ledgerSequence_) - { - BOOST_LOG_TRIVIAL(warning) - << __func__ << " Ledger " << std::to_string(ledgerSequence_) - << " already written. Returning"; - return false; - } // write range if (isFirstLedger_) { diff --git a/reporting/Pg.cpp b/reporting/Pg.cpp index 4ae7788d..4966b6b8 100644 --- a/reporting/Pg.cpp +++ b/reporting/Pg.cpp @@ -790,7 +790,7 @@ CREATE TABLE IF NOT EXISTS objects ( object bytea ) PARTITION BY RANGE (ledger_seq); -CREATE INDEX objects_idx ON objects USING btree(key, ledger_seq); +CREATE INDEX objects_idx ON objects USING btree(ledger_seq,key); create table if not exists objects1 partition of objects for values from (0) to (10000000); create table if not exists objects2 partition of objects for values from (10000000) to (20000000); diff --git a/reporting/PostgresBackend.cpp b/reporting/PostgresBackend.cpp index 249d305a..dcbb0e45 100644 --- a/reporting/PostgresBackend.cpp +++ b/reporting/PostgresBackend.cpp @@ -775,12 +775,22 @@ PostgresBackend::doFinishWrites() const { if (!abortWrite_) { - writeConnection_.bulkInsert("transactions", transactionsBuffer_.str()); + std::string txStr = transactionsBuffer_.str(); + writeConnection_.bulkInsert("transactions", txStr); writeConnection_.bulkInsert( "account_transactions", accountTxBuffer_.str()); std::string objectsStr = objectsBuffer_.str(); if (objectsStr.size()) writeConnection_.bulkInsert("objects", objectsStr); + BOOST_LOG_TRIVIAL(debug) + << __func__ << " objects size = " << objectsStr.size() + << " txns size = " << txStr.size(); + std::string keysStr = keysBuffer_.str(); + if (keysStr.size()) + writeConnection_.bulkInsert("keys", keysStr); + std::string booksStr = booksBuffer_.str(); + if (booksStr.size()) + writeConnection_.bulkInsert("books", booksStr); } auto res = writeConnection_("COMMIT"); if (!res || res.status() != PGRES_COMMAND_OK) @@ -795,6 +805,8 @@ PostgresBackend::doFinishWrites() const objectsBuffer_.clear(); booksBuffer_.str(""); booksBuffer_.clear(); + keysBuffer_.str(""); + keysBuffer_.clear(); accountTxBuffer_.str(""); accountTxBuffer_.clear(); numRowsInObjectsBuffer_ = 0; @@ -806,33 +818,36 @@ PostgresBackend::writeKeys( KeyIndex const& index, bool isAsync) const { + return true; + if (isAsync) + return true; + if (abortWrite_) + return false; BOOST_LOG_TRIVIAL(debug) << __func__; PgQuery pgQuery(pgPool_); - pgQuery("BEGIN"); - std::stringstream keysBuffer; + PgQuery& conn = isAsync ? pgQuery : writeConnection_; + if (isAsync) + conn("BEGIN"); size_t numRows = 0; for (auto& key : keys) { - keysBuffer << std::to_string(index.keyIndex) << '\t' << "\\\\x" - << ripple::strHex(key) << '\n'; + keysBuffer_ << std::to_string(index.keyIndex) << '\t' << "\\\\x" + << ripple::strHex(key) << '\n'; numRows++; // If the buffer gets too large, the insert fails. Not sure why. // When writing in the background, we insert after every 10000 rows if ((isAsync && numRows == 10000) || numRows == 100000) { - pgQuery.bulkInsert("keys", keysBuffer.str()); + conn.bulkInsert("keys", keysBuffer_.str()); std::stringstream temp; - keysBuffer.swap(temp); + keysBuffer_.swap(temp); numRows = 0; if (isAsync) std::this_thread::sleep_for(std::chrono::seconds(1)); } } - if (numRows > 0) - { - pgQuery.bulkInsert("keys", keysBuffer.str()); - } - pgQuery("COMMIT"); + if (isAsync) + conn("COMMIT"); return true; } bool @@ -843,38 +858,41 @@ PostgresBackend::writeBooks( BookIndex const& index, bool isAsync) const { + return true; + if (isAsync) + return true; + if (abortWrite_) + return false; BOOST_LOG_TRIVIAL(debug) << __func__; PgQuery pgQuery(pgPool_); - pgQuery("BEGIN"); - std::stringstream booksBuffer; + PgQuery& conn = isAsync ? pgQuery : writeConnection_; + if (isAsync) + conn("BEGIN"); size_t numRows = 0; for (auto& book : books) { for (auto& offer : book.second) { - booksBuffer << std::to_string(index.bookIndex) << '\t' << "\\\\x" - << ripple::strHex(book.first) << '\t' << "\\\\x" - << ripple::strHex(offer) << '\n'; + booksBuffer_ << std::to_string(index.bookIndex) << '\t' << "\\\\x" + << ripple::strHex(book.first) << '\t' << "\\\\x" + << ripple::strHex(offer) << '\n'; numRows++; // If the buffer gets too large, the insert fails. Not sure why. // When writing in the background, we insert after every 10 rows if ((isAsync && numRows == 1000) || numRows == 100000) { - pgQuery.bulkInsert("books", booksBuffer.str()); + conn.bulkInsert("books", booksBuffer_.str()); std::stringstream temp; - booksBuffer.swap(temp); + booksBuffer_.swap(temp); numRows = 0; if (isAsync) std::this_thread::sleep_for(std::chrono::seconds(1)); } } } - if (numRows > 0) - { - pgQuery.bulkInsert("books", booksBuffer.str()); - } - pgQuery("COMMIT"); + if (isAsync) + conn("COMMIT"); return true; } bool diff --git a/reporting/PostgresBackend.h b/reporting/PostgresBackend.h index 806a95da..c4890ecf 100644 --- a/reporting/PostgresBackend.h +++ b/reporting/PostgresBackend.h @@ -9,8 +9,9 @@ class PostgresBackend : public BackendInterface private: mutable size_t numRowsInObjectsBuffer_ = 0; mutable std::stringstream objectsBuffer_; - mutable std::stringstream transactionsBuffer_; mutable std::stringstream booksBuffer_; + mutable std::stringstream keysBuffer_; + mutable std::stringstream transactionsBuffer_; mutable std::stringstream accountTxBuffer_; std::shared_ptr pgPool_; mutable PgQuery writeConnection_; diff --git a/reporting/ReportingETL.cpp b/reporting/ReportingETL.cpp index f6a5d3f3..cdcb6ee4 100644 --- a/reporting/ReportingETL.cpp +++ b/reporting/ReportingETL.cpp @@ -69,7 +69,7 @@ ReportingETL::insertTransactions( auto metaSerializer = std::make_shared( txMeta.getAsObject().getSerializer()); - BOOST_LOG_TRIVIAL(trace) + BOOST_LOG_TRIVIAL(debug) << __func__ << " : " << "Inserting transaction = " << sttx.getTransactionID(); @@ -241,7 +241,7 @@ ReportingETL::fetchLedgerDataAndDiff(uint32_t idx) std::pair ReportingETL::buildNextLedger(org::xrpl::rpc::v1::GetLedgerResponse& rawData) { - BOOST_LOG_TRIVIAL(trace) << __func__ << " : " + BOOST_LOG_TRIVIAL(debug) << __func__ << " : " << "Beginning ledger update"; ripple::LedgerInfo lgrInfo = @@ -252,8 +252,12 @@ ReportingETL::buildNextLedger(org::xrpl::rpc::v1::GetLedgerResponse& rawData) << "Deserialized ledger header. " << detail::toString(lgrInfo); flatMapBackend_->startWrites(); + BOOST_LOG_TRIVIAL(debug) << __func__ << " : " + << "started writes"; flatMapBackend_->writeLedger( lgrInfo, std::move(*rawData.mutable_ledger_header())); + BOOST_LOG_TRIVIAL(debug) << __func__ << " : " + << "wrote ledger header"; std::vector accountTxData{ insertTransactions(lgrInfo, rawData)}; @@ -293,7 +297,13 @@ ReportingETL::buildNextLedger(org::xrpl::rpc::v1::GetLedgerResponse& rawData) isDeleted, std::move(bookDir)); } + BOOST_LOG_TRIVIAL(debug) + << __func__ << " : " + << "wrote objects. num objects = " + << std::to_string(rawData.ledger_objects().objects_size()); flatMapBackend_->writeAccountTransactions(std::move(accountTxData)); + BOOST_LOG_TRIVIAL(debug) << __func__ << " : " + << "wrote account_tx"; accumTxns_ += rawData.transactions_list().transactions_size(); bool success = true; if (accumTxns_ >= txnThreshold_) @@ -361,6 +371,7 @@ ReportingETL::runETLPipeline(uint32_t startSequence, int numExtractors) assert(false); throw std::runtime_error("runETLPipeline: parent ledger is null"); } + std::atomic minSequence = rng->minSequence; BOOST_LOG_TRIVIAL(info) << __func__ << " : " << "Populating caches"; @@ -451,6 +462,7 @@ ReportingETL::runETLPipeline(uint32_t startSequence, int numExtractors) } std::thread transformer{[this, + &minSequence, &writeConflict, &startSequence, &getNext, @@ -499,16 +511,16 @@ ReportingETL::runETLPipeline(uint32_t startSequence, int numExtractors) lastPublishedSequence = lgrInfo.seq; } writeConflict = !success; - auto range = flatMapBackend_->fetchLedgerRangeNoThrow(); if (onlineDeleteInterval_ && !deleting_ && - range->maxSequence - range->minSequence > - *onlineDeleteInterval_) + lgrInfo.seq - minSequence > *onlineDeleteInterval_) { deleting_ = true; - ioContext_.post([this, &range]() { + ioContext_.post([this, &minSequence]() { BOOST_LOG_TRIVIAL(info) << "Running online delete"; flatMapBackend_->doOnlineDelete(*onlineDeleteInterval_); BOOST_LOG_TRIVIAL(info) << "Finished online delete"; + auto rng = flatMapBackend_->fetchLedgerRangeNoThrow(); + minSequence = rng->minSequence; deleting_ = false; }); } From db0595c08326a9f35e0329c90aa5bb69122aca1e Mon Sep 17 00:00:00 2001 From: CJ Cobb Date: Tue, 25 May 2021 20:19:09 +0000 Subject: [PATCH 09/25] update cache asynchronously --- reporting/BackendIndexer.cpp | 43 ++++++++++++++++++++++++++++++++-- reporting/BackendInterface.h | 26 ++++++++++---------- reporting/CassandraBackend.cpp | 6 +++-- reporting/PostgresBackend.cpp | 28 ++++++++++------------ reporting/ReportingETL.cpp | 14 +++++------ 5 files changed, 79 insertions(+), 38 deletions(-) diff --git a/reporting/BackendIndexer.cpp b/reporting/BackendIndexer.cpp index 0320d973..1cf19ce4 100644 --- a/reporting/BackendIndexer.cpp +++ b/reporting/BackendIndexer.cpp @@ -9,6 +9,7 @@ BackendIndexer::BackendIndexer(boost::json::object const& config) bookShift_ = config.at("indexer_book_shift").as_int64(); work_.emplace(ioc_); ioThread_ = std::thread{[this]() { ioc_.run(); }}; + updateThread_ = std::thread{[this]() { ioc_.run(); }}; }; BackendIndexer::~BackendIndexer() { @@ -16,6 +17,39 @@ BackendIndexer::~BackendIndexer() work_.reset(); ioThread_.join(); } +void +BackendIndexer::writeLedgerObject( + ripple::uint256&& key, + std::optional&& book, + bool isCreated, + bool isDeleted) +{ + ++updatesOutstanding_; + boost::asio::post( + ioc_, + [this, + key = std::move(key), + isCreated, + isDeleted, + book = std::move(book)]() { + if (isCreated) + addKey(key); + if (isDeleted) + deleteKey(key); + if (book) + { + if (isCreated) + addBookOffer(*book, key); + if (isDeleted) + deleteBookOffer(*book, key); + } + --updatesOutstanding_; + { + std::unique_lock lck(mtx); + updateCv_.notify_one(); + } + }); +} void BackendIndexer::addKey(ripple::uint256 const& key) @@ -360,7 +394,7 @@ BackendIndexer::populateCaches(BackendInterface const& backend) std::unique_lock lck(mtx); deletedKeys = {}; deletedBooks = {}; - cv_.notify_one(); + cacheCv_.notify_one(); } BOOST_LOG_TRIVIAL(info) << __func__ @@ -387,7 +421,7 @@ void BackendIndexer::waitForCaches() { std::unique_lock lck(mtx); - cv_.wait(lck, [this]() { + cacheCv_.wait(lck, [this]() { return !populatingCacheAsync && deletedKeys.size() == 0; }); } @@ -449,6 +483,11 @@ BackendIndexer::finish(uint32_t ledgerSequence, BackendInterface const& backend) bookIndex = BookIndex{ledgerSequence}; } } + { + std::unique_lock lck(mtx); + updateCv_.wait(lck, [this]() { return updatesOutstanding_ == 0; }); + } + backend.writeKeys(keys, keyIndex); backend.writeBooks(books, bookIndex); if (isFirst_) diff --git a/reporting/BackendInterface.h b/reporting/BackendInterface.h index b6451445..524b7d44 100644 --- a/reporting/BackendInterface.h +++ b/reporting/BackendInterface.h @@ -81,6 +81,10 @@ class BackendIndexer std::mutex mutex_; std::optional work_; std::thread ioThread_; + std::thread updateThread_; + std::atomic_uint32_t updatesOutstanding_ = 0; + std::condition_variable updateCv_; + uint32_t keyShift_ = 20; uint32_t bookShift_ = 10; std::unordered_set keys; @@ -98,7 +102,7 @@ class BackendIndexer std::unordered_map> booksRepair; std::mutex mtx; - std::condition_variable cv_; + std::condition_variable cacheCv_; mutable bool isFirst_ = true; @@ -123,6 +127,13 @@ public: void waitForCaches(); + void + writeLedgerObject( + ripple::uint256&& key, + std::optional&& book, + bool isCreated, + bool isDeleted); + void addKey(ripple::uint256 const& key); void @@ -350,17 +361,8 @@ public: std::optional&& book) const { ripple::uint256 key256 = ripple::uint256::fromVoid(key.data()); - if (isCreated) - indexer_.addKey(key256); - if (isDeleted) - indexer_.deleteKey(key256); - if (book) - { - if (isCreated) - indexer_.addBookOffer(*book, key256); - if (isDeleted) - indexer_.deleteBookOffer(*book, key256); - } + indexer_.writeLedgerObject( + std::move(key256), std::move(book), isCreated, isDeleted); doWriteLedgerObject( std::move(key), seq, diff --git a/reporting/CassandraBackend.cpp b/reporting/CassandraBackend.cpp index b447efc9..e83b3351 100644 --- a/reporting/CassandraBackend.cpp +++ b/reporting/CassandraBackend.cpp @@ -1359,12 +1359,14 @@ CassandraBackend::open(bool readOnly) ? config_["threads"].as_int64() : std::thread::hardware_concurrency(); int ttl = config_.contains("ttl") ? config_["ttl"].as_int64() * 2 : 0; - int keysTtl, keysIncr = pow(2, indexer_.getKeyShift()) * 4 * 2; + int keysTtl, + keysIncr = ttl != 0 ? pow(2, indexer_.getKeyShift()) * 4 * 2 : 0; while (keysTtl < ttl) { keysTtl += keysIncr; } - int booksTtl, booksIncr = pow(2, indexer_.getBookShift()) * 4 * 2; + int booksTtl, + booksIncr = ttl != 0 ? pow(2, indexer_.getBookShift()) * 4 * 2 : 0; while (booksTtl < ttl) { booksTtl += booksIncr; diff --git a/reporting/PostgresBackend.cpp b/reporting/PostgresBackend.cpp index dcbb0e45..799ae715 100644 --- a/reporting/PostgresBackend.cpp +++ b/reporting/PostgresBackend.cpp @@ -818,29 +818,28 @@ PostgresBackend::writeKeys( KeyIndex const& index, bool isAsync) const { - return true; - if (isAsync) - return true; if (abortWrite_) return false; BOOST_LOG_TRIVIAL(debug) << __func__; PgQuery pgQuery(pgPool_); PgQuery& conn = isAsync ? pgQuery : writeConnection_; + std::stringstream asyncBuffer; + std::stringstream& buffer = isAsync ? asyncBuffer : keysBuffer_; if (isAsync) conn("BEGIN"); size_t numRows = 0; for (auto& key : keys) { - keysBuffer_ << std::to_string(index.keyIndex) << '\t' << "\\\\x" - << ripple::strHex(key) << '\n'; + buffer << std::to_string(index.keyIndex) << '\t' << "\\\\x" + << ripple::strHex(key) << '\n'; numRows++; // If the buffer gets too large, the insert fails. Not sure why. // When writing in the background, we insert after every 10000 rows if ((isAsync && numRows == 10000) || numRows == 100000) { - conn.bulkInsert("keys", keysBuffer_.str()); + conn.bulkInsert("keys", buffer.str()); std::stringstream temp; - keysBuffer_.swap(temp); + buffer.swap(temp); numRows = 0; if (isAsync) std::this_thread::sleep_for(std::chrono::seconds(1)); @@ -858,15 +857,14 @@ PostgresBackend::writeBooks( BookIndex const& index, bool isAsync) const { - return true; - if (isAsync) - return true; if (abortWrite_) return false; BOOST_LOG_TRIVIAL(debug) << __func__; PgQuery pgQuery(pgPool_); PgQuery& conn = isAsync ? pgQuery : writeConnection_; + std::stringstream asyncBuffer; + std::stringstream& buffer = isAsync ? asyncBuffer : booksBuffer_; if (isAsync) conn("BEGIN"); size_t numRows = 0; @@ -874,17 +872,17 @@ PostgresBackend::writeBooks( { for (auto& offer : book.second) { - booksBuffer_ << std::to_string(index.bookIndex) << '\t' << "\\\\x" - << ripple::strHex(book.first) << '\t' << "\\\\x" - << ripple::strHex(offer) << '\n'; + buffer << std::to_string(index.bookIndex) << '\t' << "\\\\x" + << ripple::strHex(book.first) << '\t' << "\\\\x" + << ripple::strHex(offer) << '\n'; numRows++; // If the buffer gets too large, the insert fails. Not sure why. // When writing in the background, we insert after every 10 rows if ((isAsync && numRows == 1000) || numRows == 100000) { - conn.bulkInsert("books", booksBuffer_.str()); + conn.bulkInsert("books", buffer.str()); std::stringstream temp; - booksBuffer_.swap(temp); + buffer.swap(temp); numRows = 0; if (isAsync) std::this_thread::sleep_for(std::chrono::seconds(1)); diff --git a/reporting/ReportingETL.cpp b/reporting/ReportingETL.cpp index cdcb6ee4..023aaed7 100644 --- a/reporting/ReportingETL.cpp +++ b/reporting/ReportingETL.cpp @@ -258,13 +258,6 @@ ReportingETL::buildNextLedger(org::xrpl::rpc::v1::GetLedgerResponse& rawData) lgrInfo, std::move(*rawData.mutable_ledger_header())); BOOST_LOG_TRIVIAL(debug) << __func__ << " : " << "wrote ledger header"; - std::vector accountTxData{ - insertTransactions(lgrInfo, rawData)}; - - BOOST_LOG_TRIVIAL(debug) - << __func__ << " : " - << "Inserted all transactions. Number of transactions = " - << rawData.transactions_list().transactions_size(); for (auto& obj : *(rawData.mutable_ledger_objects()->mutable_objects())) { @@ -301,6 +294,13 @@ ReportingETL::buildNextLedger(org::xrpl::rpc::v1::GetLedgerResponse& rawData) << __func__ << " : " << "wrote objects. num objects = " << std::to_string(rawData.ledger_objects().objects_size()); + std::vector accountTxData{ + insertTransactions(lgrInfo, rawData)}; + + BOOST_LOG_TRIVIAL(debug) + << __func__ << " : " + << "Inserted all transactions. Number of transactions = " + << rawData.transactions_list().transactions_size(); flatMapBackend_->writeAccountTransactions(std::move(accountTxData)); BOOST_LOG_TRIVIAL(debug) << __func__ << " : " << "wrote account_tx"; From 7759d86087dccba051f76ea669301273c5b910b0 Mon Sep 17 00:00:00 2001 From: CJ Cobb Date: Tue, 25 May 2021 21:14:47 +0000 Subject: [PATCH 10/25] tests build and run --- CMakeLists.txt | 20 +++++++++++++++++++- deps/cassandra.cmake | 4 +++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index aea127de..890c7f39 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,10 +21,22 @@ endif () file (TO_CMAKE_PATH "${BOOST_ROOT}" BOOST_ROOT) FIND_PACKAGE( Boost 1.75 COMPONENTS filesystem log log_setup thread system REQUIRED ) +include(FetchContent) +FetchContent_Declare( + googletest + URL https://github.com/google/googletest/archive/609281088cfefc76f9d0ce82e1ff6c30cc3591e5.zip +) +FetchContent_MakeAvailable(googletest) +enable_testing() +include(GoogleTest) -add_executable (reporting +add_executable (reporting_main server/websocket_server_async.cpp ) +add_executable (reporting_tests + tests/main.cpp +) +add_library(reporting reporting/BackendInterface.h) list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/deps") include(ExternalProject) message(${CMAKE_CURRENT_BINARY_DIR}) @@ -76,3 +88,9 @@ target_sources(reporting PRIVATE message(${Boost_LIBRARIES}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR} ${Boost_INCLUDE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) TARGET_LINK_LIBRARIES(reporting PUBLIC ${Boost_LIBRARIES}) +TARGET_LINK_LIBRARIES(reporting_main PUBLIC reporting) +TARGET_LINK_LIBRARIES(reporting_tests PUBLIC reporting gtest_main) + + +gtest_discover_tests(reporting_tests) + diff --git a/deps/cassandra.cmake b/deps/cassandra.cmake index 920fc49e..b4b459ac 100644 --- a/deps/cassandra.cmake +++ b/deps/cassandra.cmake @@ -97,7 +97,6 @@ if(NOT cassandra) file(TO_CMAKE_PATH "${libuv_src_SOURCE_DIR}" libuv_src_SOURCE_DIR) endif() - add_library (cassandra STATIC IMPORTED GLOBAL) ExternalProject_Add(cassandra_src PREFIX ${nih_cache_path} @@ -148,6 +147,9 @@ if(NOT cassandra) else() target_link_libraries(cassandra INTERFACE ${zlib}) endif() + set(OPENSSL_USE_STATIC_LIBS TRUE) + find_package(OpenSSL REQUIRED) + target_link_libraries(cassandra INTERFACE OpenSSL::SSL) file(TO_CMAKE_PATH "${cassandra_src_SOURCE_DIR}" cassandra_src_SOURCE_DIR) target_link_libraries(reporting PUBLIC cassandra) From 96f896c117e0249e9cbaeaa6a8f72d04abf60152 Mon Sep 17 00:00:00 2001 From: CJ Cobb Date: Wed, 26 May 2021 21:23:16 +0000 Subject: [PATCH 11/25] checkpoint --- handlers/Ledger.cpp | 16 ---------------- handlers/RPCHelpers.cpp | 16 ++++++++++++++++ handlers/RPCHelpers.h | 2 ++ reporting/BackendIndexer.cpp | 1 + reporting/Pg.cpp | 2 +- reporting/PostgresBackend.cpp | 8 ++++++++ test.py | 2 +- 7 files changed, 29 insertions(+), 18 deletions(-) diff --git a/handlers/Ledger.cpp b/handlers/Ledger.cpp index 96dd3e83..b03a8920 100644 --- a/handlers/Ledger.cpp +++ b/handlers/Ledger.cpp @@ -1,21 +1,5 @@ #include #include -std::vector -ledgerInfoToBlob(ripple::LedgerInfo const& info) -{ - ripple::Serializer s; - s.add32(info.seq); - s.add64(info.drops.drops()); - s.addBitString(info.parentHash); - s.addBitString(info.txHash); - s.addBitString(info.accountHash); - s.add32(info.parentCloseTime.time_since_epoch().count()); - s.add32(info.closeTime.time_since_epoch().count()); - s.add8(info.closeTimeResolution.count()); - s.add8(info.closeFlags); - // s.addBitString(info.hash); - return s.peekData(); -} boost::json::object doLedger(boost::json::object const& request, BackendInterface const& backend) diff --git a/handlers/RPCHelpers.cpp b/handlers/RPCHelpers.cpp index 77e0f997..191c6790 100644 --- a/handlers/RPCHelpers.cpp +++ b/handlers/RPCHelpers.cpp @@ -80,3 +80,19 @@ ledgerSequenceFromRequest( return request.at("ledger_index").as_int64(); } } +std::vector +ledgerInfoToBlob(ripple::LedgerInfo const& info) +{ + ripple::Serializer s; + s.add32(info.seq); + s.add64(info.drops.drops()); + s.addBitString(info.parentHash); + s.addBitString(info.txHash); + s.addBitString(info.accountHash); + s.add32(info.parentCloseTime.time_since_epoch().count()); + s.add32(info.closeTime.time_since_epoch().count()); + s.add8(info.closeTimeResolution.count()); + s.add8(info.closeFlags); + s.addBitString(info.hash); + return s.peekData(); +} diff --git a/handlers/RPCHelpers.h b/handlers/RPCHelpers.h index f07bf3ef..d3c5c26e 100644 --- a/handlers/RPCHelpers.h +++ b/handlers/RPCHelpers.h @@ -24,5 +24,7 @@ std::optional ledgerSequenceFromRequest( boost::json::object const& request, BackendInterface const& backend); +std::vector +ledgerInfoToBlob(ripple::LedgerInfo const& info); #endif diff --git a/reporting/BackendIndexer.cpp b/reporting/BackendIndexer.cpp index 1cf19ce4..af0f49bd 100644 --- a/reporting/BackendIndexer.cpp +++ b/reporting/BackendIndexer.cpp @@ -16,6 +16,7 @@ BackendIndexer::~BackendIndexer() std::unique_lock lck(mutex_); work_.reset(); ioThread_.join(); + updateThread_.join(); } void BackendIndexer::writeLedgerObject( diff --git a/reporting/Pg.cpp b/reporting/Pg.cpp index 4966b6b8..9f61df1e 100644 --- a/reporting/Pg.cpp +++ b/reporting/Pg.cpp @@ -52,7 +52,7 @@ static void noticeReceiver(void* arg, PGresult const* res) { - BOOST_LOG_TRIVIAL(debug) << "server message: " << PQresultErrorMessage(res); + BOOST_LOG_TRIVIAL(trace) << "server message: " << PQresultErrorMessage(res); } //----------------------------------------------------------------------------- diff --git a/reporting/PostgresBackend.cpp b/reporting/PostgresBackend.cpp index 799ae715..d6bae3c1 100644 --- a/reporting/PostgresBackend.cpp +++ b/reporting/PostgresBackend.cpp @@ -846,7 +846,11 @@ PostgresBackend::writeKeys( } } if (isAsync) + { + if (numRows > 0) + conn.bulkInsert("keys", buffer.str()); conn("COMMIT"); + } return true; } bool @@ -890,7 +894,11 @@ PostgresBackend::writeBooks( } } if (isAsync) + { + if (numRows > 0) + conn.bulkInsert("books", buffer.str()); conn("COMMIT"); + } return true; } bool diff --git a/test.py b/test.py index 7c5aa898..72d296ab 100755 --- a/test.py +++ b/test.py @@ -729,7 +729,7 @@ async def ledger(ip, port, ledger, binary, transactions, expand): async with websockets.connect(address,max_size=1000000000) as ws: await ws.send(json.dumps({"command":"ledger","ledger_index":int(ledger),"binary":bool(binary), "transactions":bool(transactions),"expand":bool(expand)})) res = json.loads(await ws.recv()) - #print(json.dumps(res,indent=4,sort_keys=True)) + print(json.dumps(res,indent=4,sort_keys=True)) return res except websockets.exceptions.connectionclosederror as e: From 65dfa60fb2c2c6dc78bbfdd516721ae6116a9885 Mon Sep 17 00:00:00 2001 From: CJ Cobb Date: Wed, 26 May 2021 21:26:02 +0000 Subject: [PATCH 12/25] add tests/main.cpp --- tests/main.cpp | 193 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 193 insertions(+) create mode 100644 tests/main.cpp diff --git a/tests/main.cpp b/tests/main.cpp new file mode 100644 index 00000000..66569ec8 --- /dev/null +++ b/tests/main.cpp @@ -0,0 +1,193 @@ +#include +#include +#include +#include + +// Demonstrate some basic assertions. +TEST(BackendTest, Basic) +{ + std::string keyspace = + "oceand_test_" + + std::to_string( + std::chrono::system_clock::now().time_since_epoch().count()); + boost::json::object config{ + {"database", + {{"type", "cassandra"}, + {"cassandra", + {{"contact_points", "34.222.180.207"}, + {"port", 9042}, + {"keyspace", keyspace.c_str()}, + {"table_prefix", ""}, + {"max_requests_outstanding", 10000}, + {"threads", 8}}}}}}; + auto backend = Backend::makeBackend(config); + backend->open(false); + + std::string rawHeader = + "03C3141A01633CD656F91B4EBB5EB89B791BD34DBC8A04BB6F407C5335BC54351EDD73" + "3898497E809E04074D14D271E4832D7888754F9230800761563A292FA2315A6DB6FE30" + "CC5909B285080FCD6773CC883F9FE0EE4D439340AC592AADB973ED3CF53E2232B33EF5" + "7CECAC2816E3122816E31A0A00F8377CD95DFA484CFAE282656A58CE5AA29652EFFD80" + "AC59CD91416E4E13DBBE"; + + auto hexStringToBinaryString = [](auto const& hex) { + auto blob = ripple::strUnHex(hex); + std::string strBlob; + for (auto c : *blob) + { + strBlob += c; + } + return strBlob; + }; + auto ledgerInfoToBinaryString = [](auto const& info) { + auto blob = ledgerInfoToBlob(info); + std::string strBlob; + for (auto c : *blob) + { + strBlob += c; + } + return strBlob; + }; + + std::string rawHeaderBlob = hexStringToBinaryString(rawHeader); + ripple::LedgerInfo lgrInfo = + deserializeHeader(ripple::makeSlice(rawHeaderBlob)); + + backend->startWrites(); + backend->writeLedger(lgrInfo, std::move(rawHeaderBlob), true); + EXPECT_TRUE(backend->finishWrites(lgrInfo.seq)); + { + auto rng = backend->fetchLedgerRange(); + EXPECT_TRUE(rng.has_value()); + EXPECT_EQ(rng->minSequence, rng->maxSequence); + EXPECT_EQ(rng->maxSequence, lgrInfo.seq); + } + { + auto seq = backend->fetchLatestLedgerSequence(); + EXPECT_TRUE(seq.has_value()); + EXPECT_EQ(*seq, lgrInfo.seq); + } + + { + auto retLgr = backend->fetchLedgerBySequence(lgrInfo.seq); + EXPECT_TRUE(lgr.has_value()); + EXPECT_EQ(lgr->seq, lgrInfo.seq); + EXPECT_EQ(ledgerInfoToBlob(lgrInfo), ledgerInfoToBlob(retLgr)); + } + + EXPECT_FALSE(backend->fetchLedgerBySequence(lgrInfo.seq + 1).has_value()); + auto lgrInfoOld = lgrInfo; + + auto lgrInfoNext = lgrInfo; + lgrInfoNext.seq = lgrInfo.seq + 1; + lgrInfoNext.accountHash = ~lgrInfo.accountHash; + { + std::string rawHeaderBlob = ledgerInfoToBinaryString(lgrInfoNext); + + backend->startWrites(); + backend->writeLedger(lgrInfoNext, std::move(rawHeaderBlob)); + res = backend->finishWrites(lgrInfoNext.seq); + EXPECT_TRUE(res); + } + { + auto rng = backend->fetchLedgerRange(); + EXPECT_TRUE(rng.has_value()); + EXPECT_EQ(rng->minSequence, lgrInfo.seq - 1); + EXPECT_EQ(rng->maxSequence, lgrInfoNext.seq); + } + { + auto seq = backend->fetchLatestLedgerSequence(); + EXPECT_EQ(seq, lgrInfoNext.seq); + } + { + auto retLgr = backend->fetchLedgerBySequence(lgrInfoNext.seq); + EXPECT_TRUE(lgr.has_value()); + EXPECT_EQ(lgr->seq, lgrInfoNext.seq); + EXPECT_EQ(ledgerInfoToBlob(*retLgr), ledgerInfoToBlob(lgrInfoNext)); + EXPECT_NE(ledgerInfoToBlob(*retLgr), ledgerInfoToBlob(lgrInfo)); + retLgr = backend->fetchLedgerBySequence(lgrInfoNext.seq - 1); + EXPECT_EQ(ledgerInfoToBlob(*retLgr), ledgerInfoToBlob(lgrInfo)); + EXPECT_NE(ledgerInfoToBlob(*retLgr), ledgerInfoToBlob(lgrInfoNext)); + retLgr = backend->fetchLedgerBySequence(lgrInfoNext.seq - 2); + EXPECT_FALSE( + backend->fetchLedgerBySequence(lgrInfoNext.seq - 2).has_value()); + } + + auto txns = backend->fetchAllTransactionsInLedger(lgrInfoNext.seq); + EXPECT_EQ(txns.size(), 0); + auto hashes = backend->fetchAllTransactionHashesInLedger(lgrInfoNext.seq); + EXPECT_EQ(hashes.size(), 0); + + std::string metadata = + "201C0000001DF8E311006F5630F58E8E36FD9F77456E6E5B76C8C479D55D2675DC" + "2B57" + "8D9EE0FBFD0F4435E7E82400F5ACA25010623C4C4AD65873DA787AC85A0A1385FE" + "6233" + "B6DE100799474F19BA75E8F4A44E64D5A0BA986182A59400000000000000000000" + "0000" + "434E5900000000000360E3E0751BD9A566CD03FA6CAFC78118B82BA06540000002" + "F63A" + "19788114B61B3EB55660F67EAAA4479175D2FDEA71CD940BE1E1E411006456623C" + "4C4A" + "D65873DA787AC85A0A1385FE6233B6DE100799474F19B87CAAEB9A59E722000000" + "0036" + "4F19B87CAAEB9A5958623C4C4AD65873DA787AC85A0A1385FE6233B6DE10079947" + "4F19" + "B87CAAEB9A590111000000000000000000000000434E59000000000002110360E3" + "E075" + "1BD9A566CD03FA6CAFC78118B82BA0031100000000000000000000000000000000" + "0000" + "000004110000000000000000000000000000000000000000E1E1E311006456623C" + "4C4A" + "D65873DA787AC85A0A1385FE6233B6DE100799474F19BA75E8F4A44EE8364F19BA" + "75E8" + "F4A44E58623C4C4AD65873DA787AC85A0A1385FE6233B6DE100799474F19BA75E8" + "F4A4" + "4E0111000000000000000000000000434E59000000000002110360E3E0751BD9A5" + "66CD" + "03FA6CAFC78118B82BA0E1E1E411006F568120731CA1CECDB619E8DAA252098015" + "8407" + "F8C587654D5DC8050BE6D5E6F6A4E722000000002400F5AC9E2503CE17F1330000" + "0000" + "00000000340000000000000000558614FB8C558DF9DB89BA9D147E6F6540196114" + "D611" + "5E4DD3D266DE237D464F5C5010623C4C4AD65873DA787AC85A0A1385FE6233B6DE" + "1007" + "99474F19B87CAAEB9A5964D588B6135A6783DB000000000000000000000000434E" + "5900" + "000000000360E3E0751BD9A566CD03FA6CAFC78118B82BA06540000000C9DF6DFA" + "8114" + "B61B3EB55660F67EAAA4479175D2FDEA71CD940BE1E1E51100612503CE17F95599" + "5AFC" + "E2A0B6B925C8BD04158D9AE706518E8CEC1695D78052E412799447C75A56EB0772" + "83F2" + "89CE1E0956133D9AD7828C1F88FFE5A50A885AD8679E8AEDBCDAA7E62400F5ACA2" + "6240" + "0000012E3449A4E1E722000000002400F5ACA32D0000000562400000012E344998" + "8114" + "B61B3EB55660F67EAAA4479175D2FDEA71CD940BE1E1E511006456FE9C40EDE9C0" + "AE6C" + "A8023498F9B9092DF3EB722B8B17C0C8A210A2FDCF22C08DE7220000000058FE9C" + "40ED" + "E9C0AE6CA8023498F9B9092DF3EB722B8B17C0C8A210A2FDCF22C08D8214B61B3E" + "B556" + "60F67EAAA4479175D2FDEA71CD940BE1E1F1031000"; + std::string transaction = + "12000722000000002400F5ACA2201900F5AC9E201B03CE17FB64D5A0BA986182A5" + "9400" + "0000000000000000000000434E5900000000000360E3E0751BD9A566CD03FA6CAF" + "C781" + "18B82BA06540000002F63A197868400000000000000C732102EF32A8F811F2D6EA" + "67FD" + "BAF625ABE70C0885189AA03A99330B6F7437C88492D0744630440220145530852F" + "98E6" + "D2A4D427A045556B6E45E54477BB3BC24952C8DFF3514A0E51022063F6D619D51C" + "7F60" + "B64B3CDF1E9EB79F4E7B5E2BDA9C81489CCD93F247F713618114B61B3EB55660F6" + "7EAA" + "A4479175D2FDEA71CD940B"; + + std::string metaBlob = hexStringToBinaryString(meta); + std::string txnBlob = hexStringToBinaryString(transaction); +} + From db8cfa518b68a15b74b1267f0357f62653130eb0 Mon Sep 17 00:00:00 2001 From: CJ Cobb Date: Thu, 27 May 2021 16:24:20 -0400 Subject: [PATCH 13/25] tests working --- CMakeLists.txt | 4 +- reporting/BackendInterface.h | 2 + reporting/CassandraBackend.cpp | 128 ++++++---------------- reporting/CassandraBackend.h | 2 +- tests/main.cpp | 193 --------------------------------- 5 files changed, 40 insertions(+), 289 deletions(-) delete mode 100644 tests/main.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 890c7f39..39874c95 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,7 +10,7 @@ set(CMAKE_VERBOSE_MAKEFILE TRUE) project(reporting) cmake_minimum_required(VERSION 3.16) -set (CMAKE_CXX_STANDARD 17) +set (CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -Wno-narrowing") set(Boost_USE_STATIC_LIBS ON) set(Boost_USE_MULTITHREADED ON) @@ -34,7 +34,7 @@ add_executable (reporting_main server/websocket_server_async.cpp ) add_executable (reporting_tests - tests/main.cpp + unittests/main.cpp ) add_library(reporting reporting/BackendInterface.h) list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/deps") diff --git a/reporting/BackendInterface.h b/reporting/BackendInterface.h index 524b7d44..78f54221 100644 --- a/reporting/BackendInterface.h +++ b/reporting/BackendInterface.h @@ -39,6 +39,8 @@ struct TransactionAndMetadata Blob transaction; Blob metadata; uint32_t ledgerSequence; + bool + operator==(const TransactionAndMetadata&) const = default; }; struct AccountTransactionsCursor diff --git a/reporting/CassandraBackend.cpp b/reporting/CassandraBackend.cpp index e83b3351..d22f0e5a 100644 --- a/reporting/CassandraBackend.cpp +++ b/reporting/CassandraBackend.cpp @@ -1270,6 +1270,11 @@ CassandraBackend::open(bool readOnly) } return {""}; }; + auto getInt = [this](std::string const& field) -> std::optional { + if (config_.contains(field) && config_.at(field).is_int64()) + return config_[field].as_int64(); + return {}; + }; if (open_) { assert(false); @@ -1320,14 +1325,14 @@ CassandraBackend::open(bool readOnly) throw std::runtime_error(ss.str()); } - int port = config_.contains("port") ? config_["port"].as_int64() : 0; + auto port = getInt("port"); if (port) { - rc = cass_cluster_set_port(cluster, port); + rc = cass_cluster_set_port(cluster, *port); if (rc != CASS_OK) { std::stringstream ss; - ss << "nodestore: Error setting Cassandra port: " << port + ss << "nodestore: Error setting Cassandra port: " << *port << ", result: " << rc << ", " << cass_error_desc(rc); throw std::runtime_error(ss.str()); @@ -1355,22 +1360,8 @@ CassandraBackend::open(bool readOnly) cass_cluster_set_credentials( cluster, username.c_str(), getString("password").c_str()); } - int threads = config_.contains("threads") - ? config_["threads"].as_int64() - : std::thread::hardware_concurrency(); - int ttl = config_.contains("ttl") ? config_["ttl"].as_int64() * 2 : 0; - int keysTtl, - keysIncr = ttl != 0 ? pow(2, indexer_.getKeyShift()) * 4 * 2 : 0; - while (keysTtl < ttl) - { - keysTtl += keysIncr; - } - int booksTtl, - booksIncr = ttl != 0 ? pow(2, indexer_.getBookShift()) * 4 * 2 : 0; - while (booksTtl < ttl) - { - booksTtl += booksIncr; - } + int threads = getInt("threads") ? *getInt("threads") + : std::thread::hardware_concurrency(); rc = cass_cluster_set_num_threads_io(cluster, threads); if (rc != CASS_OK) @@ -1380,6 +1371,8 @@ CassandraBackend::open(bool readOnly) << ", result: " << rc << ", " << cass_error_desc(rc); throw std::runtime_error(ss.str()); } + if (getInt("max_requests_outstanding")) + maxRequestsOutstanding = *getInt("max_requests_outstanding"); cass_cluster_set_request_timeout(cluster, 10000); @@ -1436,10 +1429,13 @@ CassandraBackend::open(bool readOnly) std::string keyspace = getString("keyspace"); if (keyspace.empty()) { - throw std::runtime_error( - "nodestore: Missing keyspace in Cassandra config"); + BOOST_LOG_TRIVIAL(warning) + << "No keyspace specified. Using keyspace oceand"; + keyspace = "oceand"; } + int rf = getInt("replication_factor") ? *getInt("replication_factor") : 3; + std::string tablePrefix = getString("table_prefix"); if (tablePrefix.empty()) { @@ -1448,6 +1444,20 @@ CassandraBackend::open(bool readOnly) cass_cluster_set_connect_timeout(cluster, 10000); + int ttl = getInt("ttl") ? *getInt("ttl") * 2 : 0; + int keysTtl, + keysIncr = ttl != 0 ? pow(2, indexer_.getKeyShift()) * 4 * 2 : 0; + while (keysTtl < ttl) + { + keysTtl += keysIncr; + } + int booksTtl, + booksIncr = ttl != 0 ? pow(2, indexer_.getBookShift()) * 4 * 2 : 0; + while (booksTtl < ttl) + { + booksTtl += booksIncr; + } + auto executeSimpleStatement = [this](std::string const& query) { CassStatement* statement = makeStatement(query.c_str(), 0); CassFuture* fut = cass_session_execute(session_.get(), statement); @@ -1499,10 +1509,10 @@ CassandraBackend::open(bool readOnly) else { std::stringstream query; - query - << "CREATE KEYSPACE IF NOT EXISTS " << keyspace - << " WITH replication = {'class': 'SimpleStrategy', " - "'replication_factor': '3'} AND durable_writes = true"; + query << "CREATE KEYSPACE IF NOT EXISTS " << keyspace + << " WITH replication = {'class': 'SimpleStrategy', " + "'replication_factor': '" + << std::to_string(rf) << "'} AND durable_writes = true"; if (!executeSimpleStatement(query.str())) continue; query = {}; @@ -1828,74 +1838,6 @@ CassandraBackend::open(bool readOnly) setupPreparedStatements = true; } - /* - while (true) - { - std::this_thread::sleep_for(std::chrono::seconds(1)); - if (!fetchLatestLedgerSequence()) - { - std::stringstream query; - query << "TRUNCATE TABLE " << tablePrefix << "ledger_range"; - if (!executeSimpleStatement(query.str())) - continue; - query.str(""); - query << "TRUNCATE TABLE " << tablePrefix << "ledgers"; - if (!executeSimpleStatement(query.str())) - continue; - query.str(""); - query << "TRUNCATE TABLE " << tablePrefix << "ledger_hashes"; - if (!executeSimpleStatement(query.str())) - continue; - query.str(""); - query << "TRUNCATE TABLE " << tablePrefix << "objects"; - if (!executeSimpleStatement(query.str())) - continue; - query.str(""); - query << "TRUNCATE TABLE " << tablePrefix << "transactions"; - if (!executeSimpleStatement(query.str())) - continue; - query.str(""); - query << "TRUNCATE TABLE " << tablePrefix << "account_tx"; - if (!executeSimpleStatement(query.str())) - continue; - query.str(""); - } - break; - } - */ - - if (config_.contains("max_requests_outstanding")) - { - maxRequestsOutstanding = config_["max_requests_outstanding"].as_int64(); - } - if (config_.contains("indexer_max_requests_outstanding")) - { - indexerMaxRequestsOutstanding = - config_["indexer_max_requests_outstanding"].as_int64(); - } - /* - if (config_.contains("run_indexer")) - { - if (config_["run_indexer"].as_bool()) - { - if (config_.contains("indexer_shift")) - { - indexerShift_ = config_["indexer_shift"].as_int64(); - } - indexer_ = std::thread{[this]() { - auto seq = getNextToIndex(); - if (seq) - { - BOOST_LOG_TRIVIAL(info) - << "Running indexer. Ledger = " << std::to_string(*seq); - runIndexer(*seq); - BOOST_LOG_TRIVIAL(info) << "Ran indexer"; - } - }}; - } - } - */ - work_.emplace(ioContext_); ioThread_ = std::thread{[this]() { ioContext_.run(); }}; open_ = true; diff --git a/reporting/CassandraBackend.h b/reporting/CassandraBackend.h index 66dbe6c7..f9b326ca 100644 --- a/reporting/CassandraBackend.h +++ b/reporting/CassandraBackend.h @@ -1018,7 +1018,7 @@ public: writeKeys( std::unordered_set const& keys, KeyIndex const& index, - bool isAsync = false) const; + bool isAsync = false) const override; bool writeBooks( std::unordered_map< diff --git a/tests/main.cpp b/tests/main.cpp deleted file mode 100644 index 66569ec8..00000000 --- a/tests/main.cpp +++ /dev/null @@ -1,193 +0,0 @@ -#include -#include -#include -#include - -// Demonstrate some basic assertions. -TEST(BackendTest, Basic) -{ - std::string keyspace = - "oceand_test_" + - std::to_string( - std::chrono::system_clock::now().time_since_epoch().count()); - boost::json::object config{ - {"database", - {{"type", "cassandra"}, - {"cassandra", - {{"contact_points", "34.222.180.207"}, - {"port", 9042}, - {"keyspace", keyspace.c_str()}, - {"table_prefix", ""}, - {"max_requests_outstanding", 10000}, - {"threads", 8}}}}}}; - auto backend = Backend::makeBackend(config); - backend->open(false); - - std::string rawHeader = - "03C3141A01633CD656F91B4EBB5EB89B791BD34DBC8A04BB6F407C5335BC54351EDD73" - "3898497E809E04074D14D271E4832D7888754F9230800761563A292FA2315A6DB6FE30" - "CC5909B285080FCD6773CC883F9FE0EE4D439340AC592AADB973ED3CF53E2232B33EF5" - "7CECAC2816E3122816E31A0A00F8377CD95DFA484CFAE282656A58CE5AA29652EFFD80" - "AC59CD91416E4E13DBBE"; - - auto hexStringToBinaryString = [](auto const& hex) { - auto blob = ripple::strUnHex(hex); - std::string strBlob; - for (auto c : *blob) - { - strBlob += c; - } - return strBlob; - }; - auto ledgerInfoToBinaryString = [](auto const& info) { - auto blob = ledgerInfoToBlob(info); - std::string strBlob; - for (auto c : *blob) - { - strBlob += c; - } - return strBlob; - }; - - std::string rawHeaderBlob = hexStringToBinaryString(rawHeader); - ripple::LedgerInfo lgrInfo = - deserializeHeader(ripple::makeSlice(rawHeaderBlob)); - - backend->startWrites(); - backend->writeLedger(lgrInfo, std::move(rawHeaderBlob), true); - EXPECT_TRUE(backend->finishWrites(lgrInfo.seq)); - { - auto rng = backend->fetchLedgerRange(); - EXPECT_TRUE(rng.has_value()); - EXPECT_EQ(rng->minSequence, rng->maxSequence); - EXPECT_EQ(rng->maxSequence, lgrInfo.seq); - } - { - auto seq = backend->fetchLatestLedgerSequence(); - EXPECT_TRUE(seq.has_value()); - EXPECT_EQ(*seq, lgrInfo.seq); - } - - { - auto retLgr = backend->fetchLedgerBySequence(lgrInfo.seq); - EXPECT_TRUE(lgr.has_value()); - EXPECT_EQ(lgr->seq, lgrInfo.seq); - EXPECT_EQ(ledgerInfoToBlob(lgrInfo), ledgerInfoToBlob(retLgr)); - } - - EXPECT_FALSE(backend->fetchLedgerBySequence(lgrInfo.seq + 1).has_value()); - auto lgrInfoOld = lgrInfo; - - auto lgrInfoNext = lgrInfo; - lgrInfoNext.seq = lgrInfo.seq + 1; - lgrInfoNext.accountHash = ~lgrInfo.accountHash; - { - std::string rawHeaderBlob = ledgerInfoToBinaryString(lgrInfoNext); - - backend->startWrites(); - backend->writeLedger(lgrInfoNext, std::move(rawHeaderBlob)); - res = backend->finishWrites(lgrInfoNext.seq); - EXPECT_TRUE(res); - } - { - auto rng = backend->fetchLedgerRange(); - EXPECT_TRUE(rng.has_value()); - EXPECT_EQ(rng->minSequence, lgrInfo.seq - 1); - EXPECT_EQ(rng->maxSequence, lgrInfoNext.seq); - } - { - auto seq = backend->fetchLatestLedgerSequence(); - EXPECT_EQ(seq, lgrInfoNext.seq); - } - { - auto retLgr = backend->fetchLedgerBySequence(lgrInfoNext.seq); - EXPECT_TRUE(lgr.has_value()); - EXPECT_EQ(lgr->seq, lgrInfoNext.seq); - EXPECT_EQ(ledgerInfoToBlob(*retLgr), ledgerInfoToBlob(lgrInfoNext)); - EXPECT_NE(ledgerInfoToBlob(*retLgr), ledgerInfoToBlob(lgrInfo)); - retLgr = backend->fetchLedgerBySequence(lgrInfoNext.seq - 1); - EXPECT_EQ(ledgerInfoToBlob(*retLgr), ledgerInfoToBlob(lgrInfo)); - EXPECT_NE(ledgerInfoToBlob(*retLgr), ledgerInfoToBlob(lgrInfoNext)); - retLgr = backend->fetchLedgerBySequence(lgrInfoNext.seq - 2); - EXPECT_FALSE( - backend->fetchLedgerBySequence(lgrInfoNext.seq - 2).has_value()); - } - - auto txns = backend->fetchAllTransactionsInLedger(lgrInfoNext.seq); - EXPECT_EQ(txns.size(), 0); - auto hashes = backend->fetchAllTransactionHashesInLedger(lgrInfoNext.seq); - EXPECT_EQ(hashes.size(), 0); - - std::string metadata = - "201C0000001DF8E311006F5630F58E8E36FD9F77456E6E5B76C8C479D55D2675DC" - "2B57" - "8D9EE0FBFD0F4435E7E82400F5ACA25010623C4C4AD65873DA787AC85A0A1385FE" - "6233" - "B6DE100799474F19BA75E8F4A44E64D5A0BA986182A59400000000000000000000" - "0000" - "434E5900000000000360E3E0751BD9A566CD03FA6CAFC78118B82BA06540000002" - "F63A" - "19788114B61B3EB55660F67EAAA4479175D2FDEA71CD940BE1E1E411006456623C" - "4C4A" - "D65873DA787AC85A0A1385FE6233B6DE100799474F19B87CAAEB9A59E722000000" - "0036" - "4F19B87CAAEB9A5958623C4C4AD65873DA787AC85A0A1385FE6233B6DE10079947" - "4F19" - "B87CAAEB9A590111000000000000000000000000434E59000000000002110360E3" - "E075" - "1BD9A566CD03FA6CAFC78118B82BA0031100000000000000000000000000000000" - "0000" - "000004110000000000000000000000000000000000000000E1E1E311006456623C" - "4C4A" - "D65873DA787AC85A0A1385FE6233B6DE100799474F19BA75E8F4A44EE8364F19BA" - "75E8" - "F4A44E58623C4C4AD65873DA787AC85A0A1385FE6233B6DE100799474F19BA75E8" - "F4A4" - "4E0111000000000000000000000000434E59000000000002110360E3E0751BD9A5" - "66CD" - "03FA6CAFC78118B82BA0E1E1E411006F568120731CA1CECDB619E8DAA252098015" - "8407" - "F8C587654D5DC8050BE6D5E6F6A4E722000000002400F5AC9E2503CE17F1330000" - "0000" - "00000000340000000000000000558614FB8C558DF9DB89BA9D147E6F6540196114" - "D611" - "5E4DD3D266DE237D464F5C5010623C4C4AD65873DA787AC85A0A1385FE6233B6DE" - "1007" - "99474F19B87CAAEB9A5964D588B6135A6783DB000000000000000000000000434E" - "5900" - "000000000360E3E0751BD9A566CD03FA6CAFC78118B82BA06540000000C9DF6DFA" - "8114" - "B61B3EB55660F67EAAA4479175D2FDEA71CD940BE1E1E51100612503CE17F95599" - "5AFC" - "E2A0B6B925C8BD04158D9AE706518E8CEC1695D78052E412799447C75A56EB0772" - "83F2" - "89CE1E0956133D9AD7828C1F88FFE5A50A885AD8679E8AEDBCDAA7E62400F5ACA2" - "6240" - "0000012E3449A4E1E722000000002400F5ACA32D0000000562400000012E344998" - "8114" - "B61B3EB55660F67EAAA4479175D2FDEA71CD940BE1E1E511006456FE9C40EDE9C0" - "AE6C" - "A8023498F9B9092DF3EB722B8B17C0C8A210A2FDCF22C08DE7220000000058FE9C" - "40ED" - "E9C0AE6CA8023498F9B9092DF3EB722B8B17C0C8A210A2FDCF22C08D8214B61B3E" - "B556" - "60F67EAAA4479175D2FDEA71CD940BE1E1F1031000"; - std::string transaction = - "12000722000000002400F5ACA2201900F5AC9E201B03CE17FB64D5A0BA986182A5" - "9400" - "0000000000000000000000434E5900000000000360E3E0751BD9A566CD03FA6CAF" - "C781" - "18B82BA06540000002F63A197868400000000000000C732102EF32A8F811F2D6EA" - "67FD" - "BAF625ABE70C0885189AA03A99330B6F7437C88492D0744630440220145530852F" - "98E6" - "D2A4D427A045556B6E45E54477BB3BC24952C8DFF3514A0E51022063F6D619D51C" - "7F60" - "B64B3CDF1E9EB79F4E7B5E2BDA9C81489CCD93F247F713618114B61B3EB55660F6" - "7EAA" - "A4479175D2FDEA71CD940B"; - - std::string metaBlob = hexStringToBinaryString(meta); - std::string txnBlob = hexStringToBinaryString(transaction); -} - From 572b0722717b61f299062a236b6c54344d24d508 Mon Sep 17 00:00:00 2001 From: CJ Cobb Date: Fri, 28 May 2021 10:42:13 -0400 Subject: [PATCH 14/25] lots of tests --- reporting/DBHelpers.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/reporting/DBHelpers.h b/reporting/DBHelpers.h index caf6d79a..2a086705 100644 --- a/reporting/DBHelpers.h +++ b/reporting/DBHelpers.h @@ -44,6 +44,8 @@ struct AccountTransactionsData , txHash(txHash) { } + + AccountTransactionsData() = default; }; template From 562f96a30f646d961cef4abbae361c182512ae47 Mon Sep 17 00:00:00 2001 From: CJ Cobb Date: Fri, 28 May 2021 21:21:24 -0400 Subject: [PATCH 15/25] tests working for postges and cassandra. removed books index. need to implement book_offers --- reporting/BackendIndexer.cpp | 436 ++++---------------- reporting/BackendInterface.h | 180 ++++---- reporting/CassandraBackend.cpp | 242 +---------- reporting/CassandraBackend.h | 11 +- reporting/PostgresBackend.cpp | 207 +--------- reporting/PostgresBackend.h | 11 +- reporting/ReportingETL.cpp | 2 - unittests/main.cpp | 727 +++++++++++++++++++++++++++++++++ 8 files changed, 915 insertions(+), 901 deletions(-) create mode 100644 unittests/main.cpp diff --git a/reporting/BackendIndexer.cpp b/reporting/BackendIndexer.cpp index af0f49bd..fe4f5510 100644 --- a/reporting/BackendIndexer.cpp +++ b/reporting/BackendIndexer.cpp @@ -5,101 +5,20 @@ BackendIndexer::BackendIndexer(boost::json::object const& config) { if (config.contains("indexer_key_shift")) keyShift_ = config.at("indexer_key_shift").as_int64(); - if (config.contains("indexer_book_shift")) - bookShift_ = config.at("indexer_book_shift").as_int64(); work_.emplace(ioc_); ioThread_ = std::thread{[this]() { ioc_.run(); }}; - updateThread_ = std::thread{[this]() { ioc_.run(); }}; }; BackendIndexer::~BackendIndexer() { std::unique_lock lck(mutex_); work_.reset(); ioThread_.join(); - updateThread_.join(); -} -void -BackendIndexer::writeLedgerObject( - ripple::uint256&& key, - std::optional&& book, - bool isCreated, - bool isDeleted) -{ - ++updatesOutstanding_; - boost::asio::post( - ioc_, - [this, - key = std::move(key), - isCreated, - isDeleted, - book = std::move(book)]() { - if (isCreated) - addKey(key); - if (isDeleted) - deleteKey(key); - if (book) - { - if (isCreated) - addBookOffer(*book, key); - if (isDeleted) - deleteBookOffer(*book, key); - } - --updatesOutstanding_; - { - std::unique_lock lck(mtx); - updateCv_.notify_one(); - } - }); } void -BackendIndexer::addKey(ripple::uint256 const& key) +BackendIndexer::addKey(ripple::uint256&& key) { - std::unique_lock lck(mtx); - keys.insert(key); - keysCumulative.insert(key); -} -void -BackendIndexer::addKeyAsync(ripple::uint256 const& key) -{ - std::unique_lock lck(mtx); - keysCumulative.insert(key); -} -void -BackendIndexer::deleteKey(ripple::uint256 const& key) -{ - std::unique_lock lck(mtx); - keysCumulative.erase(key); - if (populatingCacheAsync) - deletedKeys.insert(key); -} - -void -BackendIndexer::addBookOffer( - ripple::uint256 const& book, - ripple::uint256 const& offerKey) -{ - std::unique_lock lck(mtx); - books[book].insert(offerKey); - booksCumulative[book].insert(offerKey); -} -void -BackendIndexer::addBookOfferAsync( - ripple::uint256 const& book, - ripple::uint256 const& offerKey) -{ - std::unique_lock lck(mtx); - booksCumulative[book].insert(offerKey); -} -void -BackendIndexer::deleteBookOffer( - ripple::uint256 const& book, - ripple::uint256 const& offerKey) -{ - std::unique_lock lck(mtx); - booksCumulative[book].erase(offerKey); - if (populatingCacheAsync) - deletedBooks[book].insert(offerKey); + keys.insert(std::move(key)); } void @@ -152,123 +71,6 @@ writeKeyFlagLedger( .count(); } void -writeBookFlagLedger( - uint32_t ledgerSequence, - uint32_t shift, - BackendInterface const& backend, - std::unordered_map< - ripple::uint256, - std::unordered_set> const& books) -{ - uint32_t nextFlag = ((ledgerSequence >> shift << shift) + (1 << shift)); - ripple::uint256 zero = {}; - BOOST_LOG_TRIVIAL(info) - << __func__ - << " starting. ledgerSequence = " << std::to_string(ledgerSequence) - << " nextFlag = " << std::to_string(nextFlag) - << " books.size() = " << std::to_string(books.size()); - - auto start = std::chrono::system_clock::now(); - backend.writeBooks(books, BookIndex{nextFlag}, true); - backend.writeBooks({{zero, {zero}}}, BookIndex{nextFlag}, true); - - auto end = std::chrono::system_clock::now(); - - BOOST_LOG_TRIVIAL(info) - << __func__ - << " finished. ledgerSequence = " << std::to_string(ledgerSequence) - << " nextFlag = " << std::to_string(nextFlag) - << " books.size() = " << std::to_string(books.size()) << " time = " - << std::chrono::duration_cast(end - start) - .count(); -} - -void -BackendIndexer::clearCaches() -{ - keysCumulative = {}; - booksCumulative = {}; -} - -void -BackendIndexer::doBooksRepair( - BackendInterface const& backend, - std::optional sequence) -{ - auto rng = backend.fetchLedgerRangeNoThrow(); - - if (!rng) - return; - - if (!sequence) - sequence = rng->maxSequence; - - if (sequence < rng->minSequence) - sequence = rng->minSequence; - - BOOST_LOG_TRIVIAL(info) - << __func__ << " sequence = " << std::to_string(*sequence); - - ripple::uint256 zero = {}; - while (true) - { - try - { - auto [objects, cursor, warning] = - backend.fetchBookOffers(zero, *sequence, 1); - if (!warning) - { - BOOST_LOG_TRIVIAL(warning) - << __func__ << " flag ledger already written. sequence = " - << std::to_string(*sequence) << "returning"; - return; - } - else - { - uint32_t lower = (*sequence - 1) >> bookShift_ << bookShift_; - doBooksRepair(backend, lower); - } - break; - } - catch (DatabaseTimeout& t) - { - ; - } - } - std::optional cursor; - while (true) - { - try - { - auto [objects, curCursor, warning] = - backend.fetchLedgerPage(cursor, *sequence, 2048); - - BOOST_LOG_TRIVIAL(debug) << __func__ << " fetched a page"; - cursor = curCursor; - for (auto& obj : objects) - { - if (isOffer(obj.blob)) - { - auto book = getBook(obj.blob); - booksRepair[book].insert(obj.key); - } - } - if (!cursor) - break; - } - catch (DatabaseTimeout const& e) - { - BOOST_LOG_TRIVIAL(warning) - << __func__ << " Database timeout fetching keys"; - std::this_thread::sleep_for(std::chrono::seconds(2)); - } - } - writeBookFlagLedger(*sequence, bookShift_, backend, booksRepair); - booksRepair = {}; - BOOST_LOG_TRIVIAL(info) - << __func__ << " finished. sequence = " << std::to_string(*sequence); -} -void BackendIndexer::doKeysRepair( BackendInterface const& backend, std::optional sequence) @@ -293,34 +95,23 @@ BackendIndexer::doKeysRepair( try { auto [objects, curCursor, warning] = - backend.fetchLedgerPage(cursor, *sequence, 2048); + backend.fetchLedgerPage({}, *sequence, 1); // no cursor means this is the first page - if (!cursor) + // if there is no warning, we don't need to do a repair + // warning only shows up on the first page + if (!warning) { - // if there is no warning, we don't need to do a repair - // warning only shows up on the first page - if (!warning) - { - BOOST_LOG_TRIVIAL(info) - << __func__ - << " flag ledger already written. returning"; - return; - } - else - { - uint32_t lower = (*sequence - 1) >> keyShift_ << keyShift_; - doKeysRepair(backend, lower); - } + BOOST_LOG_TRIVIAL(debug) + << __func__ << " flag ledger already written. returning"; + return; } - - BOOST_LOG_TRIVIAL(debug) << __func__ << " fetched a page"; - cursor = curCursor; - for (auto& obj : objects) + else { - keysRepair.insert(obj.key); + uint32_t lower = (*sequence - 1) >> keyShift_ << keyShift_; + doKeysRepair(backend, lower); + writeKeyFlagLedgerAsync(lower, backend); + return; } - if (!cursor) - break; } catch (DatabaseTimeout const& e) { @@ -329,104 +120,10 @@ BackendIndexer::doKeysRepair( std::this_thread::sleep_for(std::chrono::seconds(2)); } } - writeKeyFlagLedger(*sequence, keyShift_, backend, keysRepair); - keysRepair = {}; BOOST_LOG_TRIVIAL(info) << __func__ << " finished. sequence = " << std::to_string(*sequence); } -void -BackendIndexer::populateCaches(BackendInterface const& backend) -{ - auto rng = backend.fetchLedgerRangeNoThrow(); - if (!rng) - return; - uint32_t sequence = rng->maxSequence; - BOOST_LOG_TRIVIAL(info) - << __func__ << " sequence = " << std::to_string(sequence); - doBooksRepair(backend, sequence); - doKeysRepair(backend, sequence); - std::optional cursor; - while (true) - { - try - { - auto [objects, curCursor, warning] = - backend.fetchLedgerPage(cursor, sequence, 2048); - BOOST_LOG_TRIVIAL(debug) << __func__ << " fetched a page"; - cursor = curCursor; - for (auto& obj : objects) - { - addKeyAsync(obj.key); - if (isOffer(obj.blob)) - { - auto book = getBook(obj.blob); - addBookOfferAsync(book, obj.key); - } - } - if (!cursor) - break; - } - catch (DatabaseTimeout const& e) - { - BOOST_LOG_TRIVIAL(warning) - << __func__ << " Database timeout fetching keys"; - std::this_thread::sleep_for(std::chrono::seconds(2)); - } - } - // Do reconcilation. Remove anything from keys or books that shouldn't - // be there - { - std::unique_lock lck(mtx); - populatingCacheAsync = false; - } - for (auto& key : deletedKeys) - { - deleteKey(key); - } - for (auto& book : deletedBooks) - { - for (auto& offer : book.second) - { - deleteBookOffer(book.first, offer); - } - } - { - std::unique_lock lck(mtx); - deletedKeys = {}; - deletedBooks = {}; - cacheCv_.notify_one(); - } - BOOST_LOG_TRIVIAL(info) - << __func__ - << " finished. keys.size() = " << std::to_string(keysCumulative.size()); -} -void -BackendIndexer::populateCachesAsync(BackendInterface const& backend) -{ - if (keysCumulative.size() > 0) - { - BOOST_LOG_TRIVIAL(info) - << __func__ << " caches already populated. returning"; - return; - } - { - std::unique_lock lck(mtx); - populatingCacheAsync = true; - } - BOOST_LOG_TRIVIAL(info) << __func__; - boost::asio::post(ioc_, [this, &backend]() { populateCaches(backend); }); -} - -void -BackendIndexer::waitForCaches() -{ - std::unique_lock lck(mtx); - cacheCv_.wait(lck, [this]() { - return !populatingCacheAsync && deletedKeys.size() == 0; - }); -} - void BackendIndexer::writeKeyFlagLedgerAsync( uint32_t ledgerSequence, @@ -436,28 +133,82 @@ BackendIndexer::writeKeyFlagLedgerAsync( << __func__ << " starting. sequence = " << std::to_string(ledgerSequence); - waitForCaches(); - auto keysCopy = keysCumulative; - boost::asio::post(ioc_, [=, this, &backend]() { - writeKeyFlagLedger(ledgerSequence, keyShift_, backend, keysCopy); - }); - BOOST_LOG_TRIVIAL(info) - << __func__ - << " finished. sequence = " << std::to_string(ledgerSequence); -} -void -BackendIndexer::writeBookFlagLedgerAsync( - uint32_t ledgerSequence, - BackendInterface const& backend) -{ - BOOST_LOG_TRIVIAL(info) - << __func__ - << " starting. sequence = " << std::to_string(ledgerSequence); + boost::asio::post(ioc_, [this, ledgerSequence, &backend]() { + std::unordered_set keys; + auto nextFlag = getKeyIndexOfSeq(ledgerSequence + 1); + BOOST_LOG_TRIVIAL(info) + << "writeKeyFlagLedger - " << std::to_string(nextFlag.keyIndex) + << " starting"; + ripple::uint256 zero = {}; + std::optional cursor; + size_t numKeys = 0; + auto begin = std::chrono::system_clock::now(); + while (true) + { + try + { + auto start = std::chrono::system_clock::now(); + auto [objects, curCursor, warning] = + backend.fetchLedgerPage(cursor, ledgerSequence, 2048); + auto mid = std::chrono::system_clock::now(); + // no cursor means this is the first page + if (!cursor) + { + // if there is no warning, we don't need to do a repair + // warning only shows up on the first page + if (warning) + { + BOOST_LOG_TRIVIAL(error) + << "writeKeyFlagLedger - " + << " prev flag ledger not written " + << std::to_string(nextFlag.keyIndex) << " : " + << std::to_string(ledgerSequence); + assert(false); + throw std::runtime_error("Missing prev flag"); + } + } - waitForCaches(); - auto booksCopy = booksCumulative; - boost::asio::post(ioc_, [=, this, &backend]() { - writeBookFlagLedger(ledgerSequence, bookShift_, backend, booksCopy); + cursor = curCursor; + for (auto& obj : objects) + { + keys.insert(obj.key); + } + backend.writeKeys(keys, nextFlag, true); + auto end = std::chrono::system_clock::now(); + BOOST_LOG_TRIVIAL(debug) + << "writeKeyFlagLedger - " + << std::to_string(nextFlag.keyIndex) << " fetched a page " + << " cursor = " + << (cursor.has_value() ? ripple::strHex(*cursor) + : std::string{}) + << " num keys = " << std::to_string(numKeys) + << " fetch time = " + << std::chrono::duration_cast( + mid - start) + .count() + << " write time = " + << std::chrono::duration_cast( + end - mid) + .count(); + if (!cursor) + break; + } + catch (DatabaseTimeout const& e) + { + BOOST_LOG_TRIVIAL(warning) + << __func__ << " Database timeout fetching keys"; + std::this_thread::sleep_for(std::chrono::seconds(2)); + } + } + backend.writeKeys({zero}, nextFlag, true); + auto end = std::chrono::system_clock::now(); + BOOST_LOG_TRIVIAL(info) + << "writeKeyFlagLedger - " << std::to_string(nextFlag.keyIndex) + << " finished. " + << " num keys = " << std::to_string(numKeys) << " total time = " + << std::chrono::duration_cast( + end - begin) + .count(); }); BOOST_LOG_TRIVIAL(info) << __func__ @@ -472,7 +223,6 @@ BackendIndexer::finish(uint32_t ledgerSequence, BackendInterface const& backend) << " starting. sequence = " << std::to_string(ledgerSequence); bool isFirst = false; auto keyIndex = getKeyIndexOfSeq(ledgerSequence); - auto bookIndex = getBookIndexOfSeq(ledgerSequence); if (isFirst_) { auto rng = backend.fetchLedgerRangeNoThrow(); @@ -481,26 +231,18 @@ BackendIndexer::finish(uint32_t ledgerSequence, BackendInterface const& backend) else { keyIndex = KeyIndex{ledgerSequence}; - bookIndex = BookIndex{ledgerSequence}; } } - { - std::unique_lock lck(mtx); - updateCv_.wait(lck, [this]() { return updatesOutstanding_ == 0; }); - } backend.writeKeys(keys, keyIndex); - backend.writeBooks(books, bookIndex); if (isFirst_) { // write completion record ripple::uint256 zero = {}; - backend.writeBooks({{zero, {zero}}}, bookIndex); backend.writeKeys({zero}, keyIndex); } isFirst_ = false; keys = {}; - books = {}; BOOST_LOG_TRIVIAL(info) << __func__ << " finished. sequence = " << std::to_string(ledgerSequence); diff --git a/reporting/BackendInterface.h b/reporting/BackendInterface.h index 78f54221..fb60a107 100644 --- a/reporting/BackendInterface.h +++ b/reporting/BackendInterface.h @@ -83,70 +83,18 @@ class BackendIndexer std::mutex mutex_; std::optional work_; std::thread ioThread_; - std::thread updateThread_; - std::atomic_uint32_t updatesOutstanding_ = 0; - std::condition_variable updateCv_; uint32_t keyShift_ = 20; - uint32_t bookShift_ = 10; std::unordered_set keys; - std::unordered_set keysCumulative; - std::unordered_map> - books; - std::unordered_map> - booksCumulative; - bool populatingCacheAsync = false; - // These are only used when the cache is being populated asynchronously - std::unordered_set deletedKeys; - std::unordered_map> - deletedBooks; - std::unordered_set keysRepair; - std::unordered_map> - booksRepair; - std::mutex mtx; - std::condition_variable cacheCv_; mutable bool isFirst_ = true; - void - addKeyAsync(ripple::uint256 const& key); - void - addBookOfferAsync( - ripple::uint256 const& book, - ripple::uint256 const& offerKey); - public: BackendIndexer(boost::json::object const& config); ~BackendIndexer(); void - populateCachesAsync(BackendInterface const& backend); - void - populateCaches(BackendInterface const& backend); - void - clearCaches(); - // Blocking, possibly for minutes - void - waitForCaches(); - - void - writeLedgerObject( - ripple::uint256&& key, - std::optional&& book, - bool isCreated, - bool isDeleted); - - void - addKey(ripple::uint256 const& key); - void - deleteKey(ripple::uint256 const& key); - void - addBookOffer(ripple::uint256 const& book, ripple::uint256 const& offerKey); - - void - deleteBookOffer( - ripple::uint256 const& book, - ripple::uint256 const& offerKey); + addKey(ripple::uint256&& key); void finish(uint32_t ledgerSequence, BackendInterface const& backend); @@ -155,22 +103,9 @@ public: uint32_t ledgerSequence, BackendInterface const& backend); void - writeBookFlagLedgerAsync( - uint32_t ledgerSequence, - BackendInterface const& backend); - void doKeysRepair( BackendInterface const& backend, std::optional sequence); - void - doBooksRepair( - BackendInterface const& backend, - std::optional sequence); - uint32_t - getBookShift() - { - return bookShift_; - } uint32_t getKeyShift() { @@ -191,24 +126,6 @@ public: { return (ledgerSequence % (1 << keyShift_)) == 0; } - BookIndex - getBookIndexOfSeq(uint32_t seq) const - { - if (isBookFlagLedger(seq)) - return BookIndex{seq}; - auto incr = (1 << bookShift_); - BookIndex index{(seq >> bookShift_ << bookShift_) + incr}; - assert(isBookFlagLedger(index.bookIndex)); - assert( - bookShift_ == keyShift_ || !isKeyFlagLedger(index.bookIndex) || - !isKeyFlagLedger(index.bookIndex + incr)); - return index; - } - bool - isBookFlagLedger(uint32_t ledgerSequence) const - { - return (ledgerSequence % (1 << bookShift_)) == 0; - } }; class BackendInterface @@ -241,18 +158,6 @@ public: return KeyIndex{seq}; return indexer_.getKeyIndexOfSeq(seq); } - std::optional - getBookIndexOfSeq(uint32_t seq) const - { - if (indexer_.isBookFlagLedger(seq)) - return BookIndex{seq}; - auto rng = fetchLedgerRange(); - if (!rng) - return {}; - if (rng->minSequence == seq) - return BookIndex{seq}; - return indexer_.getBookIndexOfSeq(seq); - } bool finishWrites(uint32_t ledgerSequence) const @@ -266,9 +171,8 @@ public: auto rng = fetchLedgerRangeNoThrow(); if (rng && rng->minSequence != ledgerSequence) isFirst_ = false; + indexer_.doKeysRepair(*this, ledgerSequence); } - if (indexer_.isBookFlagLedger(ledgerSequence) || isFirst_) - indexer_.writeBookFlagLedgerAsync(ledgerSequence, *this); if (indexer_.isKeyFlagLedger(ledgerSequence) || isFirst_) indexer_.writeKeyFlagLedgerAsync(ledgerSequence, *this); isFirst_ = false; @@ -315,8 +219,76 @@ public: virtual std::vector fetchAllTransactionHashesInLedger(uint32_t ledgerSequence) const = 0; - virtual LedgerPage + LedgerPage fetchLedgerPage( + std::optional const& cursor, + std::uint32_t ledgerSequence, + std::uint32_t limit) const + { + assert(limit != 0); + bool incomplete = false; + { + auto check = doFetchLedgerPage({}, ledgerSequence, 1); + incomplete = check.warning.has_value(); + } + uint32_t adjustedLimit = limit; + LedgerPage page; + page.cursor = cursor; + do + { + adjustedLimit = adjustedLimit > 2048 ? 2048 : adjustedLimit * 2; + auto partial = + doFetchLedgerPage(page.cursor, ledgerSequence, adjustedLimit); + page.objects.insert( + page.objects.end(), + partial.objects.begin(), + partial.objects.end()); + page.cursor = partial.cursor; + } while (page.objects.size() < limit && page.cursor); + if (incomplete) + { + std::cout << "checking lower" << std::endl; + uint32_t lowerSequence = ledgerSequence >> indexer_.getKeyShift() + << indexer_.getKeyShift(); + auto lowerPage = fetchLedgerPage(cursor, lowerSequence, limit); + std::vector keys; + std::transform( + std::move_iterator(lowerPage.objects.begin()), + std::move_iterator(lowerPage.objects.end()), + std::back_inserter(keys), + [](auto&& elt) { return std::move(elt.key); }); + auto objs = fetchLedgerObjects(keys, ledgerSequence); + for (size_t i = 0; i < keys.size(); ++i) + { + auto& obj = objs[i]; + auto& key = keys[i]; + if (obj.size()) + page.objects.push_back({std::move(key), std::move(obj)}); + } + std::sort( + page.objects.begin(), page.objects.end(), [](auto a, auto b) { + return a.key < b.key; + }); + page.warning = "Data may be incomplete"; + } + if (page.objects.size() >= limit) + { + page.objects.resize(limit); + page.cursor = page.objects.back().key; + } + return page; + } + + std::optional + fetchSuccessor(ripple::uint256 key, uint32_t ledgerSequence) + { + auto page = fetchLedgerPage({++key}, ledgerSequence, 1); + if (page.objects.size()) + return page.objects[0]; + return {}; + } + virtual LedgerPage + doFetchLedgerPage( std::optional const& cursor, std::uint32_t ledgerSequence, std::uint32_t limit) const = 0; @@ -363,8 +335,7 @@ public: std::optional&& book) const { ripple::uint256 key256 = ripple::uint256::fromVoid(key.data()); - indexer_.writeLedgerObject( - std::move(key256), std::move(book), isCreated, isDeleted); + indexer_.addKey(std::move(key256)); doWriteLedgerObject( std::move(key), seq, @@ -418,13 +389,6 @@ public: std::unordered_set const& keys, KeyIndex const& index, bool isAsync = false) const = 0; - virtual bool - writeBooks( - std::unordered_map< - ripple::uint256, - std::unordered_set> const& books, - BookIndex const& index, - bool isAsync = false) const = 0; virtual ~BackendInterface() { diff --git a/reporting/CassandraBackend.cpp b/reporting/CassandraBackend.cpp index d22f0e5a..7c4152ab 100644 --- a/reporting/CassandraBackend.cpp +++ b/reporting/CassandraBackend.cpp @@ -394,7 +394,7 @@ CassandraBackend::fetchLedgerDiff(uint32_t ledgerSequence) const return objects; } LedgerPage -CassandraBackend::fetchLedgerPage( +CassandraBackend::doFetchLedgerPage( std::optional const& cursor, std::uint32_t ledgerSequence, std::uint32_t limit) const @@ -433,7 +433,7 @@ CassandraBackend::fetchLedgerPage( if (keys.size() && keys.size() == limit) { page.cursor = keys.back(); - keys.pop_back(); + ++(*page.cursor); } auto objects = fetchLedgerObjects(keys, ledgerSequence); if (objects.size() != keys.size()) @@ -501,124 +501,7 @@ CassandraBackend::fetchBookOffers( std::uint32_t limit, std::optional const& cursor) const { - auto rng = fetchLedgerRange(); - auto limitTuningFactor = 50; - - if (!rng) - return {{}, {}}; - - auto readBooks = - [this, &book, &limit, &limitTuningFactor](std::uint32_t sequence) - -> std::pair< - bool, - std::vector>> { - CassandraStatement completeQuery{completeBook_}; - completeQuery.bindInt(sequence); - CassandraResult completeResult = executeSyncRead(completeQuery); - bool complete = completeResult.hasResult(); - - CassandraStatement statement{selectBook_}; - std::vector> keys = {}; - - statement.bindBytes(book.data(), 24); - statement.bindInt(sequence); - - BOOST_LOG_TRIVIAL(info) - << __func__ << " upper = " << std::to_string(sequence) << " book = " - << ripple::strHex(std::string((char*)book.data(), 24)); - - ripple::uint256 zero = beast::zero; - statement.bindBytes(zero.data(), 8); - statement.bindBytes(zero); - - statement.bindUInt(limit * limitTuningFactor); - - auto start = std::chrono::system_clock::now(); - - CassandraResult result = executeSyncRead(statement); - - auto end = std::chrono::system_clock::now(); - auto duration = ((end - start).count()) / 1000000000.0; - - BOOST_LOG_TRIVIAL(info) << "Book directory fetch took " - << std::to_string(duration) << " seconds."; - - BOOST_LOG_TRIVIAL(debug) << __func__ << " - got keys"; - if (!result) - { - return {false, {{}, {}}}; - } - - do - { - auto [quality, index] = result.getBytesTuple(); - std::uint64_t q = 0; - memcpy(&q, quality.data(), 8); - keys.push_back({q, ripple::uint256::fromVoid(index.data())}); - - } while (result.nextRow()); - - return {complete, keys}; - }; - - auto upper = getBookIndexOfSeq(ledgerSequence); - auto [complete, quality_keys] = readBooks(upper->bookIndex); - - BOOST_LOG_TRIVIAL(debug) - << __func__ << " - populated keys. num keys = " << quality_keys.size(); - - std::optional warning = {}; - if (!complete) - { - warning = "Data may be incomplete"; - BOOST_LOG_TRIVIAL(info) << "May be incomplete. Fetching other page"; - - auto bookShift = indexer_.getBookShift(); - std::uint32_t lower = upper->bookIndex - (1 << bookShift); - auto originalKeys = std::move(quality_keys); - auto [lowerComplete, otherKeys] = readBooks(lower); - - assert(lowerComplete); - - std::vector> merged_keys; - merged_keys.reserve(originalKeys.size() + otherKeys.size()); - std::merge( - originalKeys.begin(), - originalKeys.end(), - otherKeys.begin(), - otherKeys.end(), - std::back_inserter(merged_keys), - [](auto pair1, auto pair2) { return pair1.first < pair2.first; }); - } - - std::vector merged(quality_keys.size()); - std::transform( - quality_keys.begin(), - quality_keys.end(), - std::back_inserter(merged), - [](auto pair) { return pair.second; }); - - auto uniqEnd = std::unique(merged.begin(), merged.end()); - std::vector keys{merged.begin(), uniqEnd}; - - std::cout << keys.size() << std::endl; - - auto start = std::chrono::system_clock::now(); - std::vector objs = fetchLedgerObjects(keys, ledgerSequence); - auto end = std::chrono::system_clock::now(); - auto duration = ((end - start).count()) / 1000000000.0; - - BOOST_LOG_TRIVIAL(info) - << "Book object fetch took " << std::to_string(duration) << " seconds."; - - std::vector results; - for (size_t i = 0; i < objs.size(); ++i) - { - if (objs[i].size() != 0) - results.push_back({keys[i], objs[i]}); - } - - return {results, {}, warning}; + return {}; } // namespace Backend struct WriteBookCallbackData { @@ -907,57 +790,6 @@ CassandraBackend::writeKeys( return true; } -bool -CassandraBackend::writeBooks( - std::unordered_map< - ripple::uint256, - std::unordered_set> const& books, - BookIndex const& index, - bool isAsync) const -{ - BOOST_LOG_TRIVIAL(info) - << __func__ << " Ledger = " << std::to_string(index.bookIndex) - << " . num books = " << std::to_string(books.size()); - std::condition_variable cv; - std::mutex mtx; - std::vector> cbs; - uint32_t concurrentLimit = - isAsync ? indexerMaxRequestsOutstanding : maxRequestsOutstanding; - std::atomic_uint32_t numOutstanding = 0; - size_t count = 0; - auto start = std::chrono::system_clock::now(); - for (auto& book : books) - { - for (auto& offer : book.second) - { - ++numOutstanding; - ++count; - cbs.push_back(std::make_shared( - *this, - book.first, - offer, - index.bookIndex, - cv, - mtx, - numOutstanding)); - writeBook(*cbs.back()); - BOOST_LOG_TRIVIAL(trace) << __func__ << "Submitted a write request"; - std::unique_lock lck(mtx); - BOOST_LOG_TRIVIAL(trace) << __func__ << "Got the mutex"; - cv.wait(lck, [&numOutstanding, concurrentLimit]() { - return numOutstanding < concurrentLimit; - }); - } - } - BOOST_LOG_TRIVIAL(info) << __func__ - << "Submitted all book writes. Waiting for them to " - "finish. num submitted = " - << std::to_string(count); - std::unique_lock lck(mtx); - cv.wait(lck, [&numOutstanding]() { return numOutstanding == 0; }); - BOOST_LOG_TRIVIAL(info) << __func__ << "Finished writing books"; - return true; -} bool CassandraBackend::isIndexed(uint32_t ledgerSequence) const { @@ -1445,18 +1277,17 @@ CassandraBackend::open(bool readOnly) cass_cluster_set_connect_timeout(cluster, 10000); int ttl = getInt("ttl") ? *getInt("ttl") * 2 : 0; - int keysTtl, - keysIncr = ttl != 0 ? pow(2, indexer_.getKeyShift()) * 4 * 2 : 0; + int keysTtl = (ttl != 0 ? pow(2, indexer_.getKeyShift()) * 4 * 2 : 0); + int incr = keysTtl; while (keysTtl < ttl) { - keysTtl += keysIncr; - } - int booksTtl, - booksIncr = ttl != 0 ? pow(2, indexer_.getBookShift()) * 4 * 2 : 0; - while (booksTtl < ttl) - { - booksTtl += booksIncr; + keysTtl += incr; } + int booksTtl = 0; + BOOST_LOG_TRIVIAL(info) + << __func__ << " setting ttl to " << std::to_string(ttl) + << " , books ttl to " << std::to_string(booksTtl) << " , keys ttl to " + << std::to_string(keysTtl); auto executeSimpleStatement = [this](std::string const& query) { CassStatement* statement = makeStatement(query.c_str(), 0); @@ -1529,7 +1360,7 @@ CassandraBackend::open(bool readOnly) << " ( key blob, sequence bigint, object blob, PRIMARY " "KEY(key, " "sequence)) WITH CLUSTERING ORDER BY (sequence DESC) AND" - << " default_time_to_live = " << ttl; + << " default_time_to_live = " << std::to_string(ttl); if (!executeSimpleStatement(query.str())) continue; @@ -1544,7 +1375,7 @@ CassandraBackend::open(bool readOnly) << " ( hash blob PRIMARY KEY, ledger_sequence bigint, " "transaction " "blob, metadata blob)" - << " WITH default_time_to_live = " << ttl; + << " WITH default_time_to_live = " << std::to_string(ttl); if (!executeSimpleStatement(query.str())) continue; @@ -1571,7 +1402,7 @@ CassandraBackend::open(bool readOnly) << " ( sequence bigint, key blob, PRIMARY KEY " "(sequence, key))" " WITH default_time_to_live = " - << keysTtl; + << std::to_string(keysTtl); if (!executeSimpleStatement(query.str())) continue; @@ -1582,28 +1413,13 @@ CassandraBackend::open(bool readOnly) continue; query.str(""); - query << "CREATE TABLE IF NOT EXISTS " << tablePrefix << "books" - << " ( book blob, sequence bigint, quality_key tuple, PRIMARY KEY " - "((book, sequence), quality_key)) WITH CLUSTERING ORDER BY " - "(quality_key " - "ASC) AND default_time_to_live = " - << booksTtl; - if (!executeSimpleStatement(query.str())) - continue; - query.str(""); - query << "SELECT * FROM " << tablePrefix << "books" - << " LIMIT 1"; - if (!executeSimpleStatement(query.str())) - continue; - query.str(""); query << "CREATE TABLE IF NOT EXISTS " << tablePrefix << "account_tx" << " ( account blob, seq_idx tuple, " " hash blob, " "PRIMARY KEY " "(account, seq_idx)) WITH " "CLUSTERING ORDER BY (seq_idx desc)" - << " AND default_time_to_live = " << ttl; + << " AND default_time_to_live = " << std::to_string(ttl); if (!executeSimpleStatement(query.str())) continue; @@ -1617,7 +1433,7 @@ CassandraBackend::open(bool readOnly) query.str(""); query << "CREATE TABLE IF NOT EXISTS " << tablePrefix << "ledgers" << " ( sequence bigint PRIMARY KEY, header blob )" - << " WITH default_time_to_live = " << ttl; + << " WITH default_time_to_live = " << std::to_string(ttl); if (!executeSimpleStatement(query.str())) continue; @@ -1630,7 +1446,7 @@ CassandraBackend::open(bool readOnly) query.str(""); query << "CREATE TABLE IF NOT EXISTS " << tablePrefix << "ledger_hashes" << " (hash blob PRIMARY KEY, sequence bigint)" - << " WITH default_time_to_live = " << ttl; + << " WITH default_time_to_live = " << std::to_string(ttl); if (!executeSimpleStatement(query.str())) continue; @@ -1680,13 +1496,6 @@ CassandraBackend::open(bool readOnly) if (!insertKey_.prepareStatement(query, session_.get())) continue; - query.str(""); - query << "INSERT INTO " << tablePrefix << "books" - << " (book, sequence, quality_key) VALUES (?, ?, (?, ?))"; - if (!insertBook2_.prepareStatement(query, session_.get())) - continue; - query.str(""); - query.str(""); query << "SELECT key FROM " << tablePrefix << "keys" << " WHERE sequence = ? AND key >= ? ORDER BY key ASC LIMIT ?"; @@ -1755,23 +1564,6 @@ CassandraBackend::open(bool readOnly) if (!getToken_.prepareStatement(query, session_.get())) continue; - query.str(""); - query << "SELECT quality_key FROM " << tablePrefix << "books " - << " WHERE book = ? AND sequence = ?" - << " AND quality_key >= (?, ?)" - " ORDER BY quality_key ASC " - " LIMIT ?"; - if (!selectBook_.prepareStatement(query, session_.get())) - continue; - - query.str(""); - query << "SELECT * FROM " << tablePrefix << "books " - << "WHERE book = " - << "0x000000000000000000000000000000000000000000000000" - << " AND sequence = ?"; - if (!completeBook_.prepareStatement(query, session_.get())) - continue; - query.str(""); query << " INSERT INTO " << tablePrefix << "account_tx" << " (account, seq_idx, hash) " diff --git a/reporting/CassandraBackend.h b/reporting/CassandraBackend.h index f9b326ca..09b97a1a 100644 --- a/reporting/CassandraBackend.h +++ b/reporting/CassandraBackend.h @@ -957,7 +957,7 @@ public: CassandraResult result = executeSyncRead(statement); if (!result) { - BOOST_LOG_TRIVIAL(error) << __func__ << " - no rows"; + BOOST_LOG_TRIVIAL(debug) << __func__ << " - no rows"; return {}; } return result.getBytes(); @@ -997,7 +997,7 @@ public: return {{result.getBytes(), result.getBytes(), result.getUInt32()}}; } LedgerPage - fetchLedgerPage( + doFetchLedgerPage( std::optional const& cursor, std::uint32_t ledgerSequence, std::uint32_t limit) const override; @@ -1019,13 +1019,6 @@ public: std::unordered_set const& keys, KeyIndex const& index, bool isAsync = false) const override; - bool - writeBooks( - std::unordered_map< - ripple::uint256, - std::unordered_set> const& books, - BookIndex const& index, - bool isAsync = false) const override; BookOffersPage fetchBookOffers( ripple::uint256 const& book, diff --git a/reporting/PostgresBackend.cpp b/reporting/PostgresBackend.cpp index d6bae3c1..55572ed9 100644 --- a/reporting/PostgresBackend.cpp +++ b/reporting/PostgresBackend.cpp @@ -324,7 +324,7 @@ PostgresBackend::fetchAllTransactionHashesInLedger( } LedgerPage -PostgresBackend::fetchLedgerPage( +PostgresBackend::doFetchLedgerPage( std::optional const& cursor, std::uint32_t ledgerSequence, std::uint32_t limit) const @@ -338,7 +338,7 @@ PostgresBackend::fetchLedgerPage( sql << "SELECT key FROM keys WHERE ledger_seq = " << std::to_string(index->keyIndex); if (cursor) - sql << " AND key > \'\\x" << ripple::strHex(*cursor) << "\'"; + sql << " AND key >= \'\\x" << ripple::strHex(*cursor) << "\'"; sql << " ORDER BY key ASC LIMIT " << std::to_string(limit); BOOST_LOG_TRIVIAL(debug) << __func__ << sql.str(); auto res = pgQuery(sql.str().data()); @@ -352,7 +352,10 @@ PostgresBackend::fetchLedgerPage( keys.push_back({res.asUInt256(i, 0)}); } if (numRows == limit) + { returnCursor = keys.back(); + ++(*returnCursor); + } auto objs = fetchLedgerObjects(keys, ledgerSequence); std::vector results; @@ -379,155 +382,7 @@ PostgresBackend::fetchBookOffers( std::uint32_t limit, std::optional const& cursor) const { - auto rng = fetchLedgerRange(); - auto limitTuningFactor = 50; - - if (!rng) - return {{}, {}}; - - ripple::uint256 bookBase = - ripple::keylet::quality({ripple::ltDIR_NODE, book}, 0).key; - ripple::uint256 bookEnd = ripple::getQualityNext(bookBase); - - using bookKeyPair = std::pair; - auto getBooks = [this, &bookBase, &bookEnd, &limit, &limitTuningFactor]( - std::uint32_t sequence) - -> std::pair> { - BOOST_LOG_TRIVIAL(info) << __func__ << ": Fetching books between " - << "0x" << ripple::strHex(bookBase) << " and " - << "0x" << ripple::strHex(bookEnd) - << "at ledger " << std::to_string(sequence); - - auto start = std::chrono::system_clock::now(); - - std::stringstream sql; - sql << "SELECT COUNT(*) FROM books WHERE " - << "book = \'\\x" << ripple::strHex(ripple::uint256(beast::zero)) - << "\' AND ledger_seq = " << std::to_string(sequence); - - bool complete; - PgQuery pgQuery(this->pgPool_); - auto res = pgQuery(sql.str().data()); - if (size_t numRows = checkResult(res, 1)) - complete = res.asInt(0, 0) != 0; - else - return {false, {}}; - - sql.str(""); - sql << "SELECT book, offer_key FROM books " - << "WHERE ledger_seq = " << std::to_string(sequence) - << " AND book >= " - << "\'\\x" << ripple::strHex(bookBase) << "\' " - << "AND book < " - << "\'\\x" << ripple::strHex(bookEnd) << "\' " - << "ORDER BY book ASC " - << "LIMIT " << std::to_string(limit * limitTuningFactor); - - BOOST_LOG_TRIVIAL(debug) << sql.str(); - - res = pgQuery(sql.str().data()); - - auto end = std::chrono::system_clock::now(); - auto duration = ((end - start).count()) / 1000000000.0; - - BOOST_LOG_TRIVIAL(info) << "Postgres book key fetch took " - << std::to_string(duration) << " seconds"; - - if (size_t numRows = checkResult(res, 2)) - { - std::vector results(numRows); - for (size_t i = 0; i < numRows; ++i) - { - auto book = res.asUInt256(i, 0); - auto key = res.asUInt256(i, 1); - - results.push_back({std::move(book), std::move(key)}); - } - - return {complete, results}; - } - - return {complete, {}}; - }; - - auto fetchObjects = - [this]( - std::vector const& pairs, - std::uint32_t sequence, - std::uint32_t limit, - std::optional warning) -> BookOffersPage { - std::vector allKeys(pairs.size()); - for (auto const& pair : pairs) - allKeys.push_back(pair.second); - - auto uniqEnd = std::unique(allKeys.begin(), allKeys.end()); - std::vector keys{allKeys.begin(), uniqEnd}; - - auto start = std::chrono::system_clock::now(); - - auto ledgerEntries = fetchLedgerObjects(keys, sequence); - - auto end = std::chrono::system_clock::now(); - auto duration = ((end - start).count()) / 1000000000.0; - - BOOST_LOG_TRIVIAL(info) - << "Postgres book objects fetch took " << std::to_string(duration) - << " seconds. " - << "Fetched " << std::to_string(ledgerEntries.size()) - << " ledger entries"; - - std::vector objects; - for (auto i = 0; i < ledgerEntries.size(); ++i) - { - if (ledgerEntries[i].size() != 0) - objects.push_back(LedgerObject{keys[i], ledgerEntries[i]}); - } - - return {objects, {}, warning}; - }; - - std::uint32_t bookShift = indexer_.getBookShift(); - auto upper = getBookIndexOfSeq(ledgerSequence); - - auto [upperComplete, upperResults] = getBooks(upper->bookIndex); - - BOOST_LOG_TRIVIAL(info) << __func__ << ": Upper results found " - << upperResults.size() << " books."; - - if (upperComplete) - { - BOOST_LOG_TRIVIAL(info) << "Upper book page is complete"; - return fetchObjects(upperResults, ledgerSequence, limit, {}); - } - - BOOST_LOG_TRIVIAL(info) << "Upper book page is not complete " - << "fetching again"; - - auto lower = upper->bookIndex - (1 << bookShift); - if (lower < rng->minSequence) - lower = rng->minSequence; - - auto [lowerComplete, lowerResults] = getBooks(lower); - - BOOST_LOG_TRIVIAL(info) << __func__ << ": Lower results found " - << lowerResults.size() << " books."; - - assert(lowerComplete); - - std::vector pairs; - pairs.reserve(upperResults.size() + lowerResults.size()); - std::merge( - upperResults.begin(), - upperResults.end(), - lowerResults.begin(), - lowerResults.end(), - std::back_inserter(pairs), - [](bookKeyPair pair1, bookKeyPair pair2) -> bool { - return pair1.first < pair2.first; - }); - - std::optional warning = "book data may be incomplete"; - return fetchObjects(pairs, ledgerSequence, limit, warning); + return {}; } std::vector @@ -841,8 +696,6 @@ PostgresBackend::writeKeys( std::stringstream temp; buffer.swap(temp); numRows = 0; - if (isAsync) - std::this_thread::sleep_for(std::chrono::seconds(1)); } } if (isAsync) @@ -854,54 +707,6 @@ PostgresBackend::writeKeys( return true; } bool -PostgresBackend::writeBooks( - std::unordered_map< - ripple::uint256, - std::unordered_set> const& books, - BookIndex const& index, - bool isAsync) const -{ - if (abortWrite_) - return false; - BOOST_LOG_TRIVIAL(debug) << __func__; - - PgQuery pgQuery(pgPool_); - PgQuery& conn = isAsync ? pgQuery : writeConnection_; - std::stringstream asyncBuffer; - std::stringstream& buffer = isAsync ? asyncBuffer : booksBuffer_; - if (isAsync) - conn("BEGIN"); - size_t numRows = 0; - for (auto& book : books) - { - for (auto& offer : book.second) - { - buffer << std::to_string(index.bookIndex) << '\t' << "\\\\x" - << ripple::strHex(book.first) << '\t' << "\\\\x" - << ripple::strHex(offer) << '\n'; - numRows++; - // If the buffer gets too large, the insert fails. Not sure why. - // When writing in the background, we insert after every 10 rows - if ((isAsync && numRows == 1000) || numRows == 100000) - { - conn.bulkInsert("books", buffer.str()); - std::stringstream temp; - buffer.swap(temp); - numRows = 0; - if (isAsync) - std::this_thread::sleep_for(std::chrono::seconds(1)); - } - } - } - if (isAsync) - { - if (numRows > 0) - conn.bulkInsert("books", buffer.str()); - conn("COMMIT"); - } - return true; -} -bool PostgresBackend::doOnlineDelete(uint32_t numLedgersToKeep) const { auto rng = fetchLedgerRangeNoThrow(); diff --git a/reporting/PostgresBackend.h b/reporting/PostgresBackend.h index c4890ecf..d551f42d 100644 --- a/reporting/PostgresBackend.h +++ b/reporting/PostgresBackend.h @@ -16,7 +16,7 @@ private: std::shared_ptr pgPool_; mutable PgQuery writeConnection_; mutable bool abortWrite_ = false; - mutable boost::asio::thread_pool pool_{200}; + mutable boost::asio::thread_pool pool_{16}; uint32_t writeInterval_ = 1000000; public: @@ -46,7 +46,7 @@ public: fetchAllTransactionHashesInLedger(uint32_t ledgerSequence) const override; LedgerPage - fetchLedgerPage( + doFetchLedgerPage( std::optional const& cursor, std::uint32_t ledgerSequence, std::uint32_t limit) const override; @@ -120,13 +120,6 @@ public: std::unordered_set const& keys, KeyIndex const& index, bool isAsync = false) const override; - bool - writeBooks( - std::unordered_map< - ripple::uint256, - std::unordered_set> const& books, - BookIndex const& index, - bool isAsync = false) const override; }; } // namespace Backend #endif diff --git a/reporting/ReportingETL.cpp b/reporting/ReportingETL.cpp index 023aaed7..9b093a23 100644 --- a/reporting/ReportingETL.cpp +++ b/reporting/ReportingETL.cpp @@ -375,7 +375,6 @@ ReportingETL::runETLPipeline(uint32_t startSequence, int numExtractors) BOOST_LOG_TRIVIAL(info) << __func__ << " : " << "Populating caches"; - flatMapBackend_->getIndexer().populateCachesAsync(*flatMapBackend_); BOOST_LOG_TRIVIAL(info) << __func__ << " : " << "Populated caches"; @@ -541,7 +540,6 @@ ReportingETL::runETLPipeline(uint32_t startSequence, int numExtractors) << "Extracted and wrote " << *lastPublishedSequence - startSequence << " in " << ((end - begin).count()) / 1000000000.0; writing_ = false; - flatMapBackend_->getIndexer().clearCaches(); BOOST_LOG_TRIVIAL(debug) << __func__ << " : " << "Stopping etl pipeline"; diff --git a/unittests/main.cpp b/unittests/main.cpp new file mode 100644 index 00000000..4ac86e25 --- /dev/null +++ b/unittests/main.cpp @@ -0,0 +1,727 @@ +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +// Demonstrate some basic assertions. +TEST(BackendTest, Basic) +{ + boost::log::core::get()->set_filter( + boost::log::trivial::severity >= boost::log::trivial::warning); + std::string keyspace = + "oceand_test_" + + std::to_string( + std::chrono::system_clock::now().time_since_epoch().count()); + boost::json::object cassandraConfig{ + {"database", + {{"type", "cassandra"}, + {"cassandra", + {{"contact_points", "127.0.0.1"}, + {"port", 9042}, + {"keyspace", keyspace.c_str()}, + {"replication_factor", 1}, + {"table_prefix", ""}, + {"max_requests_outstanding", 1000}, + {"indexer_key_shift", 2}, + {"threads", 8}}}}}}; + boost::json::object postgresConfig{ + {"database", + {{"type", "postgres"}, + {"postgres", + {{"contact_point", "127.0.0.1"}, + {"username", "postgres"}, + {"database", keyspace.c_str()}, + {"password", "postgres"}, + {"indexer_key_shift", 2}, + {"threads", 8}}}}}}; + std::vector configs = { + cassandraConfig, postgresConfig}; + for (auto& config : configs) + { + std::cout << keyspace << std::endl; + auto backend = Backend::makeBackend(config); + backend->open(false); + + std::string rawHeader = + "03C3141A01633CD656F91B4EBB5EB89B791BD34DBC8A04BB6F407C5335BC54351E" + "DD73" + "3898497E809E04074D14D271E4832D7888754F9230800761563A292FA2315A6DB6" + "FE30" + "CC5909B285080FCD6773CC883F9FE0EE4D439340AC592AADB973ED3CF53E2232B3" + "3EF5" + "7CECAC2816E3122816E31A0A00F8377CD95DFA484CFAE282656A58CE5AA29652EF" + "FD80" + "AC59CD91416E4E13DBBE"; + + auto hexStringToBinaryString = [](auto const& hex) { + auto blob = ripple::strUnHex(hex); + std::string strBlob; + for (auto c : *blob) + { + strBlob += c; + } + return strBlob; + }; + auto binaryStringToUint256 = [](auto const& bin) -> ripple::uint256 { + ripple::uint256 uint; + return uint.fromVoid((void const*)bin.data()); + }; + auto ledgerInfoToBinaryString = [](auto const& info) { + auto blob = ledgerInfoToBlob(info); + std::string strBlob; + for (auto c : blob) + { + strBlob += c; + } + return strBlob; + }; + + std::string rawHeaderBlob = hexStringToBinaryString(rawHeader); + ripple::LedgerInfo lgrInfo = + deserializeHeader(ripple::makeSlice(rawHeaderBlob)); + + backend->startWrites(); + backend->writeLedger(lgrInfo, std::move(rawHeaderBlob), true); + ASSERT_TRUE(backend->finishWrites(lgrInfo.seq)); + { + auto rng = backend->fetchLedgerRange(); + EXPECT_TRUE(rng.has_value()); + EXPECT_EQ(rng->minSequence, rng->maxSequence); + EXPECT_EQ(rng->maxSequence, lgrInfo.seq); + } + { + auto seq = backend->fetchLatestLedgerSequence(); + EXPECT_TRUE(seq.has_value()); + EXPECT_EQ(*seq, lgrInfo.seq); + } + + { + auto retLgr = backend->fetchLedgerBySequence(lgrInfo.seq); + EXPECT_TRUE(retLgr.has_value()); + EXPECT_EQ(retLgr->seq, lgrInfo.seq); + EXPECT_EQ(ledgerInfoToBlob(lgrInfo), ledgerInfoToBlob(*retLgr)); + } + + EXPECT_FALSE( + backend->fetchLedgerBySequence(lgrInfo.seq + 1).has_value()); + auto lgrInfoOld = lgrInfo; + + auto lgrInfoNext = lgrInfo; + lgrInfoNext.seq = lgrInfo.seq + 1; + lgrInfoNext.parentHash = lgrInfo.hash; + lgrInfoNext.hash++; + lgrInfoNext.accountHash = ~lgrInfo.accountHash; + { + std::string rawHeaderBlob = ledgerInfoToBinaryString(lgrInfoNext); + + backend->startWrites(); + backend->writeLedger(lgrInfoNext, std::move(rawHeaderBlob)); + ASSERT_TRUE(backend->finishWrites(lgrInfoNext.seq)); + } + { + auto rng = backend->fetchLedgerRange(); + EXPECT_TRUE(rng.has_value()); + EXPECT_EQ(rng->minSequence, lgrInfoOld.seq); + EXPECT_EQ(rng->maxSequence, lgrInfoNext.seq); + } + { + auto seq = backend->fetchLatestLedgerSequence(); + EXPECT_EQ(seq, lgrInfoNext.seq); + } + { + auto retLgr = backend->fetchLedgerBySequence(lgrInfoNext.seq); + EXPECT_TRUE(retLgr.has_value()); + EXPECT_EQ(retLgr->seq, lgrInfoNext.seq); + EXPECT_EQ(ledgerInfoToBlob(*retLgr), ledgerInfoToBlob(lgrInfoNext)); + EXPECT_NE(ledgerInfoToBlob(*retLgr), ledgerInfoToBlob(lgrInfoOld)); + retLgr = backend->fetchLedgerBySequence(lgrInfoNext.seq - 1); + EXPECT_EQ(ledgerInfoToBlob(*retLgr), ledgerInfoToBlob(lgrInfoOld)); + + EXPECT_NE(ledgerInfoToBlob(*retLgr), ledgerInfoToBlob(lgrInfoNext)); + retLgr = backend->fetchLedgerBySequence(lgrInfoNext.seq - 2); + EXPECT_FALSE(backend->fetchLedgerBySequence(lgrInfoNext.seq - 2) + .has_value()); + + auto txns = backend->fetchAllTransactionsInLedger(lgrInfoNext.seq); + EXPECT_EQ(txns.size(), 0); + auto hashes = + backend->fetchAllTransactionHashesInLedger(lgrInfoNext.seq); + EXPECT_EQ(hashes.size(), 0); + } + + // the below dummy data is not expected to be consistent. The metadata + // string does represent valid metadata. Don't assume though that the + // transaction or its hash correspond to the metadata, or anything like + // that. These tests are purely binary tests to make sure the same data + // that goes in, comes back out + std::string metaHex = + "201C0000001AF8E411006F560A3E08122A05AC91DEFA87052B0554E4A29B46" + "3A27642EBB060B6052196592EEE72200000000240480FDB52503CE1A863300" + "000000000000003400000000000000005529983CBAED30F547471452921C3C" + "6B9F9685F292F6291000EED0A44413AF18C250101AC09600F4B502C8F7F830" + "F80B616DCB6F3970CB79AB70975A05ED5B66860B9564400000001FE217CB65" + "D54B640B31521B05000000000000000000000000434E5900000000000360E3" + "E0751BD9A566CD03FA6CAFC78118B82BA081142252F328CF91263417762570" + "D67220CCB33B1370E1E1E3110064561AC09600F4B502C8F7F830F80B616DCB" + "6F3970CB79AB70975A05ED33DF783681E8365A05ED33DF783681581AC09600" + "F4B502C8F7F830F80B616DCB6F3970CB79AB70975A05ED33DF783681031100" + "0000000000000000000000434E59000000000004110360E3E0751BD9A566CD" + "03FA6CAFC78118B82BA0E1E1E4110064561AC09600F4B502C8F7F830F80B61" + "6DCB6F3970CB79AB70975A05ED5B66860B95E72200000000365A05ED5B6686" + "0B95581AC09600F4B502C8F7F830F80B616DCB6F3970CB79AB70975A05ED5B" + "66860B95011100000000000000000000000000000000000000000211000000" + "00000000000000000000000000000000000311000000000000000000000000" + "434E59000000000004110360E3E0751BD9A566CD03FA6CAFC78118B82BA0E1" + "E1E311006F5647B05E66DE9F3DF2689E8F4CE6126D3136B6C5E79587F9D24B" + "D71A952B0852BAE8240480FDB950101AC09600F4B502C8F7F830F80B616DCB" + "6F3970CB79AB70975A05ED33DF78368164400000033C83A95F65D59D9A6291" + "9C2D18000000000000000000000000434E5900000000000360E3E0751BD9A5" + "66CD03FA6CAFC78118B82BA081142252F328CF91263417762570D67220CCB3" + "3B1370E1E1E511006456AEA3074F10FE15DAC592F8A0405C61FB7D4C98F588" + "C2D55C84718FAFBBD2604AE722000000003100000000000000003200000000" + "0000000058AEA3074F10FE15DAC592F8A0405C61FB7D4C98F588C2D55C8471" + "8FAFBBD2604A82142252F328CF91263417762570D67220CCB33B1370E1E1E5" + "1100612503CE1A8755CE935137F8C6C8DEF26B5CD93BE18105CA83F65E1E90" + "CEC546F562D25957DC0856E0311EB450B6177F969B94DBDDA83E99B7A0576A" + "CD9079573876F16C0C004F06E6240480FDB9624000000005FF0E2BE1E72200" + "000000240480FDBA2D00000005624000000005FF0E1F81142252F328CF9126" + "3417762570D67220CCB33B1370E1E1F1031000"; + std::string txnHex = + "1200072200000000240480FDB920190480FDB5201B03CE1A8964400000033C" + "83A95F65D59D9A62919C2D18000000000000000000000000434E5900000000" + "000360E3E0751BD9A566CD03FA6CAFC78118B82BA068400000000000000C73" + "21022D40673B44C82DEE1DDB8B9BB53DCCE4F97B27404DB850F068DD91D685" + "E337EA7446304402202EA6B702B48B39F2197112382838F92D4C02948E9911" + "FE6B2DEBCF9183A426BC022005DAC06CD4517E86C2548A80996019F3AC60A0" + "9EED153BF60C992930D68F09F981142252F328CF91263417762570D67220CC" + "B33B1370"; + std::string hashHex = + "0A81FB3D6324C2DCF73131505C6E4DC67981D7FC39F5E9574CEC4B1F22D28BF7"; + + // this account is not related to the above transaction and metadata + std::string accountHex = + "1100612200000000240480FDBC2503CE1A872D0000000555516931B2AD018EFFBE" + "17C5" + "C9DCCF872F36837C2C6136ACF80F2A24079CF81FD0624000000005FF0E07811422" + "52F3" + "28CF91263417762570D67220CCB33B1370"; + std::string accountIndexHex = + "E0311EB450B6177F969B94DBDDA83E99B7A0576ACD9079573876F16C0C004F06"; + + std::string metaBlob = hexStringToBinaryString(metaHex); + std::string txnBlob = hexStringToBinaryString(txnHex); + std::string hashBlob = hexStringToBinaryString(hashHex); + std::string accountBlob = hexStringToBinaryString(accountHex); + std::string accountIndexBlob = hexStringToBinaryString(accountIndexHex); + std::vector affectedAccounts; + + { + backend->startWrites(); + lgrInfoNext.seq = lgrInfoNext.seq + 1; + lgrInfoNext.txHash = ~lgrInfo.txHash; + lgrInfoNext.accountHash = + lgrInfoNext.accountHash ^ lgrInfoNext.txHash; + lgrInfoNext.parentHash = lgrInfoNext.hash; + lgrInfoNext.hash++; + + ripple::uint256 hash256; + EXPECT_TRUE(hash256.parseHex(hashHex)); + ripple::TxMeta txMeta{hash256, lgrInfoNext.seq, metaBlob}; + auto journal = ripple::debugLog(); + auto accountsSet = txMeta.getAffectedAccounts(journal); + for (auto& a : accountsSet) + { + affectedAccounts.push_back(a); + } + + std::vector accountTxData; + accountTxData.emplace_back(txMeta, hash256, journal); + backend->writeLedger( + lgrInfoNext, std::move(ledgerInfoToBinaryString(lgrInfoNext))); + backend->writeTransaction( + std::move(std::string{hashBlob}), + lgrInfoNext.seq, + std::move(std::string{txnBlob}), + std::move(std::string{metaBlob})); + backend->writeAccountTransactions(std::move(accountTxData)); + backend->writeLedgerObject( + std::move(std::string{accountIndexBlob}), + lgrInfoNext.seq, + std::move(std::string{accountBlob}), + true, + false, + {}); + + ASSERT_TRUE(backend->finishWrites(lgrInfoNext.seq)); + } + + { + auto rng = backend->fetchLedgerRange(); + EXPECT_TRUE(rng); + EXPECT_EQ(rng->minSequence, lgrInfoOld.seq); + EXPECT_EQ(rng->maxSequence, lgrInfoNext.seq); + auto retLgr = backend->fetchLedgerBySequence(lgrInfoNext.seq); + EXPECT_TRUE(retLgr); + EXPECT_EQ(ledgerInfoToBlob(*retLgr), ledgerInfoToBlob(lgrInfoNext)); + auto txns = backend->fetchAllTransactionsInLedger(lgrInfoNext.seq); + EXPECT_EQ(txns.size(), 1); + EXPECT_STREQ( + (const char*)txns[0].transaction.data(), + (const char*)txnBlob.data()); + EXPECT_STREQ( + (const char*)txns[0].metadata.data(), + (const char*)metaBlob.data()); + auto hashes = + backend->fetchAllTransactionHashesInLedger(lgrInfoNext.seq); + EXPECT_EQ(hashes.size(), 1); + EXPECT_EQ(ripple::strHex(hashes[0]), hashHex); + for (auto& a : affectedAccounts) + { + auto accountTxns = backend->fetchAccountTransactions(a, 100); + EXPECT_EQ(accountTxns.first.size(), 1); + EXPECT_EQ(accountTxns.first[0], txns[0]); + EXPECT_FALSE(accountTxns.second); + } + + ripple::uint256 key256; + EXPECT_TRUE(key256.parseHex(accountIndexHex)); + auto obj = backend->fetchLedgerObject(key256, lgrInfoNext.seq); + EXPECT_TRUE(obj); + EXPECT_STREQ( + (const char*)obj->data(), (const char*)accountBlob.data()); + obj = backend->fetchLedgerObject(key256, lgrInfoNext.seq + 1); + EXPECT_TRUE(obj); + EXPECT_STREQ( + (const char*)obj->data(), (const char*)accountBlob.data()); + obj = backend->fetchLedgerObject(key256, lgrInfoOld.seq - 1); + EXPECT_FALSE(obj); + } + // obtain a time-based seed: + unsigned seed = + std::chrono::system_clock::now().time_since_epoch().count(); + std::string accountBlobOld = accountBlob; + { + backend->startWrites(); + lgrInfoNext.seq = lgrInfoNext.seq + 1; + lgrInfoNext.parentHash = lgrInfoNext.hash; + lgrInfoNext.hash++; + lgrInfoNext.txHash = lgrInfoNext.txHash ^ lgrInfoNext.accountHash; + lgrInfoNext.accountHash = + ~(lgrInfoNext.accountHash ^ lgrInfoNext.txHash); + + backend->writeLedger( + lgrInfoNext, std::move(ledgerInfoToBinaryString(lgrInfoNext))); + std::shuffle( + accountBlob.begin(), + accountBlob.end(), + std::default_random_engine(seed)); + backend->writeLedgerObject( + std::move(std::string{accountIndexBlob}), + lgrInfoNext.seq, + std::move(std::string{accountBlob}), + true, + false, + {}); + + ASSERT_TRUE(backend->finishWrites(lgrInfoNext.seq)); + } + { + auto rng = backend->fetchLedgerRange(); + EXPECT_TRUE(rng); + EXPECT_EQ(rng->minSequence, lgrInfoOld.seq); + EXPECT_EQ(rng->maxSequence, lgrInfoNext.seq); + auto retLgr = backend->fetchLedgerBySequence(lgrInfoNext.seq); + EXPECT_TRUE(retLgr); + EXPECT_EQ(ledgerInfoToBlob(*retLgr), ledgerInfoToBlob(lgrInfoNext)); + auto txns = backend->fetchAllTransactionsInLedger(lgrInfoNext.seq); + EXPECT_EQ(txns.size(), 0); + + ripple::uint256 key256; + EXPECT_TRUE(key256.parseHex(accountIndexHex)); + auto obj = backend->fetchLedgerObject(key256, lgrInfoNext.seq); + EXPECT_TRUE(obj); + EXPECT_STREQ( + (const char*)obj->data(), (const char*)accountBlob.data()); + obj = backend->fetchLedgerObject(key256, lgrInfoNext.seq + 1); + EXPECT_TRUE(obj); + EXPECT_STREQ( + (const char*)obj->data(), (const char*)accountBlob.data()); + obj = backend->fetchLedgerObject(key256, lgrInfoNext.seq - 1); + EXPECT_TRUE(obj); + EXPECT_STREQ( + (const char*)obj->data(), (const char*)accountBlobOld.data()); + obj = backend->fetchLedgerObject(key256, lgrInfoOld.seq - 1); + EXPECT_FALSE(obj); + } + + auto generateObjects = [seed]( + size_t numObjects, uint32_t ledgerSequence) { + std::vector> res{numObjects}; + ripple::uint256 key; + key = ledgerSequence * 100000; + + for (auto& blob : res) + { + ++key; + std::string keyStr{(const char*)key.data(), key.size()}; + blob.first = keyStr; + blob.second = std::to_string(ledgerSequence) + keyStr; + } + return res; + }; + auto updateObjects = [](uint32_t ledgerSequence, auto objs) { + for (auto& [key, obj] : objs) + { + obj = std::to_string(ledgerSequence) + obj; + } + return objs; + }; + auto generateTxns = [seed](size_t numTxns, uint32_t ledgerSequence) { + std::vector> res{ + numTxns}; + ripple::uint256 base; + base = ledgerSequence * 100000; + for (auto& blob : res) + { + ++base; + std::string hashStr{(const char*)base.data(), base.size()}; + std::string txnStr = + "tx" + std::to_string(ledgerSequence) + hashStr; + std::string metaStr = + "meta" + std::to_string(ledgerSequence) + hashStr; + blob = std::make_tuple(hashStr, txnStr, metaStr); + } + return res; + }; + auto generateAccounts = [](uint32_t ledgerSequence, + uint32_t numAccounts) { + std::vector accounts; + ripple::AccountID base; + base = ledgerSequence * 998765; + for (size_t i = 0; i < numAccounts; ++i) + { + ++base; + accounts.push_back(base); + } + return accounts; + }; + auto generateAccountTx = [&](uint32_t ledgerSequence, auto txns) { + std::vector ret; + auto accounts = generateAccounts(ledgerSequence, 10); + std::srand(std::time(nullptr)); + uint32_t idx = 0; + for (auto& [hash, txn, meta] : txns) + { + AccountTransactionsData data; + data.ledgerSequence = ledgerSequence; + data.transactionIndex = idx; + data.txHash = hash; + for (size_t i = 0; i < 3; ++i) + { + data.accounts.insert( + accounts[std::rand() % accounts.size()]); + } + ++idx; + ret.push_back(data); + } + return ret; + }; + + auto generateNextLedger = [seed](auto lgrInfo) { + ++lgrInfo.seq; + lgrInfo.parentHash = lgrInfo.hash; + std::srand(std::time(nullptr)); + std::shuffle( + lgrInfo.txHash.begin(), + lgrInfo.txHash.end(), + std::default_random_engine(seed)); + std::shuffle( + lgrInfo.accountHash.begin(), + lgrInfo.accountHash.end(), + std::default_random_engine(seed)); + std::shuffle( + lgrInfo.hash.begin(), + lgrInfo.hash.end(), + std::default_random_engine(seed)); + return lgrInfo; + }; + auto writeLedger = + [&](auto lgrInfo, auto txns, auto objs, auto accountTx) { + std::cout << "writing ledger = " << std::to_string(lgrInfo.seq); + backend->startWrites(); + + backend->writeLedger( + lgrInfo, std::move(ledgerInfoToBinaryString(lgrInfo))); + for (auto [hash, txn, meta] : txns) + { + backend->writeTransaction( + std::move(hash), + lgrInfo.seq, + std::move(txn), + std::move(meta)); + } + for (auto [key, obj] : objs) + { + std::optional bookDir; + if (isOffer(obj.data())) + bookDir = getBook(obj); + backend->writeLedgerObject( + std::move(key), + lgrInfo.seq, + std::move(obj), + true, + false, + std::move(bookDir)); + } + backend->writeAccountTransactions(std::move(accountTx)); + + ASSERT_TRUE(backend->finishWrites(lgrInfo.seq)); + }; + + auto checkLedger = [&](auto lgrInfo, + auto txns, + auto objs, + auto accountTx) { + auto rng = backend->fetchLedgerRange(); + auto seq = lgrInfo.seq; + EXPECT_TRUE(rng); + EXPECT_EQ(rng->minSequence, lgrInfoOld.seq); + EXPECT_GE(rng->maxSequence, seq); + auto retLgr = backend->fetchLedgerBySequence(seq); + EXPECT_TRUE(retLgr); + EXPECT_EQ(ledgerInfoToBlob(*retLgr), ledgerInfoToBlob(lgrInfo)); + // retLgr = backend->fetchLedgerByHash(lgrInfo.hash); + // EXPECT_TRUE(retLgr); + // EXPECT_EQ(ledgerInfoToBlob(*retLgr), ledgerInfoToBlob(lgrInfo)); + auto retTxns = backend->fetchAllTransactionsInLedger(seq); + for (auto [hash, txn, meta] : txns) + { + bool found = false; + for (auto [retTxn, retMeta, retSeq] : retTxns) + { + if (std::strncmp( + (const char*)retTxn.data(), + (const char*)txn.data(), + txn.size()) == 0 && + std::strncmp( + (const char*)retMeta.data(), + (const char*)meta.data(), + meta.size()) == 0) + found = true; + } + ASSERT_TRUE(found); + } + for (auto [account, data] : accountTx) + { + std::vector retData; + std::optional cursor; + do + { + uint32_t limit = 10; + auto res = backend->fetchAccountTransactions( + account, limit, cursor); + if (res.second) + EXPECT_EQ(res.first.size(), limit); + retData.insert( + retData.end(), res.first.begin(), res.first.end()); + cursor = res.second; + } while (cursor); + EXPECT_EQ(retData.size(), data.size()); + for (size_t i = 0; i < retData.size(); ++i) + { + auto [txn, meta, seq] = retData[i]; + auto [hash, expTxn, expMeta] = data[i]; + EXPECT_STREQ( + (const char*)txn.data(), (const char*)expTxn.data()); + EXPECT_STREQ( + (const char*)meta.data(), (const char*)expMeta.data()); + } + } + for (auto [key, obj] : objs) + { + auto retObj = + backend->fetchLedgerObject(binaryStringToUint256(key), seq); + if (obj.size()) + { + ASSERT_TRUE(retObj.has_value()); + EXPECT_STREQ( + (const char*)obj.data(), (const char*)retObj->data()); + } + else + { + ASSERT_FALSE(retObj.has_value()); + } + } + Backend::LedgerPage page; + std::vector retObjs; + size_t numLoops = 0; + do + { + uint32_t limit = 10; + page = backend->fetchLedgerPage(page.cursor, seq, limit); + if (page.cursor) + EXPECT_EQ(page.objects.size(), limit); + retObjs.insert( + retObjs.end(), page.objects.begin(), page.objects.end()); + ++numLoops; + ASSERT_FALSE(page.warning.has_value()); + } while (page.cursor); + for (auto obj : objs) + { + bool found = false; + bool correct = false; + for (auto retObj : retObjs) + { + if (ripple::strHex(obj.first) == ripple::strHex(retObj.key)) + { + found = true; + ASSERT_EQ( + ripple::strHex(obj.second), + ripple::strHex(retObj.blob)); + } + } + ASSERT_EQ(found, obj.second.size() != 0); + } + }; + + std::map>> + state; + std::map< + uint32_t, + std::vector>> + allTxns; + std::unordered_map> + allTxnsMap; + std:: + map>> + allAccountTx; + std::map lgrInfos; + for (size_t i = 0; i < 10; ++i) + { + lgrInfoNext = generateNextLedger(lgrInfoNext); + auto objs = generateObjects(25, lgrInfoNext.seq); + auto txns = generateTxns(10, lgrInfoNext.seq); + auto accountTx = generateAccountTx(lgrInfoNext.seq, txns); + for (auto rec : accountTx) + { + for (auto account : rec.accounts) + { + allAccountTx[lgrInfoNext.seq][account].push_back( + std::string{ + (const char*)rec.txHash.data(), rec.txHash.size()}); + } + } + EXPECT_EQ(objs.size(), 25); + EXPECT_NE(objs[0], objs[1]); + EXPECT_EQ(txns.size(), 10); + EXPECT_NE(txns[0], txns[1]); + writeLedger(lgrInfoNext, txns, objs, accountTx); + state[lgrInfoNext.seq] = objs; + allTxns[lgrInfoNext.seq] = txns; + lgrInfos[lgrInfoNext.seq] = lgrInfoNext; + for (auto& [hash, txn, meta] : txns) + { + allTxnsMap[hash] = std::make_pair(txn, meta); + } + } + + std::vector> objs; + for (size_t i = 0; i < 10; ++i) + { + lgrInfoNext = generateNextLedger(lgrInfoNext); + if (!objs.size()) + objs = generateObjects(25, lgrInfoNext.seq); + else + objs = updateObjects(lgrInfoNext.seq, objs); + auto txns = generateTxns(10, lgrInfoNext.seq); + auto accountTx = generateAccountTx(lgrInfoNext.seq, txns); + for (auto rec : accountTx) + { + for (auto account : rec.accounts) + { + allAccountTx[lgrInfoNext.seq][account].push_back( + std::string{ + (const char*)rec.txHash.data(), rec.txHash.size()}); + } + } + EXPECT_EQ(objs.size(), 25); + EXPECT_NE(objs[0], objs[1]); + EXPECT_EQ(txns.size(), 10); + EXPECT_NE(txns[0], txns[1]); + writeLedger(lgrInfoNext, txns, objs, accountTx); + state[lgrInfoNext.seq] = objs; + allTxns[lgrInfoNext.seq] = txns; + lgrInfos[lgrInfoNext.seq] = lgrInfoNext; + for (auto& [hash, txn, meta] : txns) + { + allTxnsMap[hash] = std::make_pair(txn, meta); + } + } + std::cout << "WROTE ALL OBJECTS" << std::endl; + auto flatten = [&](uint32_t max) { + std::vector> flat; + std::map objs; + for (auto [seq, diff] : state) + { + for (auto [k, v] : diff) + { + if (seq > max) + { + if (objs.count(k) == 0) + objs[k] = ""; + } + else + { + objs[k] = v; + } + } + } + for (auto [key, value] : objs) + { + flat.push_back(std::make_pair(key, value)); + } + return flat; + }; + + auto flattenAccountTx = [&](uint32_t max) { + std::unordered_map< + ripple::AccountID, + std::vector>> + accountTx; + for (auto [seq, map] : allAccountTx) + { + if (seq > max) + break; + for (auto& [account, hashes] : map) + { + for (auto& hash : hashes) + { + auto& [txn, meta] = allTxnsMap[hash]; + accountTx[account].push_back( + std::make_tuple(hash, txn, meta)); + } + } + } + for (auto& [account, data] : accountTx) + std::reverse(data.begin(), data.end()); + return accountTx; + }; + + for (auto [seq, diff] : state) + { + std::cout << "flatteneing" << std::endl; + auto flat = flatten(seq); + std::cout << "flattened" << std::endl; + checkLedger( + lgrInfos[seq], allTxns[seq], flat, flattenAccountTx(seq)); + std::cout << "checked" << std::endl; + } + } +} + From 7f16951a186dd1bee410ed610fea129450d27a46 Mon Sep 17 00:00:00 2001 From: CJ Cobb Date: Fri, 28 May 2021 22:05:08 -0400 Subject: [PATCH 16/25] clean up logs --- reporting/BackendIndexer.cpp | 53 ++-------------------------------- reporting/CassandraBackend.cpp | 4 +-- reporting/Pg.cpp | 2 +- reporting/ReportingETL.cpp | 4 +-- 4 files changed, 7 insertions(+), 56 deletions(-) diff --git a/reporting/BackendIndexer.cpp b/reporting/BackendIndexer.cpp index fe4f5510..0866809d 100644 --- a/reporting/BackendIndexer.cpp +++ b/reporting/BackendIndexer.cpp @@ -21,55 +21,6 @@ BackendIndexer::addKey(ripple::uint256&& key) keys.insert(std::move(key)); } -void -writeKeyFlagLedger( - uint32_t ledgerSequence, - uint32_t shift, - BackendInterface const& backend, - std::unordered_set const& keys) -{ - uint32_t nextFlag = ((ledgerSequence >> shift << shift) + (1 << shift)); - ripple::uint256 zero = {}; - BOOST_LOG_TRIVIAL(info) - << __func__ - << " starting. ledgerSequence = " << std::to_string(ledgerSequence) - << " nextFlag = " << std::to_string(nextFlag) - << " keys.size() = " << std::to_string(keys.size()); - while (true) - { - try - { - auto [objects, curCursor, warning] = - backend.fetchLedgerPage({}, nextFlag, 1); - if (!warning) - { - BOOST_LOG_TRIVIAL(warning) - << __func__ << " flag ledger already written. sequence = " - << std::to_string(ledgerSequence) - << " next flag = " << std::to_string(nextFlag) - << "returning"; - return; - } - break; - } - catch (DatabaseTimeout& t) - { - ; - } - } - auto start = std::chrono::system_clock::now(); - - backend.writeKeys(keys, KeyIndex{nextFlag}, true); - backend.writeKeys({zero}, KeyIndex{nextFlag}, true); - auto end = std::chrono::system_clock::now(); - BOOST_LOG_TRIVIAL(info) - << __func__ - << " finished. ledgerSequence = " << std::to_string(ledgerSequence) - << " nextFlag = " << std::to_string(nextFlag) - << " keys.size() = " << std::to_string(keys.size()) - << std::chrono::duration_cast(end - start) - .count(); -} void BackendIndexer::doKeysRepair( BackendInterface const& backend, @@ -218,7 +169,7 @@ BackendIndexer::writeKeyFlagLedgerAsync( void BackendIndexer::finish(uint32_t ledgerSequence, BackendInterface const& backend) { - BOOST_LOG_TRIVIAL(info) + BOOST_LOG_TRIVIAL(debug) << __func__ << " starting. sequence = " << std::to_string(ledgerSequence); bool isFirst = false; @@ -243,7 +194,7 @@ BackendIndexer::finish(uint32_t ledgerSequence, BackendInterface const& backend) } isFirst_ = false; keys = {}; - BOOST_LOG_TRIVIAL(info) + BOOST_LOG_TRIVIAL(debug) << __func__ << " finished. sequence = " << std::to_string(ledgerSequence); } diff --git a/reporting/CassandraBackend.cpp b/reporting/CassandraBackend.cpp index 7c4152ab..b8794a31 100644 --- a/reporting/CassandraBackend.cpp +++ b/reporting/CassandraBackend.cpp @@ -752,7 +752,7 @@ CassandraBackend::writeKeys( cbs.reserve(keys.size()); uint32_t concurrentLimit = isAsync ? indexerMaxRequestsOutstanding : keys.size(); - BOOST_LOG_TRIVIAL(info) + BOOST_LOG_TRIVIAL(debug) << __func__ << " Ledger = " << std::to_string(index.keyIndex) << " . num keys = " << std::to_string(keys.size()) << " . concurrentLimit = " @@ -779,7 +779,7 @@ CassandraBackend::writeKeys( concurrentLimit; }); if (numSubmitted % 100000 == 0) - BOOST_LOG_TRIVIAL(info) + BOOST_LOG_TRIVIAL(debug) << __func__ << " Submitted " << std::to_string(numSubmitted) << " write requests. Completed " << (keys.size() - numRemaining); diff --git a/reporting/Pg.cpp b/reporting/Pg.cpp index 9f61df1e..705e820c 100644 --- a/reporting/Pg.cpp +++ b/reporting/Pg.cpp @@ -244,7 +244,7 @@ Pg::bulkInsert(char const* table, std::string const& records) assert(conn_.get()); auto copyCmd = boost::format(R"(COPY %s FROM stdin)"); auto formattedCmd = boost::str(copyCmd % table); - BOOST_LOG_TRIVIAL(info) << __func__ << " " << formattedCmd; + BOOST_LOG_TRIVIAL(debug) << __func__ << " " << formattedCmd; auto res = query(formattedCmd.c_str()); if (!res || res.status() != PGRES_COPY_IN) { diff --git a/reporting/ReportingETL.cpp b/reporting/ReportingETL.cpp index 9b093a23..1727232c 100644 --- a/reporting/ReportingETL.cpp +++ b/reporting/ReportingETL.cpp @@ -313,7 +313,7 @@ ReportingETL::buildNextLedger(org::xrpl::rpc::v1::GetLedgerResponse& rawData) auto end = std::chrono::system_clock::now(); auto duration = ((end - start).count()) / 1000000000.0; - BOOST_LOG_TRIVIAL(info) + BOOST_LOG_TRIVIAL(debug) << __func__ << " Accumulated " << std::to_string(accumTxns_) << " transactions. Wrote in " << std::to_string(duration) << " transactions per second = " @@ -321,7 +321,7 @@ ReportingETL::buildNextLedger(org::xrpl::rpc::v1::GetLedgerResponse& rawData) accumTxns_ = 0; } else - BOOST_LOG_TRIVIAL(info) << __func__ << " skipping commit"; + BOOST_LOG_TRIVIAL(debug) << __func__ << " skipping commit"; BOOST_LOG_TRIVIAL(debug) << __func__ << " : " << "Inserted/modified/deleted all objects. Number of objects = " From 4c12bb51dde250daeffafb256812ef53a8c1c985 Mon Sep 17 00:00:00 2001 From: CJ Cobb Date: Sat, 29 May 2021 09:52:02 -0400 Subject: [PATCH 17/25] remove books table from postgres --- reporting/Pg.cpp | 9 --------- reporting/PostgresBackend.cpp | 5 ----- reporting/PostgresBackend.h | 1 - 3 files changed, 15 deletions(-) diff --git a/reporting/Pg.cpp b/reporting/Pg.cpp index 705e820c..3742faf6 100644 --- a/reporting/Pg.cpp +++ b/reporting/Pg.cpp @@ -841,15 +841,6 @@ create table if not exists account_transactions5 partition of account_transactio create table if not exists account_transactions6 partition of account_transactions for values from (50000000) to (60000000); create table if not exists account_transactions7 partition of account_transactions for values from (60000000) to (70000000); --- Table that maps a book to a list of offers in that book. Deletes from the ledger table --- cascade here based on ledger_seq. -CREATE TABLE IF NOT EXISTS books ( - ledger_seq bigint NOT NULL, - book bytea NOT NULL, - offer_key bytea NOT NULL -); - -CREATE INDEX book_idx ON books using btree(ledger_seq, book, offer_key); CREATE TABLE IF NOT EXISTS keys ( ledger_seq bigint NOT NULL, diff --git a/reporting/PostgresBackend.cpp b/reporting/PostgresBackend.cpp index 55572ed9..a9e89422 100644 --- a/reporting/PostgresBackend.cpp +++ b/reporting/PostgresBackend.cpp @@ -643,9 +643,6 @@ PostgresBackend::doFinishWrites() const std::string keysStr = keysBuffer_.str(); if (keysStr.size()) writeConnection_.bulkInsert("keys", keysStr); - std::string booksStr = booksBuffer_.str(); - if (booksStr.size()) - writeConnection_.bulkInsert("books", booksStr); } auto res = writeConnection_("COMMIT"); if (!res || res.status() != PGRES_COMMAND_OK) @@ -658,8 +655,6 @@ PostgresBackend::doFinishWrites() const transactionsBuffer_.clear(); objectsBuffer_.str(""); objectsBuffer_.clear(); - booksBuffer_.str(""); - booksBuffer_.clear(); keysBuffer_.str(""); keysBuffer_.clear(); accountTxBuffer_.str(""); diff --git a/reporting/PostgresBackend.h b/reporting/PostgresBackend.h index d551f42d..48f46bf3 100644 --- a/reporting/PostgresBackend.h +++ b/reporting/PostgresBackend.h @@ -9,7 +9,6 @@ class PostgresBackend : public BackendInterface private: mutable size_t numRowsInObjectsBuffer_ = 0; mutable std::stringstream objectsBuffer_; - mutable std::stringstream booksBuffer_; mutable std::stringstream keysBuffer_; mutable std::stringstream transactionsBuffer_; mutable std::stringstream accountTxBuffer_; From 756436dd311230b331ee22be8294bcd03711ca37 Mon Sep 17 00:00:00 2001 From: CJ Cobb Date: Sat, 29 May 2021 16:41:37 -0400 Subject: [PATCH 18/25] base case for fetch ledger page --- reporting/BackendInterface.h | 16 +++++++++++++++- reporting/Pg.cpp | 2 +- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/reporting/BackendInterface.h b/reporting/BackendInterface.h index fb60a107..256a5ea3 100644 --- a/reporting/BackendInterface.h +++ b/reporting/BackendInterface.h @@ -247,9 +247,23 @@ public: } while (page.objects.size() < limit && page.cursor); if (incomplete) { - std::cout << "checking lower" << std::endl; + auto rng = fetchLedgerRange(); + if (!rng) + return page; + if (rng->minSequence == ledgerSequence) + { + BOOST_LOG_TRIVIAL(fatal) + << __func__ + << " Database is populated but first flag ledger is " + "incomplete. This should never happen"; + assert(false); + throw std::runtime_error("Missing base flag ledger"); + } + BOOST_LOG_TRIVIAL(debug) << __func__ << " recursing"; uint32_t lowerSequence = ledgerSequence >> indexer_.getKeyShift() << indexer_.getKeyShift(); + if (lowerSequence < rng->minSequence) + lowerSequence = rng->minSequence; auto lowerPage = fetchLedgerPage(cursor, lowerSequence, limit); std::vector keys; std::transform( diff --git a/reporting/Pg.cpp b/reporting/Pg.cpp index 3742faf6..a3626592 100644 --- a/reporting/Pg.cpp +++ b/reporting/Pg.cpp @@ -790,7 +790,7 @@ CREATE TABLE IF NOT EXISTS objects ( object bytea ) PARTITION BY RANGE (ledger_seq); -CREATE INDEX objects_idx ON objects USING btree(ledger_seq,key); +CREATE INDEX objects_idx ON objects USING btree(key,ledger_seq); create table if not exists objects1 partition of objects for values from (0) to (10000000); create table if not exists objects2 partition of objects for values from (10000000) to (20000000); From 9a4949263e656715424de3b771b2386ab87a52ba Mon Sep 17 00:00:00 2001 From: CJ Cobb Date: Sat, 29 May 2021 16:53:15 -0400 Subject: [PATCH 19/25] make keysRepair async --- reporting/BackendIndexer.cpp | 9 +++++++++ reporting/BackendInterface.h | 8 ++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/reporting/BackendIndexer.cpp b/reporting/BackendIndexer.cpp index 0866809d..624a4866 100644 --- a/reporting/BackendIndexer.cpp +++ b/reporting/BackendIndexer.cpp @@ -74,6 +74,15 @@ BackendIndexer::doKeysRepair( BOOST_LOG_TRIVIAL(info) << __func__ << " finished. sequence = " << std::to_string(*sequence); } +void +BackendIndexer::doKeysRepairAsync( + BackendInterface const& backend, + std::optional sequence) +{ + boost::asio::post(ioc_, [this, sequence, &backend]() { + doKeysRepair(backend, sequence); + }); +} void BackendIndexer::writeKeyFlagLedgerAsync( diff --git a/reporting/BackendInterface.h b/reporting/BackendInterface.h index 256a5ea3..9924dc0b 100644 --- a/reporting/BackendInterface.h +++ b/reporting/BackendInterface.h @@ -88,6 +88,10 @@ class BackendIndexer std::unordered_set keys; mutable bool isFirst_ = true; + void + doKeysRepair( + BackendInterface const& backend, + std::optional sequence); public: BackendIndexer(boost::json::object const& config); @@ -103,7 +107,7 @@ public: uint32_t ledgerSequence, BackendInterface const& backend); void - doKeysRepair( + doKeysRepairAsync( BackendInterface const& backend, std::optional sequence); uint32_t @@ -171,7 +175,7 @@ public: auto rng = fetchLedgerRangeNoThrow(); if (rng && rng->minSequence != ledgerSequence) isFirst_ = false; - indexer_.doKeysRepair(*this, ledgerSequence); + indexer_.doKeysRepairAsync(*this, ledgerSequence); } if (indexer_.isKeyFlagLedger(ledgerSequence) || isFirst_) indexer_.writeKeyFlagLedgerAsync(ledgerSequence, *this); From 46d4ee454852e33078c279ac83a76a01d712ebc8 Mon Sep 17 00:00:00 2001 From: CJ Cobb Date: Tue, 1 Jun 2021 15:10:14 +0000 Subject: [PATCH 20/25] clear out keys buffer in postgres. Fix postgres schema --- reporting/BackendIndexer.cpp | 13 ++++++++++--- reporting/Pg.cpp | 2 -- reporting/PostgresBackend.cpp | 2 ++ reporting/ReportingETL.cpp | 2 +- 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/reporting/BackendIndexer.cpp b/reporting/BackendIndexer.cpp index 624a4866..f450e649 100644 --- a/reporting/BackendIndexer.cpp +++ b/reporting/BackendIndexer.cpp @@ -52,15 +52,22 @@ BackendIndexer::doKeysRepair( // warning only shows up on the first page if (!warning) { - BOOST_LOG_TRIVIAL(debug) - << __func__ << " flag ledger already written. returning"; + BOOST_LOG_TRIVIAL(info) + << __func__ << " - " << std::to_string(*sequence) + << " flag ledger already written. returning"; return; } else { + BOOST_LOG_TRIVIAL(info) + << __func__ << " - " << std::to_string(*sequence) + << " flag ledger not written. recursing.."; uint32_t lower = (*sequence - 1) >> keyShift_ << keyShift_; doKeysRepair(backend, lower); - writeKeyFlagLedgerAsync(lower, backend); + BOOST_LOG_TRIVIAL(info) + << __func__ << " - " << std::to_string(*sequence) + << " finished recursing. submitting repair "; + writeKeyFlagLedgerAsync(*sequence, backend); return; } } diff --git a/reporting/Pg.cpp b/reporting/Pg.cpp index a3626592..e9ce37e6 100644 --- a/reporting/Pg.cpp +++ b/reporting/Pg.cpp @@ -965,8 +965,6 @@ CREATE OR REPLACE RULE account_transactions_update_protect AS ON UPDATE TO account_transactions DO INSTEAD NOTHING; CREATE OR REPLACE RULE objects_update_protect AS ON UPDATE TO objects DO INSTEAD NOTHING; -CREATE OR REPLACE RULE books_update_protect AS ON UPDATE TO - books DO INSTEAD NOTHING; -- Return the earliest ledger sequence intended for range operations diff --git a/reporting/PostgresBackend.cpp b/reporting/PostgresBackend.cpp index a9e89422..06441fac 100644 --- a/reporting/PostgresBackend.cpp +++ b/reporting/PostgresBackend.cpp @@ -697,6 +697,8 @@ PostgresBackend::writeKeys( { if (numRows > 0) conn.bulkInsert("keys", buffer.str()); + std::stringstream temp; + buffer.swap(temp); conn("COMMIT"); } return true; diff --git a/reporting/ReportingETL.cpp b/reporting/ReportingETL.cpp index 1727232c..a70e4335 100644 --- a/reporting/ReportingETL.cpp +++ b/reporting/ReportingETL.cpp @@ -69,7 +69,7 @@ ReportingETL::insertTransactions( auto metaSerializer = std::make_shared( txMeta.getAsObject().getSerializer()); - BOOST_LOG_TRIVIAL(debug) + BOOST_LOG_TRIVIAL(trace) << __func__ << " : " << "Inserting transaction = " << sttx.getTransactionID(); From d692f7f675d1ed095f2ba314b476644ffde89687 Mon Sep 17 00:00:00 2001 From: CJ Cobb Date: Tue, 1 Jun 2021 13:33:52 -0400 Subject: [PATCH 21/25] add server info --- CMakeLists.txt | 3 +- handlers/AccountInfo.cpp | 4 +-- handlers/AccountTx.cpp | 4 +-- handlers/BookOffers.cpp | 2 +- handlers/Ledger.cpp | 18 ++--------- handlers/LedgerData.cpp | 9 ++++-- handlers/LedgerEntry.cpp | 2 +- handlers/RPCHelpers.cpp | 25 ++++++++++++-- handlers/RPCHelpers.h | 8 +++-- handlers/ServerInfo.cpp | 54 +++++++++++++++++++++++++++++++ handlers/Tx.cpp | 4 +-- reporting/BackendIndexer.cpp | 23 +++++++++++-- reporting/BackendInterface.h | 24 ++++++++++---- reporting/ETLSource.h | 38 ++++++++++++++++------ reporting/ReportingETL.cpp | 7 ++++ reporting/ReportingETL.h | 14 ++++---- server/websocket_server_async.cpp | 22 +++++++++---- 17 files changed, 200 insertions(+), 61 deletions(-) create mode 100644 handlers/ServerInfo.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 39874c95..ce99aa68 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -82,7 +82,8 @@ target_sources(reporting PRIVATE handlers/BookOffers.cpp handlers/LedgerRange.cpp handlers/Ledger.cpp - handlers/LedgerEntry.cpp) + handlers/LedgerEntry.cpp + handlers/ServerInfo.cpp) message(${Boost_LIBRARIES}) diff --git a/handlers/AccountInfo.cpp b/handlers/AccountInfo.cpp index 34581626..18375ac7 100644 --- a/handlers/AccountInfo.cpp +++ b/handlers/AccountInfo.cpp @@ -104,7 +104,7 @@ doAccountInfo( { response["success"] = "fetched successfully!"; if (!binary) - response["object"] = getJson(sle); + response["object"] = toJson(sle); else response["object"] = ripple::strHex(*dbResponse); response["db_time"] = time; @@ -124,7 +124,7 @@ doAccountInfo( // support multiple SignerLists on one account. auto const sleSigners = ledger->read(keylet::signers(accountID)); if (sleSigners) - jvSignerList.append(sleSigners->getJson(JsonOptions::none)); + jvSignerList.append(sleSigners->toJson(JsonOptions::none)); result[jss::account_data][jss::signer_lists] = std::move(jvSignerList); diff --git a/handlers/AccountTx.cpp b/handlers/AccountTx.cpp index c4eae77b..ad00d34d 100644 --- a/handlers/AccountTx.cpp +++ b/handlers/AccountTx.cpp @@ -110,8 +110,8 @@ doAccountTx(boost::json::object const& request, BackendInterface const& backend) if (!binary) { auto [txn, meta] = deserializeTxPlusMeta(txnPlusMeta); - obj["transaction"] = getJson(*txn); - obj["metadata"] = getJson(*meta); + obj["transaction"] = toJson(*txn); + obj["metadata"] = toJson(*meta); } else { diff --git a/handlers/BookOffers.cpp b/handlers/BookOffers.cpp index d0e37ddb..603baa06 100644 --- a/handlers/BookOffers.cpp +++ b/handlers/BookOffers.cpp @@ -304,7 +304,7 @@ doBookOffers( ripple::SLE offer{it, obj.key}; ripple::uint256 bookDir = offer.getFieldH256(ripple::sfBookDirectory); - boost::json::object offerJson = getJson(offer); + boost::json::object offerJson = toJson(offer); offerJson["quality"] = ripple::amountFromQuality(getQuality(bookDir)).getText(); jsonOffers.push_back(offerJson); } diff --git a/handlers/Ledger.cpp b/handlers/Ledger.cpp index b03a8920..b82168bd 100644 --- a/handlers/Ledger.cpp +++ b/handlers/Ledger.cpp @@ -37,19 +37,7 @@ doLedger(boost::json::object const& request, BackendInterface const& backend) } else { - header["ledger_sequence"] = lgrInfo->seq; - header["ledger_hash"] = ripple::strHex(lgrInfo->hash); - header["txns_hash"] = ripple::strHex(lgrInfo->txHash); - header["state_hash"] = ripple::strHex(lgrInfo->accountHash); - header["parent_hash"] = ripple::strHex(lgrInfo->parentHash); - header["total_coins"] = ripple::to_string(lgrInfo->drops); - header["close_flags"] = lgrInfo->closeFlags; - - // Always show fields that contribute to the ledger hash - header["parent_close_time"] = - lgrInfo->parentCloseTime.time_since_epoch().count(); - header["close_time"] = lgrInfo->closeTime.time_since_epoch().count(); - header["close_time_resolution"] = lgrInfo->closeTimeResolution.count(); + header = toJson(*lgrInfo); } response["header"] = header; if (getTransactions) @@ -70,8 +58,8 @@ doLedger(boost::json::object const& request, BackendInterface const& backend) if (!binary) { auto [sttx, meta] = deserializeTxPlusMeta(obj); - entry["transaction"] = getJson(*sttx); - entry["metadata"] = getJson(*meta); + entry["transaction"] = toJson(*sttx); + entry["metadata"] = toJson(*meta); } else { diff --git a/handlers/LedgerData.cpp b/handlers/LedgerData.cpp index c43bf709..cd12fdb7 100644 --- a/handlers/LedgerData.cpp +++ b/handlers/LedgerData.cpp @@ -59,7 +59,12 @@ doLedgerData( { BOOST_LOG_TRIVIAL(debug) << __func__ << " : parsing cursor"; cursor = ripple::uint256{}; - cursor->parseHex(request.at("cursor").as_string().c_str()); + if (!cursor->parseHex(request.at("cursor").as_string().c_str())) + { + response["error"] = "Invalid cursor"; + response["request"] = request; + return response; + } } bool binary = request.contains("binary") ? request.at("binary").as_bool() : false; @@ -91,7 +96,7 @@ doLedgerData( objects.push_back(entry); } else - objects.push_back(getJson(sle)); + objects.push_back(toJson(sle)); } response["objects"] = objects; if (returnedCursor) diff --git a/handlers/LedgerEntry.cpp b/handlers/LedgerEntry.cpp index 724db6c8..dcccc583 100644 --- a/handlers/LedgerEntry.cpp +++ b/handlers/LedgerEntry.cpp @@ -47,7 +47,7 @@ doLedgerEntry( { ripple::STLedgerEntry sle{ ripple::SerialIter{dbResponse->data(), dbResponse->size()}, key}; - response["object"] = getJson(sle); + response["object"] = toJson(sle); } return response; diff --git a/handlers/RPCHelpers.cpp b/handlers/RPCHelpers.cpp index 191c6790..62d609e9 100644 --- a/handlers/RPCHelpers.cpp +++ b/handlers/RPCHelpers.cpp @@ -42,7 +42,7 @@ deserializeTxPlusMeta(Backend::TransactionAndMetadata const& blobs) } boost::json::object -getJson(ripple::STBase const& obj) +toJson(ripple::STBase const& obj) { auto start = std::chrono::system_clock::now(); boost::json::value value = boost::json::parse( @@ -55,7 +55,7 @@ getJson(ripple::STBase const& obj) } boost::json::object -getJson(ripple::SLE const& sle) +toJson(ripple::SLE const& sle) { auto start = std::chrono::system_clock::now(); boost::json::value value = boost::json::parse( @@ -66,6 +66,27 @@ getJson(ripple::SLE const& sle) .count(); return value.as_object(); } + +boost::json::object +toJson(ripple::LedgerInfo const& lgrInfo) +{ + boost::json::object header; + header["ledger_sequence"] = lgrInfo.seq; + header["ledger_hash"] = ripple::strHex(lgrInfo.hash); + header["txns_hash"] = ripple::strHex(lgrInfo.txHash); + header["state_hash"] = ripple::strHex(lgrInfo.accountHash); + header["parent_hash"] = ripple::strHex(lgrInfo.parentHash); + header["total_coins"] = ripple::to_string(lgrInfo.drops); + header["close_flags"] = lgrInfo.closeFlags; + + // Always show fields that contribute to the ledger hash + header["parent_close_time"] = + lgrInfo.parentCloseTime.time_since_epoch().count(); + header["close_time"] = lgrInfo.closeTime.time_since_epoch().count(); + header["close_time_resolution"] = lgrInfo.closeTimeResolution.count(); + return header; +} + std::optional ledgerSequenceFromRequest( boost::json::object const& request, diff --git a/handlers/RPCHelpers.h b/handlers/RPCHelpers.h index d3c5c26e..3e38f8a5 100644 --- a/handlers/RPCHelpers.h +++ b/handlers/RPCHelpers.h @@ -15,15 +15,19 @@ std::pair< deserializeTxPlusMeta(Backend::TransactionAndMetadata const& blobs); boost::json::object -getJson(ripple::STBase const& obj); +toJson(ripple::STBase const& obj); boost::json::object -getJson(ripple::SLE const& sle); +toJson(ripple::SLE const& sle); + +boost::json::object +toJson(ripple::LedgerInfo const& info); std::optional ledgerSequenceFromRequest( boost::json::object const& request, BackendInterface const& backend); + std::vector ledgerInfoToBlob(ripple::LedgerInfo const& info); diff --git a/handlers/ServerInfo.cpp b/handlers/ServerInfo.cpp new file mode 100644 index 00000000..18120969 --- /dev/null +++ b/handlers/ServerInfo.cpp @@ -0,0 +1,54 @@ +#include +#include +boost::json::object +doServerInfo( + boost::json::object const& request, + BackendInterface const& backend) +{ + boost::json::object response; + + auto rng = backend.fetchLedgerRange(); + if (!rng) + { + response["complete_ledgers"] = "empty"; + } + else + { + std::string completeLedgers = std::to_string(rng->minSequence); + if (rng->maxSequence != rng->minSequence) + completeLedgers += "-" + std::to_string(rng->maxSequence); + response["complete_ledgers"] = completeLedgers; + } + if (rng) + { + auto lgrInfo = backend.fetchLedgerBySequence(rng->maxSequence); + response["validated_ledger"] = toJson(*lgrInfo); + } + + boost::json::array indexes; + + if (rng) + { + uint32_t cur = rng->minSequence; + while (cur <= rng->maxSequence + 1) + { + auto keyIndex = backend.getKeyIndexOfSeq(cur); + assert(keyIndex.has_value()); + cur = keyIndex->keyIndex; + auto page = backend.fetchLedgerPage({}, cur, 1); + boost::json::object entry; + entry["complete"] = page.warning.has_value(); + entry["sequence"] = cur; + indexes.emplace_back(entry); + cur = cur + 1; + } + } + response["indexes"] = indexes; + auto indexing = backend.getIndexer().getCurrentlyIndexing(); + if (indexing) + response["indexing"] = *indexing; + else + response["indexing"] = "none"; + + return response; +} diff --git a/handlers/Tx.cpp b/handlers/Tx.cpp index 14390ddb..3f0353b3 100644 --- a/handlers/Tx.cpp +++ b/handlers/Tx.cpp @@ -63,8 +63,8 @@ doTx(boost::json::object const& request, BackendInterface const& backend) if (!binary) { auto [sttx, meta] = deserializeTxPlusMeta(dbResponse.value()); - response["transaction"] = getJson(*sttx); - response["metadata"] = getJson(*meta); + response["transaction"] = toJson(*sttx); + response["metadata"] = toJson(*meta); } else { diff --git a/reporting/BackendIndexer.cpp b/reporting/BackendIndexer.cpp index f450e649..42097645 100644 --- a/reporting/BackendIndexer.cpp +++ b/reporting/BackendIndexer.cpp @@ -10,7 +10,6 @@ BackendIndexer::BackendIndexer(boost::json::object const& config) }; BackendIndexer::~BackendIndexer() { - std::unique_lock lck(mutex_); work_.reset(); ioThread_.join(); } @@ -114,6 +113,21 @@ BackendIndexer::writeKeyFlagLedgerAsync( { try { + { + auto page = + backend.fetchLedgerPage({}, nextFlag.keyIndex, 1); + if (!page.warning) + { + BOOST_LOG_TRIVIAL(warning) + << "writeKeyFlagLedger - " + << "flag ledger already written. flag = " + << std::to_string(nextFlag.keyIndex) + << " , ledger sequence = " + << std::to_string(ledgerSequence); + return; + } + } + indexing_ = nextFlag.keyIndex; auto start = std::chrono::system_clock::now(); auto [objects, curCursor, warning] = backend.fetchLedgerPage(cursor, ledgerSequence, 2048); @@ -121,8 +135,6 @@ BackendIndexer::writeKeyFlagLedgerAsync( // no cursor means this is the first page if (!cursor) { - // if there is no warning, we don't need to do a repair - // warning only shows up on the first page if (warning) { BOOST_LOG_TRIVIAL(error) @@ -176,6 +188,7 @@ BackendIndexer::writeKeyFlagLedgerAsync( << std::chrono::duration_cast( end - begin) .count(); + indexing_ = 0; }); BOOST_LOG_TRIVIAL(info) << __func__ @@ -207,6 +220,10 @@ BackendIndexer::finish(uint32_t ledgerSequence, BackendInterface const& backend) // write completion record ripple::uint256 zero = {}; backend.writeKeys({zero}, keyIndex); + // write next flag sychronously + keyIndex = getKeyIndexOfSeq(ledgerSequence + 1); + backend.writeKeys(keys, keyIndex); + backend.writeKeys({zero}, keyIndex); } isFirst_ = false; keys = {}; diff --git a/reporting/BackendInterface.h b/reporting/BackendInterface.h index 9924dc0b..8b6d4814 100644 --- a/reporting/BackendInterface.h +++ b/reporting/BackendInterface.h @@ -84,6 +84,8 @@ class BackendIndexer std::optional work_; std::thread ioThread_; + std::atomic_uint32_t indexing_ = 0; + uint32_t keyShift_ = 20; std::unordered_set keys; @@ -115,6 +117,14 @@ public: { return keyShift_; } + std::optional + getCurrentlyIndexing() + { + uint32_t cur = indexing_.load(); + if (cur != 0) + return cur; + return {}; + } KeyIndex getKeyIndexOfSeq(uint32_t seq) const { @@ -171,16 +181,18 @@ public: if (commitRes) { if (isFirst_) - { - auto rng = fetchLedgerRangeNoThrow(); - if (rng && rng->minSequence != ledgerSequence) - isFirst_ = false; indexer_.doKeysRepairAsync(*this, ledgerSequence); - } - if (indexer_.isKeyFlagLedger(ledgerSequence) || isFirst_) + if (indexer_.isKeyFlagLedger(ledgerSequence)) indexer_.writeKeyFlagLedgerAsync(ledgerSequence, *this); isFirst_ = false; } + else + { + // if commitRes is false, we are relinquishing control of ETL. We + // reset isFirst_ to true so that way if we later regain control of + // ETL, we trigger the index repair + isFirst_ = true; + } return commitRes; } diff --git a/reporting/ETLSource.h b/reporting/ETLSource.h index 2b942b10..f1c24fb3 100644 --- a/reporting/ETLSource.h +++ b/reporting/ETLSource.h @@ -212,6 +212,24 @@ public: ", grpc port : " + grpcPort_ + " }"; } + boost::json::object + toJson() const + { + boost::json::object res; + res["validated_range"] = getValidatedRange(); + res["is_connected"] = std::to_string(isConnected()); + res["ip"] = ip_; + res["ws_port"] = wsPort_; + res["grpc_port"] = grpcPort_; + auto last = getLastMsgTime(); + if (last.time_since_epoch().count() != 0) + res["last_msg_arrival_time"] = std::to_string( + std::chrono::duration_cast( + std::chrono::system_clock::now() - getLastMsgTime()) + .count()); + return res; + } + /// Download a ledger in full /// @param ledgerSequence sequence of the ledger to download /// @param writeQueue queue to push downloaded ledger objects @@ -341,16 +359,16 @@ public: // forwarded. return true; // } - // Json::Value - // toJson() const - // { - // Json::Value ret(Json::arrayValue); - // for (auto& src : sources_) - // { - // ret.append(src->toJson()); - // } - // return ret; - // } + boost::json::array + toJson() const + { + boost::json::array ret; + for (auto& src : sources_) + { + ret.emplace_back(src->toJson()); + } + return ret; + } // // /// Randomly select a p2p node to forward a gRPC request to // /// @return gRPC stub to forward requests to p2p node diff --git a/reporting/ReportingETL.cpp b/reporting/ReportingETL.cpp index a70e4335..7695d0df 100644 --- a/reporting/ReportingETL.cpp +++ b/reporting/ReportingETL.cpp @@ -190,6 +190,13 @@ ReportingETL::publishLedger(uint32_t ledgerSequence, uint32_t maxAttempts) ++numAttempts; continue; } + else + { + auto lgr = + flatMapBackend_->fetchLedgerBySequence(ledgerSequence); + assert(lgr); + publishLedger(*lgr); + } } catch (Backend::DatabaseTimeout const& e) { diff --git a/reporting/ReportingETL.h b/reporting/ReportingETL.h index 5c4da180..0f566927 100644 --- a/reporting/ReportingETL.h +++ b/reporting/ReportingETL.h @@ -279,21 +279,23 @@ public: return numMarkers_; } - /* - Json::Value + boost::json::object getInfo() { - Json::Value result(Json::objectValue); + boost::json::object result; result["etl_sources"] = loadBalancer_.toJson(); result["is_writer"] = writing_.load(); + result["read_only"] = readOnly_; auto last = getLastPublish(); if (last.time_since_epoch().count() != 0) - result["last_publish_time"] = to_string( - date::floor(getLastPublish())); + result["last_publish_time"] = std::to_string( + std::chrono::duration_cast( + std::chrono::system_clock::now() - getLastPublish()) + .count()); + return result; } - */ /// start all of the necessary components and begin ETL void diff --git a/server/websocket_server_async.cpp b/server/websocket_server_async.cpp index 1e848d55..3411357f 100644 --- a/server/websocket_server_async.cpp +++ b/server/websocket_server_async.cpp @@ -44,7 +44,8 @@ enum RPCCommand { ledger_data, book_offers, ledger_range, - ledger_entry + ledger_entry, + server_info }; std::unordered_map commandMap{ {"tx", tx}, @@ -54,7 +55,8 @@ std::unordered_map commandMap{ {"ledger_entry", ledger_entry}, {"account_info", account_info}, {"ledger_data", ledger_data}, - {"book_offers", book_offers}}; + {"book_offers", book_offers}, + {"server_info", server_info}}; boost::json::object doAccountInfo( @@ -84,6 +86,10 @@ boost::json::object doLedgerRange( boost::json::object const& request, BackendInterface const& backend); +boost::json::object +doServerInfo( + boost::json::object const& request, + BackendInterface const& backend); std::pair buildResponse( @@ -125,6 +131,10 @@ buildResponse( return {res, 1}; } break; + case server_info: { + return {doServerInfo(request, backend), 1}; + break; + } case account_info: return {doAccountInfo(request, backend), 1}; break; @@ -170,10 +180,10 @@ public: void run() { - // We need to be executing within a strand to perform async operations - // on the I/O objects in this session. Although not strictly necessary - // for single-threaded contexts, this example code is written to be - // thread-safe by default. + // We need to be executing within a strand to perform async + // operations on the I/O objects in this session. Although not + // strictly necessary for single-threaded contexts, this example + // code is written to be thread-safe by default. boost::asio::dispatch( ws_.get_executor(), boost::beast::bind_front_handler( From 9edb743dcfd2575a90f9879b8a386f36f581de9d Mon Sep 17 00:00:00 2001 From: CJ Cobb Date: Tue, 1 Jun 2021 15:47:50 -0400 Subject: [PATCH 22/25] no duplicates in keys table for postgres --- handlers/ServerInfo.cpp | 2 +- reporting/BackendIndexer.cpp | 4 ++++ reporting/BackendInterface.h | 9 ++++++--- reporting/Pg.cpp | 5 ++--- reporting/PostgresBackend.cpp | 24 ++++++++++++++---------- test.py | 13 ++++++++++++- 6 files changed, 39 insertions(+), 18 deletions(-) diff --git a/handlers/ServerInfo.cpp b/handlers/ServerInfo.cpp index 18120969..0d064c86 100644 --- a/handlers/ServerInfo.cpp +++ b/handlers/ServerInfo.cpp @@ -37,7 +37,7 @@ doServerInfo( cur = keyIndex->keyIndex; auto page = backend.fetchLedgerPage({}, cur, 1); boost::json::object entry; - entry["complete"] = page.warning.has_value(); + entry["complete"] = !page.warning.has_value(); entry["sequence"] = cur; indexes.emplace_back(entry); cur = cur + 1; diff --git a/reporting/BackendIndexer.cpp b/reporting/BackendIndexer.cpp index 42097645..8b30e20e 100644 --- a/reporting/BackendIndexer.cpp +++ b/reporting/BackendIndexer.cpp @@ -114,6 +114,8 @@ BackendIndexer::writeKeyFlagLedgerAsync( try { { + BOOST_LOG_TRIVIAL(info) + << "writeKeyFlagLedger - checking for complete..."; auto page = backend.fetchLedgerPage({}, nextFlag.keyIndex, 1); if (!page.warning) @@ -126,6 +128,8 @@ BackendIndexer::writeKeyFlagLedgerAsync( << std::to_string(ledgerSequence); return; } + BOOST_LOG_TRIVIAL(info) + << "writeKeyFlagLedger - is not complete"; } indexing_ = nextFlag.keyIndex; auto start = std::chrono::system_clock::now(); diff --git a/reporting/BackendInterface.h b/reporting/BackendInterface.h index 8b6d4814..aa01ae17 100644 --- a/reporting/BackendInterface.h +++ b/reporting/BackendInterface.h @@ -275,11 +275,14 @@ public: assert(false); throw std::runtime_error("Missing base flag ledger"); } - BOOST_LOG_TRIVIAL(debug) << __func__ << " recursing"; - uint32_t lowerSequence = ledgerSequence >> indexer_.getKeyShift() - << indexer_.getKeyShift(); + uint32_t lowerSequence = (ledgerSequence - 1) >> + indexer_.getKeyShift() << indexer_.getKeyShift(); if (lowerSequence < rng->minSequence) lowerSequence = rng->minSequence; + BOOST_LOG_TRIVIAL(debug) + << __func__ << " recursing. ledgerSequence = " + << std::to_string(ledgerSequence) + << " , lowerSequence = " << std::to_string(lowerSequence); auto lowerPage = fetchLedgerPage(cursor, lowerSequence, limit); std::vector keys; std::transform( diff --git a/reporting/Pg.cpp b/reporting/Pg.cpp index e9ce37e6..c5e351b2 100644 --- a/reporting/Pg.cpp +++ b/reporting/Pg.cpp @@ -844,11 +844,10 @@ create table if not exists account_transactions7 partition of account_transactio CREATE TABLE IF NOT EXISTS keys ( ledger_seq bigint NOT NULL, - key bytea NOT NULL + key bytea NOT NULL, + PRIMARY KEY(ledger_seq, key) ); -CREATE INDEX key_idx ON keys USING btree(ledger_seq, key); - -- account_tx() RPC helper. From the rippled reporting process, only the -- parameters without defaults are required. For the parameters with -- defaults, validation should be done by rippled, such as: diff --git a/reporting/PostgresBackend.cpp b/reporting/PostgresBackend.cpp index 06441fac..b474558b 100644 --- a/reporting/PostgresBackend.cpp +++ b/reporting/PostgresBackend.cpp @@ -640,9 +640,6 @@ PostgresBackend::doFinishWrites() const BOOST_LOG_TRIVIAL(debug) << __func__ << " objects size = " << objectsStr.size() << " txns size = " << txStr.size(); - std::string keysStr = keysBuffer_.str(); - if (keysStr.size()) - writeConnection_.bulkInsert("keys", keysStr); } auto res = writeConnection_("COMMIT"); if (!res || res.status() != PGRES_COMMAND_OK) @@ -655,8 +652,6 @@ PostgresBackend::doFinishWrites() const transactionsBuffer_.clear(); objectsBuffer_.str(""); objectsBuffer_.clear(); - keysBuffer_.str(""); - keysBuffer_.clear(); accountTxBuffer_.str(""); accountTxBuffer_.clear(); numRowsInObjectsBuffer_ = 0; @@ -675,8 +670,12 @@ PostgresBackend::writeKeys( PgQuery& conn = isAsync ? pgQuery : writeConnection_; std::stringstream asyncBuffer; std::stringstream& buffer = isAsync ? asyncBuffer : keysBuffer_; + std::string tableName = isAsync ? "keys_temp_async" : "keys_temp"; if (isAsync) conn("BEGIN"); + conn(std::string{ + "CREATE TABLE " + tableName + " AS SELECT * FROM keys WITH NO DATA"} + .c_str()); size_t numRows = 0; for (auto& key : keys) { @@ -687,20 +686,25 @@ PostgresBackend::writeKeys( // When writing in the background, we insert after every 10000 rows if ((isAsync && numRows == 10000) || numRows == 100000) { - conn.bulkInsert("keys", buffer.str()); + conn.bulkInsert(tableName.c_str(), buffer.str()); std::stringstream temp; buffer.swap(temp); numRows = 0; } } + if (numRows > 0) + conn.bulkInsert(tableName.c_str(), buffer.str()); + conn(std::string{ + "INSERT INTO keys SELECT * FROM " + tableName + + " ON CONFLICT DO NOTHING"} + .c_str()); + conn(std::string{"DROP TABLE " + tableName}.c_str()); if (isAsync) { - if (numRows > 0) - conn.bulkInsert("keys", buffer.str()); - std::stringstream temp; - buffer.swap(temp); conn("COMMIT"); } + std::stringstream temp; + buffer.swap(temp); return true; } bool diff --git a/test.py b/test.py index 72d296ab..83066d1d 100755 --- a/test.py +++ b/test.py @@ -764,6 +764,15 @@ async def fee(ip, port): print(json.dumps(res,indent=4,sort_keys=True)) except websockets.exceptions.connectionclosederror as e: print(e) +async def server_info(ip, port): + address = 'ws://' + str(ip) + ':' + str(port) + try: + async with websockets.connect(address) as ws: + await ws.send(json.dumps({"command":"server_info"})) + res = json.loads(await ws.recv()) + print(json.dumps(res,indent=4,sort_keys=True)) + except websockets.exceptions.connectionclosederror as e: + print(e) async def ledger_diff(ip, port, base, desired, includeBlobs): address = 'ws://' + str(ip) + ':' + str(port) @@ -785,7 +794,7 @@ async def perf(ip, port): parser = argparse.ArgumentParser(description='test script for xrpl-reporting') -parser.add_argument('action', choices=["account_info", "tx", "txs","account_tx", "account_tx_full","ledger_data", "ledger_data_full", "book_offers","ledger","ledger_range","ledger_entry", "ledgers", "ledger_entries","account_txs","account_infos","account_txs_full","book_offerses","ledger_diff","perf","fee"]) +parser.add_argument('action', choices=["account_info", "tx", "txs","account_tx", "account_tx_full","ledger_data", "ledger_data_full", "book_offers","ledger","ledger_range","ledger_entry", "ledgers", "ledger_entries","account_txs","account_infos","account_txs_full","book_offerses","ledger_diff","perf","fee","server_info"]) parser.add_argument('--ip', default='127.0.0.1') parser.add_argument('--port', default='8080') @@ -828,6 +837,8 @@ def run(args): args.ledger = asyncio.get_event_loop().run_until_complete(ledger_range(args.ip, args.port))[1] if args.action == "fee": asyncio.get_event_loop().run_until_complete(fee(args.ip, args.port)) + elif args.action == "server_info": + asyncio.get_event_loop().run_until_complete(server_info(args.ip, args.port)) elif args.action == "perf": asyncio.get_event_loop().run_until_complete( perf(args.ip,args.port)) From 2299d59fda25ee022ba76c1f955bae55accb0750 Mon Sep 17 00:00:00 2001 From: CJ Cobb Date: Wed, 2 Jun 2021 17:47:14 +0000 Subject: [PATCH 23/25] lots of refactor and bug fix --- CMakeLists.txt | 1 + handlers/ServerInfo.cpp | 3 +- reporting/BackendIndexer.cpp | 209 +++++++++++++++++---------------- reporting/BackendInterface.h | 161 ++++--------------------- reporting/CassandraBackend.cpp | 9 -- reporting/CassandraBackend.h | 6 - reporting/Pg.h | 4 + reporting/PostgresBackend.cpp | 79 +++++++------ reporting/PostgresBackend.h | 7 -- reporting/ReportingETL.cpp | 1 + 10 files changed, 176 insertions(+), 304 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ce99aa68..3459333c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,6 +71,7 @@ target_sources(reporting PRIVATE reporting/CassandraBackend.cpp reporting/PostgresBackend.cpp reporting/BackendIndexer.cpp + reporting/BackendInterface.cpp reporting/Pg.cpp reporting/DBHelpers.cpp reporting/ReportingETL.cpp diff --git a/handlers/ServerInfo.cpp b/handlers/ServerInfo.cpp index 0d064c86..852e2686 100644 --- a/handlers/ServerInfo.cpp +++ b/handlers/ServerInfo.cpp @@ -35,9 +35,8 @@ doServerInfo( auto keyIndex = backend.getKeyIndexOfSeq(cur); assert(keyIndex.has_value()); cur = keyIndex->keyIndex; - auto page = backend.fetchLedgerPage({}, cur, 1); boost::json::object entry; - entry["complete"] = !page.warning.has_value(); + entry["complete"] = backend.isLedgerIndexed(cur); entry["sequence"] = cur; indexes.emplace_back(entry); cur = cur + 1; diff --git a/reporting/BackendIndexer.cpp b/reporting/BackendIndexer.cpp index 8b30e20e..c3e0dd70 100644 --- a/reporting/BackendIndexer.cpp +++ b/reporting/BackendIndexer.cpp @@ -2,6 +2,7 @@ namespace Backend { BackendIndexer::BackendIndexer(boost::json::object const& config) + : strand_(ioc_) { if (config.contains("indexer_key_shift")) keyShift_ = config.at("indexer_key_shift").as_int64(); @@ -44,12 +45,7 @@ BackendIndexer::doKeysRepair( { try { - auto [objects, curCursor, warning] = - backend.fetchLedgerPage({}, *sequence, 1); - // no cursor means this is the first page - // if there is no warning, we don't need to do a repair - // warning only shows up on the first page - if (!warning) + if (backend.isLedgerIndexed(*sequence)) { BOOST_LOG_TRIVIAL(info) << __func__ << " - " << std::to_string(*sequence) @@ -64,9 +60,11 @@ BackendIndexer::doKeysRepair( uint32_t lower = (*sequence - 1) >> keyShift_ << keyShift_; doKeysRepair(backend, lower); BOOST_LOG_TRIVIAL(info) - << __func__ << " - " << std::to_string(*sequence) + << __func__ << " - " + << " sequence = " << std::to_string(*sequence) + << " lower = " << std::to_string(lower) << " finished recursing. submitting repair "; - writeKeyFlagLedgerAsync(*sequence, backend); + writeKeyFlagLedger(lower, backend); return; } } @@ -85,11 +83,108 @@ BackendIndexer::doKeysRepairAsync( BackendInterface const& backend, std::optional sequence) { - boost::asio::post(ioc_, [this, sequence, &backend]() { + boost::asio::post(strand_, [this, sequence, &backend]() { doKeysRepair(backend, sequence); }); } +void +BackendIndexer::writeKeyFlagLedger( + uint32_t ledgerSequence, + BackendInterface const& backend) +{ + auto nextFlag = getKeyIndexOfSeq(ledgerSequence + 1); + uint32_t lower = ledgerSequence >> keyShift_ << keyShift_; + BOOST_LOG_TRIVIAL(info) + << "writeKeyFlagLedger - " + << "next flag = " << std::to_string(nextFlag.keyIndex) + << "lower = " << std::to_string(lower) + << "ledgerSequence = " << std::to_string(ledgerSequence) << " starting"; + ripple::uint256 zero = {}; + std::optional cursor; + size_t numKeys = 0; + auto begin = std::chrono::system_clock::now(); + while (true) + { + try + { + { + BOOST_LOG_TRIVIAL(info) + << "writeKeyFlagLedger - checking for complete..."; + if (backend.isLedgerIndexed(nextFlag.keyIndex)) + { + BOOST_LOG_TRIVIAL(warning) + << "writeKeyFlagLedger - " + << "flag ledger already written. flag = " + << std::to_string(nextFlag.keyIndex) + << " , ledger sequence = " + << std::to_string(ledgerSequence); + return; + } + BOOST_LOG_TRIVIAL(info) + << "writeKeyFlagLedger - is not complete"; + } + indexing_ = nextFlag.keyIndex; + auto start = std::chrono::system_clock::now(); + auto [objects, curCursor, warning] = + backend.fetchLedgerPage(cursor, lower, 2048); + auto mid = std::chrono::system_clock::now(); + // no cursor means this is the first page + if (!cursor) + { + if (warning) + { + BOOST_LOG_TRIVIAL(error) + << "writeKeyFlagLedger - " + << " prev flag ledger not written " + << std::to_string(nextFlag.keyIndex) << " : " + << std::to_string(ledgerSequence); + assert(false); + throw std::runtime_error("Missing prev flag"); + } + } + cursor = curCursor; + std::unordered_set keys; + for (auto& obj : objects) + { + keys.insert(obj.key); + } + backend.writeKeys(keys, nextFlag, true); + auto end = std::chrono::system_clock::now(); + BOOST_LOG_TRIVIAL(debug) + << "writeKeyFlagLedger - " << std::to_string(nextFlag.keyIndex) + << " fetched a page " + << " cursor = " + << (cursor.has_value() ? ripple::strHex(*cursor) + : std::string{}) + << " num keys = " << std::to_string(numKeys) << " fetch time = " + << std::chrono::duration_cast( + mid - start) + .count() + << " write time = " + << std::chrono::duration_cast( + end - mid) + .count(); + if (!cursor) + break; + } + catch (DatabaseTimeout const& e) + { + BOOST_LOG_TRIVIAL(warning) + << __func__ << " Database timeout fetching keys"; + std::this_thread::sleep_for(std::chrono::seconds(2)); + } + } + backend.writeKeys({zero}, nextFlag, true); + auto end = std::chrono::system_clock::now(); + BOOST_LOG_TRIVIAL(info) + << "writeKeyFlagLedger - " << std::to_string(nextFlag.keyIndex) + << " finished. " + << " num keys = " << std::to_string(numKeys) << " total time = " + << std::chrono::duration_cast(end - begin) + .count(); + indexing_ = 0; +} void BackendIndexer::writeKeyFlagLedgerAsync( uint32_t ledgerSequence, @@ -99,100 +194,8 @@ BackendIndexer::writeKeyFlagLedgerAsync( << __func__ << " starting. sequence = " << std::to_string(ledgerSequence); - boost::asio::post(ioc_, [this, ledgerSequence, &backend]() { - std::unordered_set keys; - auto nextFlag = getKeyIndexOfSeq(ledgerSequence + 1); - BOOST_LOG_TRIVIAL(info) - << "writeKeyFlagLedger - " << std::to_string(nextFlag.keyIndex) - << " starting"; - ripple::uint256 zero = {}; - std::optional cursor; - size_t numKeys = 0; - auto begin = std::chrono::system_clock::now(); - while (true) - { - try - { - { - BOOST_LOG_TRIVIAL(info) - << "writeKeyFlagLedger - checking for complete..."; - auto page = - backend.fetchLedgerPage({}, nextFlag.keyIndex, 1); - if (!page.warning) - { - BOOST_LOG_TRIVIAL(warning) - << "writeKeyFlagLedger - " - << "flag ledger already written. flag = " - << std::to_string(nextFlag.keyIndex) - << " , ledger sequence = " - << std::to_string(ledgerSequence); - return; - } - BOOST_LOG_TRIVIAL(info) - << "writeKeyFlagLedger - is not complete"; - } - indexing_ = nextFlag.keyIndex; - auto start = std::chrono::system_clock::now(); - auto [objects, curCursor, warning] = - backend.fetchLedgerPage(cursor, ledgerSequence, 2048); - auto mid = std::chrono::system_clock::now(); - // no cursor means this is the first page - if (!cursor) - { - if (warning) - { - BOOST_LOG_TRIVIAL(error) - << "writeKeyFlagLedger - " - << " prev flag ledger not written " - << std::to_string(nextFlag.keyIndex) << " : " - << std::to_string(ledgerSequence); - assert(false); - throw std::runtime_error("Missing prev flag"); - } - } - - cursor = curCursor; - for (auto& obj : objects) - { - keys.insert(obj.key); - } - backend.writeKeys(keys, nextFlag, true); - auto end = std::chrono::system_clock::now(); - BOOST_LOG_TRIVIAL(debug) - << "writeKeyFlagLedger - " - << std::to_string(nextFlag.keyIndex) << " fetched a page " - << " cursor = " - << (cursor.has_value() ? ripple::strHex(*cursor) - : std::string{}) - << " num keys = " << std::to_string(numKeys) - << " fetch time = " - << std::chrono::duration_cast( - mid - start) - .count() - << " write time = " - << std::chrono::duration_cast( - end - mid) - .count(); - if (!cursor) - break; - } - catch (DatabaseTimeout const& e) - { - BOOST_LOG_TRIVIAL(warning) - << __func__ << " Database timeout fetching keys"; - std::this_thread::sleep_for(std::chrono::seconds(2)); - } - } - backend.writeKeys({zero}, nextFlag, true); - auto end = std::chrono::system_clock::now(); - BOOST_LOG_TRIVIAL(info) - << "writeKeyFlagLedger - " << std::to_string(nextFlag.keyIndex) - << " finished. " - << " num keys = " << std::to_string(numKeys) << " total time = " - << std::chrono::duration_cast( - end - begin) - .count(); - indexing_ = 0; + boost::asio::post(strand_, [this, ledgerSequence, &backend]() { + writeKeyFlagLedger(ledgerSequence, backend); }); BOOST_LOG_TRIVIAL(info) << __func__ diff --git a/reporting/BackendInterface.h b/reporting/BackendInterface.h index aa01ae17..3c85a132 100644 --- a/reporting/BackendInterface.h +++ b/reporting/BackendInterface.h @@ -80,6 +80,7 @@ class BackendInterface; class BackendIndexer { boost::asio::io_context ioc_; + boost::asio::io_context::strand strand_; std::mutex mutex_; std::optional work_; std::thread ioThread_; @@ -94,6 +95,10 @@ class BackendIndexer doKeysRepair( BackendInterface const& backend, std::optional sequence); + void + writeKeyFlagLedger( + uint32_t ledgerSequence, + BackendInterface const& backend); public: BackendIndexer(boost::json::object const& config); @@ -160,41 +165,14 @@ public: return indexer_; } + void + checkFlagLedgers() const; + std::optional - getKeyIndexOfSeq(uint32_t seq) const - { - if (indexer_.isKeyFlagLedger(seq)) - return KeyIndex{seq}; - auto rng = fetchLedgerRange(); - if (!rng) - return {}; - if (rng->minSequence == seq) - return KeyIndex{seq}; - return indexer_.getKeyIndexOfSeq(seq); - } + getKeyIndexOfSeq(uint32_t seq) const; bool - finishWrites(uint32_t ledgerSequence) const - { - indexer_.finish(ledgerSequence, *this); - auto commitRes = doFinishWrites(); - if (commitRes) - { - if (isFirst_) - indexer_.doKeysRepairAsync(*this, ledgerSequence); - if (indexer_.isKeyFlagLedger(ledgerSequence)) - indexer_.writeKeyFlagLedgerAsync(ledgerSequence, *this); - isFirst_ = false; - } - else - { - // if commitRes is false, we are relinquishing control of ETL. We - // reset isFirst_ to true so that way if we later regain control of - // ETL, we trigger the index repair - isFirst_ = true; - } - return commitRes; - } + finishWrites(uint32_t ledgerSequence) const; virtual std::optional fetchLatestLedgerSequence() const = 0; @@ -206,21 +184,7 @@ public: fetchLedgerRange() const = 0; std::optional - fetchLedgerRangeNoThrow() const - { - BOOST_LOG_TRIVIAL(warning) << __func__; - while (true) - { - try - { - return fetchLedgerRange(); - } - catch (DatabaseTimeout& t) - { - ; - } - } - } + fetchLedgerRangeNoThrow() const; virtual std::optional fetchLedgerObject(ripple::uint256 const& key, uint32_t sequence) const = 0; @@ -239,87 +203,14 @@ public: fetchLedgerPage( std::optional const& cursor, std::uint32_t ledgerSequence, - std::uint32_t limit) const - { - assert(limit != 0); - bool incomplete = false; - { - auto check = doFetchLedgerPage({}, ledgerSequence, 1); - incomplete = check.warning.has_value(); - } - uint32_t adjustedLimit = limit; - LedgerPage page; - page.cursor = cursor; - do - { - adjustedLimit = adjustedLimit > 2048 ? 2048 : adjustedLimit * 2; - auto partial = - doFetchLedgerPage(page.cursor, ledgerSequence, adjustedLimit); - page.objects.insert( - page.objects.end(), - partial.objects.begin(), - partial.objects.end()); - page.cursor = partial.cursor; - } while (page.objects.size() < limit && page.cursor); - if (incomplete) - { - auto rng = fetchLedgerRange(); - if (!rng) - return page; - if (rng->minSequence == ledgerSequence) - { - BOOST_LOG_TRIVIAL(fatal) - << __func__ - << " Database is populated but first flag ledger is " - "incomplete. This should never happen"; - assert(false); - throw std::runtime_error("Missing base flag ledger"); - } - uint32_t lowerSequence = (ledgerSequence - 1) >> - indexer_.getKeyShift() << indexer_.getKeyShift(); - if (lowerSequence < rng->minSequence) - lowerSequence = rng->minSequence; - BOOST_LOG_TRIVIAL(debug) - << __func__ << " recursing. ledgerSequence = " - << std::to_string(ledgerSequence) - << " , lowerSequence = " << std::to_string(lowerSequence); - auto lowerPage = fetchLedgerPage(cursor, lowerSequence, limit); - std::vector keys; - std::transform( - std::move_iterator(lowerPage.objects.begin()), - std::move_iterator(lowerPage.objects.end()), - std::back_inserter(keys), - [](auto&& elt) { return std::move(elt.key); }); - auto objs = fetchLedgerObjects(keys, ledgerSequence); - for (size_t i = 0; i < keys.size(); ++i) - { - auto& obj = objs[i]; - auto& key = keys[i]; - if (obj.size()) - page.objects.push_back({std::move(key), std::move(obj)}); - } - std::sort( - page.objects.begin(), page.objects.end(), [](auto a, auto b) { - return a.key < b.key; - }); - page.warning = "Data may be incomplete"; - } - if (page.objects.size() >= limit) - { - page.objects.resize(limit); - page.cursor = page.objects.back().key; - } - return page; - } + std::uint32_t limit) const; + + bool + isLedgerIndexed(std::uint32_t ledgerSequence) const; std::optional - fetchSuccessor(ripple::uint256 key, uint32_t ledgerSequence) - { - auto page = fetchLedgerPage({++key}, ledgerSequence, 1); - if (page.objects.size()) - return page.objects[0]; - return {}; - } + fetchSuccessor(ripple::uint256 key, uint32_t ledgerSequence) const; + virtual LedgerPage doFetchLedgerPage( std::optional const& cursor, @@ -327,12 +218,12 @@ public: std::uint32_t limit) const = 0; // TODO add warning for incomplete data - virtual BookOffersPage + BookOffersPage fetchBookOffers( ripple::uint256 const& book, uint32_t ledgerSequence, std::uint32_t limit, - std::optional const& cursor = {}) const = 0; + std::optional const& cursor = {}) const; virtual std::vector fetchTransactions(std::vector const& hashes) const = 0; @@ -365,18 +256,8 @@ public: std::string&& blob, bool isCreated, bool isDeleted, - std::optional&& book) const - { - ripple::uint256 key256 = ripple::uint256::fromVoid(key.data()); - indexer_.addKey(std::move(key256)); - doWriteLedgerObject( - std::move(key), - seq, - std::move(blob), - isCreated, - isDeleted, - std::move(book)); - } + std::optional&& book) const; + virtual void doWriteLedgerObject( std::string&& key, diff --git a/reporting/CassandraBackend.cpp b/reporting/CassandraBackend.cpp index b8794a31..9ed4c1b8 100644 --- a/reporting/CassandraBackend.cpp +++ b/reporting/CassandraBackend.cpp @@ -494,15 +494,6 @@ CassandraBackend::fetchLedgerObjects( << "Fetched " << numKeys << " records from Cassandra"; return results; } -BookOffersPage -CassandraBackend::fetchBookOffers( - ripple::uint256 const& book, - uint32_t ledgerSequence, - std::uint32_t limit, - std::optional const& cursor) const -{ - return {}; -} // namespace Backend struct WriteBookCallbackData { CassandraBackend const& backend; diff --git a/reporting/CassandraBackend.h b/reporting/CassandraBackend.h index 09b97a1a..652296b6 100644 --- a/reporting/CassandraBackend.h +++ b/reporting/CassandraBackend.h @@ -1019,12 +1019,6 @@ public: std::unordered_set const& keys, KeyIndex const& index, bool isAsync = false) const override; - BookOffersPage - fetchBookOffers( - ripple::uint256 const& book, - uint32_t sequence, - std::uint32_t limit, - std::optional const& cursor) const override; bool canFetchBatch() diff --git a/reporting/Pg.h b/reporting/Pg.h index 1fbd24e2..24881b37 100644 --- a/reporting/Pg.h +++ b/reporting/Pg.h @@ -476,6 +476,10 @@ public: pool_->checkin(pg_); } + // TODO. add sendQuery and getResult, for sending the query and getting the + // result asynchronously. This could be useful for sending a bunch of + // requests concurrently + /** Execute postgres query with parameters. * * @param dbParams Database command with parameters. diff --git a/reporting/PostgresBackend.cpp b/reporting/PostgresBackend.cpp index b474558b..e833c754 100644 --- a/reporting/PostgresBackend.cpp +++ b/reporting/PostgresBackend.cpp @@ -375,16 +375,6 @@ PostgresBackend::doFetchLedgerPage( return {}; } -BookOffersPage -PostgresBackend::fetchBookOffers( - ripple::uint256 const& book, - uint32_t ledgerSequence, - std::uint32_t limit, - std::optional const& cursor) const -{ - return {}; -} - std::vector PostgresBackend::fetchTransactions( std::vector const& hashes) const @@ -665,46 +655,61 @@ PostgresBackend::writeKeys( { if (abortWrite_) return false; - BOOST_LOG_TRIVIAL(debug) << __func__; PgQuery pgQuery(pgPool_); PgQuery& conn = isAsync ? pgQuery : writeConnection_; - std::stringstream asyncBuffer; - std::stringstream& buffer = isAsync ? asyncBuffer : keysBuffer_; - std::string tableName = isAsync ? "keys_temp_async" : "keys_temp"; - if (isAsync) - conn("BEGIN"); - conn(std::string{ - "CREATE TABLE " + tableName + " AS SELECT * FROM keys WITH NO DATA"} - .c_str()); + std::stringstream sql; size_t numRows = 0; for (auto& key : keys) { - buffer << std::to_string(index.keyIndex) << '\t' << "\\\\x" - << ripple::strHex(key) << '\n'; numRows++; - // If the buffer gets too large, the insert fails. Not sure why. - // When writing in the background, we insert after every 10000 rows - if ((isAsync && numRows == 10000) || numRows == 100000) + sql << "INSERT INTO keys (ledger_seq, key) VALUES (" + << std::to_string(index.keyIndex) << ", \'\\x" + << ripple::strHex(key) << "\') ON CONFLICT DO NOTHING; "; + if (numRows > 10000) { - conn.bulkInsert(tableName.c_str(), buffer.str()); - std::stringstream temp; - buffer.swap(temp); + conn(sql.str().c_str()); + sql.str(""); + sql.clear(); numRows = 0; } } if (numRows > 0) - conn.bulkInsert(tableName.c_str(), buffer.str()); - conn(std::string{ - "INSERT INTO keys SELECT * FROM " + tableName + - " ON CONFLICT DO NOTHING"} - .c_str()); - conn(std::string{"DROP TABLE " + tableName}.c_str()); - if (isAsync) + conn(sql.str().c_str()); + return true; + /* + BOOST_LOG_TRIVIAL(debug) << __func__; + std::condition_variable cv; + std::mutex mtx; + std::atomic_uint numRemaining = keys.size(); + auto start = std::chrono::system_clock::now(); + for (auto& key : keys) { - conn("COMMIT"); + boost::asio::post( + pool_, [this, key, &numRemaining, &cv, &mtx, &index]() { + PgQuery pgQuery(pgPool_); + std::stringstream sql; + sql << "INSERT INTO keys (ledger_seq, key) VALUES (" + << std::to_string(index.keyIndex) << ", \'\\x" + << ripple::strHex(key) << "\') ON CONFLICT DO NOTHING"; + + auto res = pgQuery(sql.str().data()); + if (--numRemaining == 0) + { + std::unique_lock lck(mtx); + cv.notify_one(); + } + }); } - std::stringstream temp; - buffer.swap(temp); + std::unique_lock lck(mtx); + cv.wait(lck, [&numRemaining]() { return numRemaining == 0; }); + auto end = std::chrono::system_clock::now(); + auto duration = + std::chrono::duration_cast(end - start) + .count(); + BOOST_LOG_TRIVIAL(info) + << __func__ << " wrote " << std::to_string(keys.size()) + << " keys with threadpool. took " << std::to_string(duration); + */ return true; } bool diff --git a/reporting/PostgresBackend.h b/reporting/PostgresBackend.h index 48f46bf3..e2d0185e 100644 --- a/reporting/PostgresBackend.h +++ b/reporting/PostgresBackend.h @@ -50,13 +50,6 @@ public: std::uint32_t ledgerSequence, std::uint32_t limit) const override; - BookOffersPage - fetchBookOffers( - ripple::uint256 const& book, - uint32_t ledgerSequence, - std::uint32_t limit, - std::optional const& cursor) const override; - std::vector fetchTransactions( std::vector const& hashes) const override; diff --git a/reporting/ReportingETL.cpp b/reporting/ReportingETL.cpp index 7695d0df..571b7217 100644 --- a/reporting/ReportingETL.cpp +++ b/reporting/ReportingETL.cpp @@ -754,5 +754,6 @@ ReportingETL::ReportingETL( if (config.contains("txn_threshold")) txnThreshold_ = config.at("txn_threshold").as_int64(); flatMapBackend_->open(readOnly_); + flatMapBackend_->checkFlagLedgers(); } From 19b52787c49057c9f3105fa1b05a154f6d419215 Mon Sep 17 00:00:00 2001 From: CJ Cobb Date: Thu, 3 Jun 2021 14:33:35 +0000 Subject: [PATCH 24/25] Add BackendInterface.cpp --- reporting/BackendInterface.cpp | 280 +++++++++++++++++++++++++++++++++ 1 file changed, 280 insertions(+) create mode 100644 reporting/BackendInterface.cpp diff --git a/reporting/BackendInterface.cpp b/reporting/BackendInterface.cpp new file mode 100644 index 00000000..ebf3abfe --- /dev/null +++ b/reporting/BackendInterface.cpp @@ -0,0 +1,280 @@ +#include +#include +#include +namespace Backend { +bool +BackendInterface::finishWrites(uint32_t ledgerSequence) const +{ + indexer_.finish(ledgerSequence, *this); + auto commitRes = doFinishWrites(); + if (commitRes) + { + if (isFirst_) + indexer_.doKeysRepairAsync(*this, ledgerSequence); + if (indexer_.isKeyFlagLedger(ledgerSequence)) + indexer_.writeKeyFlagLedgerAsync(ledgerSequence, *this); + isFirst_ = false; + } + else + { + // if commitRes is false, we are relinquishing control of ETL. We + // reset isFirst_ to true so that way if we later regain control of + // ETL, we trigger the index repair + isFirst_ = true; + } + return commitRes; +} +bool +BackendInterface::isLedgerIndexed(std::uint32_t ledgerSequence) const +{ + auto keyIndex = getKeyIndexOfSeq(ledgerSequence); + if (keyIndex) + { + auto page = doFetchLedgerPage({}, ledgerSequence, 1); + return !page.warning.has_value(); + } + return false; +} +void +BackendInterface::writeLedgerObject( + std::string&& key, + uint32_t seq, + std::string&& blob, + bool isCreated, + bool isDeleted, + std::optional&& book) const +{ + ripple::uint256 key256 = ripple::uint256::fromVoid(key.data()); + indexer_.addKey(std::move(key256)); + doWriteLedgerObject( + std::move(key), + seq, + std::move(blob), + isCreated, + isDeleted, + std::move(book)); +} +std::optional +BackendInterface::fetchLedgerRangeNoThrow() const +{ + BOOST_LOG_TRIVIAL(warning) << __func__; + while (true) + { + try + { + return fetchLedgerRange(); + } + catch (DatabaseTimeout& t) + { + ; + } + } +} +std::optional +BackendInterface::getKeyIndexOfSeq(uint32_t seq) const +{ + if (indexer_.isKeyFlagLedger(seq)) + return KeyIndex{seq}; + auto rng = fetchLedgerRange(); + if (!rng) + return {}; + if (rng->minSequence == seq) + return KeyIndex{seq}; + return indexer_.getKeyIndexOfSeq(seq); +} +BookOffersPage +BackendInterface::fetchBookOffers( + ripple::uint256 const& book, + uint32_t ledgerSequence, + std::uint32_t limit, + std::optional const& cursor) const +{ + BookOffersPage page; + const ripple::uint256 bookEnd = ripple::getQualityNext(book); + ripple::uint256 uTipIndex = book; + bool done = false; + while (page.offers.size() < limit) + { + auto offerDir = fetchSuccessor(uTipIndex, ledgerSequence); + if (!offerDir || offerDir->key > bookEnd) + { + BOOST_LOG_TRIVIAL(debug) << __func__ << " - offerDir.has_value() " + << offerDir.has_value() << " breaking"; + break; + } + while (page.offers.size() < limit) + { + uTipIndex = offerDir->key; + ripple::STLedgerEntry sle{ + ripple::SerialIter{ + offerDir->blob.data(), offerDir->blob.size()}, + offerDir->key}; + auto indexes = sle.getFieldV256(ripple::sfIndexes); + std::vector keys; + keys.insert(keys.end(), indexes.begin(), indexes.end()); + auto objs = fetchLedgerObjects(keys, ledgerSequence); + for (size_t i = 0; i < keys.size(); ++i) + { + BOOST_LOG_TRIVIAL(debug) + << __func__ << " key = " << ripple::strHex(keys[i]) + << " blob = " << ripple::strHex(objs[i]); + if (objs[i].size()) + page.offers.push_back({keys[i], objs[i]}); + } + auto next = sle.getFieldU64(ripple::sfIndexNext); + if (!next) + { + BOOST_LOG_TRIVIAL(debug) + << __func__ << " next is empty. breaking"; + break; + } + auto nextKey = ripple::keylet::page(uTipIndex, next); + auto nextDir = fetchLedgerObject(nextKey.key, ledgerSequence); + assert(nextDir); + offerDir->blob = *nextDir; + offerDir->key = nextKey.key; + } + } + + return page; +} + +std::optional +BackendInterface::fetchSuccessor(ripple::uint256 key, uint32_t ledgerSequence) + const +{ + auto page = fetchLedgerPage({++key}, ledgerSequence, 1); + if (page.objects.size()) + return page.objects[0]; + return {}; +} +LedgerPage +BackendInterface::fetchLedgerPage( + std::optional const& cursor, + std::uint32_t ledgerSequence, + std::uint32_t limit) const +{ + assert(limit != 0); + bool incomplete = !isLedgerIndexed(ledgerSequence); + // really low limits almost always miss + uint32_t adjustedLimit = std::max(limit, (uint32_t)4); + LedgerPage page; + page.cursor = cursor; + do + { + adjustedLimit = adjustedLimit > 2048 ? 2048 : adjustedLimit * 2; + auto partial = + doFetchLedgerPage(page.cursor, ledgerSequence, adjustedLimit); + page.objects.insert( + page.objects.end(), partial.objects.begin(), partial.objects.end()); + page.cursor = partial.cursor; + } while (page.objects.size() < limit && page.cursor); + if (incomplete) + { + auto rng = fetchLedgerRange(); + if (!rng) + return page; + if (rng->minSequence == ledgerSequence) + { + BOOST_LOG_TRIVIAL(fatal) + << __func__ + << " Database is populated but first flag ledger is " + "incomplete. This should never happen"; + assert(false); + throw std::runtime_error("Missing base flag ledger"); + } + uint32_t lowerSequence = (ledgerSequence - 1) >> indexer_.getKeyShift() + << indexer_.getKeyShift(); + if (lowerSequence < rng->minSequence) + lowerSequence = rng->minSequence; + BOOST_LOG_TRIVIAL(debug) + << __func__ + << " recursing. ledgerSequence = " << std::to_string(ledgerSequence) + << " , lowerSequence = " << std::to_string(lowerSequence); + auto lowerPage = fetchLedgerPage(cursor, lowerSequence, limit); + std::vector keys; + std::transform( + std::move_iterator(lowerPage.objects.begin()), + std::move_iterator(lowerPage.objects.end()), + std::back_inserter(keys), + [](auto&& elt) { return std::move(elt.key); }); + auto objs = fetchLedgerObjects(keys, ledgerSequence); + for (size_t i = 0; i < keys.size(); ++i) + { + auto& obj = objs[i]; + auto& key = keys[i]; + if (obj.size()) + page.objects.push_back({std::move(key), std::move(obj)}); + } + std::sort(page.objects.begin(), page.objects.end(), [](auto a, auto b) { + return a.key < b.key; + }); + page.warning = "Data may be incomplete"; + } + if (page.objects.size() >= limit) + { + page.objects.resize(limit); + page.cursor = page.objects.back().key; + } + return page; +} + +void +BackendInterface::checkFlagLedgers() const +{ + auto rng = fetchLedgerRangeNoThrow(); + if (rng) + { + bool prevComplete = true; + uint32_t cur = rng->minSequence; + size_t numIncomplete = 0; + while (cur <= rng->maxSequence + 1) + { + auto keyIndex = getKeyIndexOfSeq(cur); + assert(keyIndex.has_value()); + cur = keyIndex->keyIndex; + + if (!isLedgerIndexed(cur)) + { + BOOST_LOG_TRIVIAL(warning) + << __func__ << " - flag ledger " + << std::to_string(keyIndex->keyIndex) << " is incomplete"; + ++numIncomplete; + prevComplete = false; + } + else + { + if (!prevComplete) + { + BOOST_LOG_TRIVIAL(fatal) + << __func__ << " - flag ledger " + << std::to_string(keyIndex->keyIndex) + << " is incomplete but the next is complete. This " + "should never happen"; + assert(false); + throw std::runtime_error("missing prev flag ledger"); + } + prevComplete = true; + BOOST_LOG_TRIVIAL(info) + << __func__ << " - flag ledger " + << std::to_string(keyIndex->keyIndex) << " is complete"; + } + cur = cur + 1; + } + if (numIncomplete > 1) + { + BOOST_LOG_TRIVIAL(warning) + << __func__ << " " << std::to_string(numIncomplete) + << " incomplete flag ledgers. " + "This can happen, but is unlikely. Check indexer_key_shift " + "in config"; + } + else + { + BOOST_LOG_TRIVIAL(info) + << __func__ << " number of incomplete flag ledgers = " + << std::to_string(numIncomplete); + } + } +} +} // namespace Backend From 5f6edee312b16f2f1297843400fe1a1872013682 Mon Sep 17 00:00:00 2001 From: CJ Cobb Date: Fri, 4 Jun 2021 05:53:38 +0000 Subject: [PATCH 25/25] book offers seems to work --- reporting/BackendInterface.cpp | 87 +++++++++++++++++++++++++++------- reporting/BackendInterface.h | 3 +- reporting/CassandraBackend.cpp | 6 +-- reporting/PostgresBackend.cpp | 2 +- test.py | 9 ++-- 5 files changed, 83 insertions(+), 24 deletions(-) diff --git a/reporting/BackendInterface.cpp b/reporting/BackendInterface.cpp index ebf3abfe..5a1e7c5e 100644 --- a/reporting/BackendInterface.cpp +++ b/reporting/BackendInterface.cpp @@ -89,38 +89,49 @@ BackendInterface::fetchBookOffers( std::uint32_t limit, std::optional const& cursor) const { + // TODO try to speed this up. This can take a few seconds. The goal is to + // get it down to a few hundred milliseconds. BookOffersPage page; const ripple::uint256 bookEnd = ripple::getQualityNext(book); ripple::uint256 uTipIndex = book; bool done = false; - while (page.offers.size() < limit) + std::vector keys; + auto getMillis = [](auto diff) { + return std::chrono::duration_cast(diff) + .count(); + }; + auto begin = std::chrono::system_clock::now(); + uint32_t numSucc = 0; + uint32_t numPages = 0; + long succMillis = 0; + long pageMillis = 0; + while (keys.size() < limit) { + auto mid1 = std::chrono::system_clock::now(); auto offerDir = fetchSuccessor(uTipIndex, ledgerSequence); + auto mid2 = std::chrono::system_clock::now(); + numSucc++; + succMillis += getMillis(mid2 - mid1); if (!offerDir || offerDir->key > bookEnd) { BOOST_LOG_TRIVIAL(debug) << __func__ << " - offerDir.has_value() " << offerDir.has_value() << " breaking"; break; } - while (page.offers.size() < limit) + while (keys.size() < limit) { + ++numPages; uTipIndex = offerDir->key; ripple::STLedgerEntry sle{ ripple::SerialIter{ offerDir->blob.data(), offerDir->blob.size()}, offerDir->key}; auto indexes = sle.getFieldV256(ripple::sfIndexes); - std::vector keys; keys.insert(keys.end(), indexes.begin(), indexes.end()); - auto objs = fetchLedgerObjects(keys, ledgerSequence); - for (size_t i = 0; i < keys.size(); ++i) - { - BOOST_LOG_TRIVIAL(debug) - << __func__ << " key = " << ripple::strHex(keys[i]) - << " blob = " << ripple::strHex(objs[i]); - if (objs[i].size()) - page.offers.push_back({keys[i], objs[i]}); - } + // TODO we probably don't have to wait here. We can probably fetch + // these objects in another thread, and move on to another page of + // the book directory, or another directory. We also could just + // accumulate all of the keys before fetching the offers auto next = sle.getFieldU64(ripple::sfIndexNext); if (!next) { @@ -134,7 +145,33 @@ BackendInterface::fetchBookOffers( offerDir->blob = *nextDir; offerDir->key = nextKey.key; } + auto mid3 = std::chrono::system_clock::now(); + pageMillis += getMillis(mid3 - mid2); } + auto mid = std::chrono::system_clock::now(); + auto objs = fetchLedgerObjects(keys, ledgerSequence); + for (size_t i = 0; i < keys.size(); ++i) + { + BOOST_LOG_TRIVIAL(trace) + << __func__ << " key = " << ripple::strHex(keys[i]) + << " blob = " << ripple::strHex(objs[i]); + assert(objs[i].size()); + page.offers.push_back({keys[i], objs[i]}); + } + auto end = std::chrono::system_clock::now(); + BOOST_LOG_TRIVIAL(info) + << __func__ << " " + << "Fetching " << std::to_string(keys.size()) << " keys took " + << std::to_string(getMillis(mid - begin)) + << " milliseconds. Fetching next dir took " + << std::to_string(succMillis) << " milliseonds. Fetched next dir " + << std::to_string(numSucc) << " times" + << " Fetching next page of dir took " << std::to_string(pageMillis) + << ". num pages = " << std::to_string(numPages) + << " milliseconds. Fetching all objects took " + << std::to_string(getMillis(end - mid)) + << " milliseconds. total time = " + << std::to_string(getMillis(end - begin)) << " milliseconds"; return page; } @@ -143,7 +180,14 @@ std::optional BackendInterface::fetchSuccessor(ripple::uint256 key, uint32_t ledgerSequence) const { - auto page = fetchLedgerPage({++key}, ledgerSequence, 1); + auto start = std::chrono::system_clock::now(); + auto page = fetchLedgerPage({++key}, ledgerSequence, 1, 512); + auto end = std::chrono::system_clock::now(); + + auto ms = std::chrono::duration_cast(end - start) + .count(); + BOOST_LOG_TRIVIAL(debug) + << __func__ << " took " << std::to_string(ms) << " milliseconds"; if (page.objects.size()) return page.objects[0]; return {}; @@ -152,19 +196,30 @@ LedgerPage BackendInterface::fetchLedgerPage( std::optional const& cursor, std::uint32_t ledgerSequence, - std::uint32_t limit) const + std::uint32_t limit, + std::uint32_t limitHint) const { assert(limit != 0); bool incomplete = !isLedgerIndexed(ledgerSequence); // really low limits almost always miss - uint32_t adjustedLimit = std::max(limit, (uint32_t)4); + uint32_t adjustedLimit = std::max(limitHint, std::max(limit, (uint32_t)4)); LedgerPage page; page.cursor = cursor; do { - adjustedLimit = adjustedLimit > 2048 ? 2048 : adjustedLimit * 2; + adjustedLimit = adjustedLimit >= 8192 ? 8192 : adjustedLimit * 2; + auto start = std::chrono::system_clock::now(); auto partial = doFetchLedgerPage(page.cursor, ledgerSequence, adjustedLimit); + auto end = std::chrono::system_clock::now(); + BOOST_LOG_TRIVIAL(debug) + << __func__ << " " << std::to_string(ledgerSequence) << " " + << std::to_string(adjustedLimit) << " " + << ripple::strHex(*page.cursor) << " - time = " + << std::to_string( + std::chrono::duration_cast( + end - start) + .count()); page.objects.insert( page.objects.end(), partial.objects.begin(), partial.objects.end()); page.cursor = partial.cursor; diff --git a/reporting/BackendInterface.h b/reporting/BackendInterface.h index 3c85a132..122497de 100644 --- a/reporting/BackendInterface.h +++ b/reporting/BackendInterface.h @@ -203,7 +203,8 @@ public: fetchLedgerPage( std::optional const& cursor, std::uint32_t ledgerSequence, - std::uint32_t limit) const; + std::uint32_t limit, + std::uint32_t limitHint = 0) const; bool isLedgerIndexed(std::uint32_t ledgerSequence) const; diff --git a/reporting/CassandraBackend.cpp b/reporting/CassandraBackend.cpp index 9ed4c1b8..f0d239d9 100644 --- a/reporting/CassandraBackend.cpp +++ b/reporting/CassandraBackend.cpp @@ -422,7 +422,7 @@ CassandraBackend::doFetchLedgerPage( CassandraResult result = executeSyncRead(statement); if (!!result) { - BOOST_LOG_TRIVIAL(trace) + BOOST_LOG_TRIVIAL(debug) << __func__ << " - got keys - size = " << result.numRows(); std::vector keys; @@ -430,7 +430,7 @@ CassandraBackend::doFetchLedgerPage( { keys.push_back(result.getUInt256()); } while (result.nextRow()); - if (keys.size() && keys.size() == limit) + if (keys.size() && keys.size() >= limit) { page.cursor = keys.back(); ++(*page.cursor); @@ -440,7 +440,7 @@ CassandraBackend::doFetchLedgerPage( throw std::runtime_error("Mismatch in size of objects and keys"); if (cursor) - BOOST_LOG_TRIVIAL(trace) + BOOST_LOG_TRIVIAL(debug) << __func__ << " Cursor = " << ripple::strHex(*page.cursor); for (size_t i = 0; i < objects.size(); ++i) diff --git a/reporting/PostgresBackend.cpp b/reporting/PostgresBackend.cpp index e833c754..c2adbade 100644 --- a/reporting/PostgresBackend.cpp +++ b/reporting/PostgresBackend.cpp @@ -351,7 +351,7 @@ PostgresBackend::doFetchLedgerPage( { keys.push_back({res.asUInt256(i, 0)}); } - if (numRows == limit) + if (numRows >= limit) { returnCursor = keys.back(); ++(*returnCursor); diff --git a/test.py b/test.py index 83066d1d..ae4390c1 100755 --- a/test.py +++ b/test.py @@ -436,9 +436,12 @@ async def ledger_data(ip, port, ledger, limit, binary, cursor): address = 'ws://' + str(ip) + ':' + str(port) try: async with websockets.connect(address) as ws: - await ws.send(json.dumps({"command":"ledger_data","ledger_index":int(ledger),"binary":bool(binary),"limit":int(limit),"cursor":cursor})) - await ws.send(json.dumps({"command":"ledger_data","ledger_index":int(ledger),"binary":bool(binary),"cursor":cursor})) + if limit is not None: + await ws.send(json.dumps({"command":"ledger_data","ledger_index":int(ledger),"binary":bool(binary),"limit":int(limit),"cursor":cursor})) + else: + await ws.send(json.dumps({"command":"ledger_data","ledger_index":int(ledger),"binary":bool(binary),"cursor":cursor})) res = json.loads(await ws.recv()) + print(res) objects = [] blobs = [] keys = [] @@ -598,7 +601,7 @@ async def book_offers(ip, port, ledger, pay_currency, pay_issuer, get_currency, req["cursor"] = cursor await ws.send(json.dumps(req)) res = json.loads(await ws.recv()) - #print(json.dumps(res,indent=4,sort_keys=True)) + print(json.dumps(res,indent=4,sort_keys=True)) if "result" in res: res = res["result"] for x in res["offers"]: