feat: ETLng task manager (#1843)

This commit is contained in:
Alex Kremer
2025-01-29 15:29:13 +00:00
committed by GitHub
parent 3e200d8b9d
commit 73f375f20d
19 changed files with 769 additions and 29 deletions

View File

@@ -40,6 +40,7 @@ target_sources(
etlng/GrpcSourceTests.cpp
etlng/RegistryTests.cpp
etlng/SchedulingTests.cpp
etlng/TaskManagerTests.cpp
etlng/LoadingTests.cpp
# Feed
util/BytesConverterTests.cpp
@@ -129,6 +130,7 @@ target_sources(
util/ConceptsTests.cpp
util/CoroutineGroupTests.cpp
util/LedgerUtilsTests.cpp
util/StrandedPriorityQueueTests.cpp
# Prometheus support
util/prometheus/BoolTests.cpp
util/prometheus/CounterTests.cpp

View File

@@ -0,0 +1,137 @@
//------------------------------------------------------------------------------
/*
This file is part of clio: https://github.com/XRPLF/clio
Copyright (c) 2025, the clio developers.
Permission to use, copy, modify, and distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//==============================================================================
#include "etlng/ExtractorInterface.hpp"
#include "etlng/LoaderInterface.hpp"
#include "etlng/Models.hpp"
#include "etlng/SchedulerInterface.hpp"
#include "etlng/impl/Loading.hpp"
#include "etlng/impl/TaskManager.hpp"
#include "util/BinaryTestObject.hpp"
#include "util/LoggerFixtures.hpp"
#include "util/TestObject.hpp"
#include "util/async/AnyExecutionContext.hpp"
#include "util/async/context/BasicExecutionContext.hpp"
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include <xrpl/protocol/LedgerHeader.h>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <optional>
#include <semaphore>
#include <vector>
using namespace etlng::model;
using namespace etlng::impl;
namespace {
// First ledger sequence used by the tests: the mock scheduler starts counting from it and the loader is
// expected to receive kSEQ, kSEQ + 1, ... in that order.
constinit auto const kSEQ = 30;
// Ledger hash string passed to createLedgerHeader when building the header of each test ledger.
constinit auto const kLEDGER_HASH = "4BC50C9B0D8515D3EAAE1E74B29A95804346C491EE1A95BF25E4AAB854A6A652";
/**
 * @brief gMock implementation of etlng::SchedulerInterface.
 *
 * `next` is the only mocked method; it takes no arguments and returns an optional Task.
 */
struct MockScheduler : etlng::SchedulerInterface {
MOCK_METHOD(std::optional<Task>, next, (), (override));
};
/**
 * @brief gMock implementation of etlng::ExtractorInterface.
 *
 * Both mocked methods take a uint32_t (the tests pass a ledger sequence) and return an optional LedgerData.
 */
struct MockExtractor : etlng::ExtractorInterface {
MOCK_METHOD(std::optional<LedgerData>, extractLedgerWithDiff, (uint32_t), (override));
MOCK_METHOD(std::optional<LedgerData>, extractLedgerOnly, (uint32_t), (override));
};
/**
 * @brief gMock implementation of etlng::LoaderInterface.
 *
 * `load` receives the ledger data by const reference and returns nothing; `loadInitialLedger` receives the
 * same kind of argument and returns an optional ripple::LedgerHeader.
 */
struct MockLoader : etlng::LoaderInterface {
MOCK_METHOD(void, load, (LedgerData const&), (override));
MOCK_METHOD(std::optional<ripple::LedgerHeader>, loadInitialLedger, (LedgerData const&), (override));
};
/**
 * @brief Test fixture that wires a TaskManager to a mocked scheduler, extractor and loader.
 */
struct TaskManagerTests : NoLoggerFixture {
// NiceMock wrappers: calls that have no matching EXPECT_CALL are accepted without gMock warnings.
using MockSchedulerType = testing::NiceMock<MockScheduler>;
using MockExtractorType = testing::NiceMock<MockExtractor>;
using MockLoaderType = testing::NiceMock<MockLoader>;
protected:
// Execution context handed to the task manager; the tests also use it to run the manager loop.
// NOTE(review): the argument 2 is presumably the number of worker threads - confirm against CoroExecutionContext.
util::async::CoroExecutionContext ctx_{2};
// The mocks are owned through shared_ptr, but the task manager below only receives references to them.
std::shared_ptr<MockSchedulerType> mockSchedulerPtr_ = std::make_shared<MockSchedulerType>();
std::shared_ptr<MockExtractorType> mockExtractorPtr_ = std::make_shared<MockExtractorType>();
std::shared_ptr<MockLoaderType> mockLoaderPtr_ = std::make_shared<MockLoaderType>();
// Must stay the last member: it is initialized from ctx_ and the mocks above, and members are initialized in
// declaration order (and destroyed in reverse).
TaskManager taskManager_{ctx_, *mockSchedulerPtr_, *mockExtractorPtr_, *mockLoaderPtr_};
};
auto
createTestData(uint32_t seq)
{
auto const header = createLedgerHeader(kLEDGER_HASH, seq);
return LedgerData{
.transactions = {},
.objects = {util::createObject(), util::createObject(), util::createObject()},
.successors = {},
.edgeKeys = {},
.header = header,
.rawHeader = {},
.seq = seq
};
}
} // namespace
/**
 * Runs the task manager with several extractors and a single loader and checks that the loader receives
 * exactly kTOTAL ledgers, in ascending sequence order starting at kSEQ.
 *
 * - The scheduler mock hands out consecutive sequences forever.
 * - The extractor mock returns data only for the first kTOTAL sequences and std::nullopt afterwards.
 * - The loader mock records each sequence and signals the test thread once kTOTAL ledgers have arrived.
 */
TEST_F(TaskManagerTests, LoaderGetsDataIfNextSequenceIsExtracted)
{
    static constexpr auto kTOTAL = 64uz;
    static constexpr auto kEXTRACTORS = 5uz;
    static constexpr auto kLOADERS = 1uz;

    std::atomic_uint32_t seq = kSEQ;  // atomic because next() may be called from several extractor tasks
    std::binary_semaphore done{0};

    // NOTE(review): `loaded` is not synchronized; this relies on only one loader (kLOADERS == 1) calling load()
    // at a time - confirm against TaskManager if kLOADERS is ever raised.
    std::vector<uint32_t> loaded;
    loaded.reserve(kTOTAL);  // final size is known up front

    EXPECT_CALL(*mockSchedulerPtr_, next()).WillRepeatedly([&]() {
        return Task{.priority = Task::Priority::Higher, .seq = seq++};
    });

    // Parameter is deliberately not called `seq` to avoid shadowing the atomic counter above
    EXPECT_CALL(*mockExtractorPtr_, extractLedgerWithDiff(testing::_))
        .WillRepeatedly([](uint32_t requestedSeq) -> std::optional<LedgerData> {
            if (requestedSeq > kSEQ + kTOTAL - 1)
                return std::nullopt;  // nothing to extract past the last test ledger

            return createTestData(requestedSeq);
        });

    // Take the data by const reference (matching the mocked signature) so the ledger is not copied on every call
    EXPECT_CALL(*mockLoaderPtr_, load(testing::_)).Times(kTOTAL).WillRepeatedly([&](LedgerData const& data) {
        loaded.push_back(data.seq);

        if (loaded.size() == kTOTAL) {
            done.release();  // wake the test thread: everything expected has been loaded
        }
    });

    auto loop = ctx_.execute([&] { taskManager_.run({.numExtractors = kEXTRACTORS, .numLoaders = kLOADERS}); });

    // Block until the loader has seen the last ledger, then shut the manager down and wait for run() to finish
    done.acquire();
    taskManager_.stop();
    loop.wait();

    EXPECT_EQ(loaded.size(), kTOTAL);
    for (std::size_t i = 0; i < loaded.size(); ++i) {
        EXPECT_EQ(loaded[i], kSEQ + i);
    }
}

View File

@@ -0,0 +1,196 @@
//------------------------------------------------------------------------------
/*
This file is part of clio: https://github.com/XRPLF/clio
Copyright (c) 2025, the clio developers.
Permission to use, copy, modify, and distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//==============================================================================
#include "util/StrandedPriorityQueue.hpp"
#include "util/async/AnyExecutionContext.hpp"
#include "util/async/AnyOperation.hpp"
#include "util/async/context/BasicExecutionContext.hpp"
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include <atomic>
#include <chrono>
#include <cstdint>
#include <thread>
#include <unordered_set>
#include <vector>
using namespace util;
namespace {
/**
 * @brief Minimal payload stored in the queue under test.
 *
 * The defaulted three-way comparison compares `seq`, the only member, so a larger `seq` compares greater.
 */
struct TestData {
uint32_t seq;
auto
operator<=>(TestData const&) const = default;
};
} // namespace
// With the default comparator the queue is expected to hand items back from the highest seq down to the lowest.
TEST(StrandedPriorityQueueTests, DefaultPriority)
{
    static constexpr auto kITEMS = 100u;

    util::async::CoroExecutionContext ctx;
    StrandedPriorityQueue<TestData> queue{ctx.makeStrand()};

    for (auto seq = 0u; seq < kITEMS; ++seq)
        EXPECT_TRUE(queue.enqueue(TestData{.seq = seq}));

    EXPECT_FALSE(queue.empty());

    // Drain until dequeue() yields nothing; every item must match the descending counter
    auto expectedSeq = kITEMS - 1;
    while (auto item = queue.dequeue()) {
        EXPECT_EQ(item->seq, expectedSeq);
        --expectedSeq;
    }

    EXPECT_TRUE(queue.empty());
}
// A user-supplied comparator that inverts the ordering makes the queue return the lowest seq first.
TEST(StrandedPriorityQueueTests, CustomPriority)
{
    static constexpr auto kITEMS = 100u;

    // Reports lhs as "lower priority" when its seq is the larger one
    struct LowestSeqFirst {
        [[nodiscard]] bool
        operator()(TestData const& lhs, TestData const& rhs) const noexcept
        {
            return rhs.seq < lhs.seq;
        }
    };

    util::async::CoroExecutionContext ctx;
    StrandedPriorityQueue<TestData, LowestSeqFirst> queue{ctx.makeStrand()};

    for (auto seq = 0u; seq < kITEMS; ++seq)
        EXPECT_TRUE(queue.enqueue(TestData{.seq = seq}));

    EXPECT_FALSE(queue.empty());

    // Drain until dequeue() yields nothing; every item must match the ascending counter
    auto expectedSeq = 0u;
    while (auto item = queue.dequeue()) {
        EXPECT_EQ(item->seq, expectedSeq);
        ++expectedSeq;
    }

    EXPECT_TRUE(queue.empty());
}
// Several tasks enqueue disjoint batches concurrently into a queue created without a size limit; once they are
// all done, the queue must return every item in descending seq order.
TEST(StrandedPriorityQueueTests, MultipleThreadsUnlimitedQueue)
{
    static constexpr auto kTOTAL_THREADS = 5u;
    static constexpr auto kTOTAL_ITEMS_PER_THREAD = 100u;
    static constexpr auto kTOTAL_ITEMS = kTOTAL_ITEMS_PER_THREAD * kTOTAL_THREADS;

    async::CoroExecutionContext realCtx{6};
    async::AnyExecutionContext ctx{realCtx};
    StrandedPriorityQueue<TestData> queue{ctx.makeStrand()};
    EXPECT_TRUE(queue.empty());

    std::atomic_size_t acceptedCount = 0uz;
    std::vector<async::AnyOperation<void>> producers;
    producers.reserve(kTOTAL_THREADS);

    for (auto batch = 0u; batch < kTOTAL_THREADS; ++batch) {
        // Each producer task enqueues its own batch of consecutive sequences
        producers.push_back(ctx.execute([&queue, &acceptedCount, firstSeq = batch * kTOTAL_ITEMS_PER_THREAD] {
            for (auto offset = 0u; offset < kTOTAL_ITEMS_PER_THREAD; ++offset) {
                if (queue.enqueue(TestData{.seq = firstSeq + offset}))
                    ++acceptedCount;
            }
        }));
    }

    for (auto& producer : producers)
        producer.wait();

    // All producers are finished; drain and check the ordering
    auto expectedSeq = kTOTAL_ITEMS - 1;
    while (auto item = queue.dequeue()) {
        EXPECT_EQ(item->seq, expectedSeq);
        --expectedSeq;
    }

    EXPECT_TRUE(queue.empty());
    EXPECT_EQ(acceptedCount, kTOTAL_ITEMS);
}
// Producers push into a queue created with a size limit and retry whenever enqueue() refuses an item, while a
// single consumer drains the queue concurrently. Every sequence must arrive, and at least one retry must occur.
TEST(StrandedPriorityQueueTests, MultipleThreadsLimitedQueue)
{
    static constexpr auto kQUEUE_SIZE_LIMIT = 32uz;
    static constexpr auto kTOTAL_THREADS = 5u;
    static constexpr auto kTOTAL_ITEMS_PER_THREAD = 100u;

    async::CoroExecutionContext realCtx{8};
    async::AnyExecutionContext ctx{realCtx};
    StrandedPriorityQueue<TestData> queue{ctx.makeStrand(), kQUEUE_SIZE_LIMIT};
    EXPECT_TRUE(queue.empty());

    std::atomic_size_t acceptedCount = 0uz;
    std::atomic_size_t retryCount = 0uz;

    std::vector<async::AnyOperation<void>> producers;
    producers.reserve(kTOTAL_THREADS);

    // Sequences that still have to come out of the queue; only the consumer touches it once it has started
    std::unordered_set<uint32_t> pending;

    for (auto batch = 0u; batch < kTOTAL_THREADS; ++batch) {
        auto const firstSeq = batch * kTOTAL_ITEMS_PER_THREAD;

        for (auto offset = 0u; offset < kTOTAL_ITEMS_PER_THREAD; ++offset)
            pending.insert(firstSeq + offset);

        // Each producer task enqueues its own batch, backing off briefly whenever the queue rejects an item
        producers.push_back(ctx.execute([&queue, &acceptedCount, &retryCount, firstSeq] {
            for (auto offset = 0u; offset < kTOTAL_ITEMS_PER_THREAD; ++offset) {
                auto item = TestData{.seq = firstSeq + offset};

                while (not queue.enqueue(item)) {
                    std::this_thread::sleep_for(std::chrono::nanoseconds{1});
                    ++retryCount;
                }

                ++acceptedCount;
            }
        }));
    }

    EXPECT_FALSE(pending.empty());

    // Consumer: keep draining until every expected sequence has been seen
    auto consumer = ctx.execute([&queue, &pending] {
        while (not pending.empty()) {
            while (auto item = queue.dequeue()) {
                auto const seq = item->seq;
                EXPECT_TRUE(pending.contains(seq));
                pending.erase(seq);
            }
        }
    });

    for (auto& producer : producers)
        producer.wait();
    consumer.wait();

    EXPECT_TRUE(queue.empty());
    EXPECT_TRUE(pending.empty());
    EXPECT_EQ(acceptedCount, kTOTAL_ITEMS_PER_THREAD * kTOTAL_THREADS);
    EXPECT_GE(retryCount, 1uz);
}
// Dequeuing from a queue that never received an item must yield an empty result.
TEST(StrandedPriorityQueueTests, ReturnsNulloptIfQueueEmpty)
{
    async::CoroExecutionContext realCtx;
    StrandedPriorityQueue<TestData> queue{realCtx.makeStrand()};

    EXPECT_TRUE(queue.empty());
    EXPECT_FALSE(queue.dequeue().has_value());
}