mirror of
https://github.com/XRPLF/clio.git
synced 2025-11-25 22:25:55 +00:00
refactor: Load balancer forwarding metrics (#2103)
This commit is contained in:
@@ -61,14 +61,10 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
using namespace util::config;
|
using namespace util::config;
|
||||||
using namespace util::prometheus;
|
using util::prometheus::Labels;
|
||||||
|
|
||||||
namespace etl {
|
namespace etl {
|
||||||
|
|
||||||
namespace {
|
|
||||||
std::vector<std::int64_t> const kHISTOGRAM_BUCKETS{1, 2, 5, 10, 20, 50, 100, 200, 500, 700, 1000};
|
|
||||||
} // namespace
|
|
||||||
|
|
||||||
std::shared_ptr<etlng::LoadBalancerInterface>
|
std::shared_ptr<etlng::LoadBalancerInterface>
|
||||||
LoadBalancer::makeLoadBalancer(
|
LoadBalancer::makeLoadBalancer(
|
||||||
ClioConfigDefinition const& config,
|
ClioConfigDefinition const& config,
|
||||||
@@ -92,27 +88,33 @@ LoadBalancer::LoadBalancer(
|
|||||||
std::shared_ptr<NetworkValidatedLedgersInterface> validatedLedgers,
|
std::shared_ptr<NetworkValidatedLedgersInterface> validatedLedgers,
|
||||||
SourceFactory sourceFactory
|
SourceFactory sourceFactory
|
||||||
)
|
)
|
||||||
: forwardedDurationHistogram_(PrometheusService::histogramInt(
|
: forwardingCounters_{
|
||||||
"lb_forwarded_duration_milliseconds_histogram",
|
.successDuration = PrometheusService::counterInt(
|
||||||
Labels(),
|
"forwarding_duration_milliseconds_counter",
|
||||||
kHISTOGRAM_BUCKETS,
|
Labels({util::prometheus::Label{"status", "success"}}),
|
||||||
"The duration of processing forwarded requests"
|
"The duration of processing successful forwarded requests"
|
||||||
))
|
),
|
||||||
, forwardedRetryCounter_(PrometheusService::counterInt(
|
.failDuration = PrometheusService::counterInt(
|
||||||
"lb_forwarded_retry_counter",
|
"forwarding_duration_milliseconds_counter",
|
||||||
Labels(),
|
Labels({util::prometheus::Label{"status", "fail"}}),
|
||||||
"The number of retries before a forwarded request was successful. Initial attempt excluded"
|
"The duration of processing failed forwarded requests"
|
||||||
))
|
),
|
||||||
, cacheTriedCounter_(PrometheusService::counterInt(
|
.retries = PrometheusService::counterInt(
|
||||||
"lb_cache_tried_counter",
|
"forwarding_retries_counter",
|
||||||
Labels(),
|
Labels(),
|
||||||
"The number of requests that we tried to serve from the cache"
|
"The number of retries before a forwarded request was successful. Initial attempt excluded"
|
||||||
))
|
),
|
||||||
, cacheMissCounter_(PrometheusService::counterInt(
|
.cacheHit = PrometheusService::counterInt(
|
||||||
"lb_cache_miss_counter",
|
"forwarding_cache_hit_counter",
|
||||||
Labels(),
|
Labels(),
|
||||||
"The number of requests that were not served from the cache"
|
"The number of requests that we served from the cache"
|
||||||
))
|
),
|
||||||
|
.cacheMiss = PrometheusService::counterInt(
|
||||||
|
"forwarding_cache_miss_counter",
|
||||||
|
Labels(),
|
||||||
|
"The number of requests that were not served from the cache"
|
||||||
|
)
|
||||||
|
}
|
||||||
{
|
{
|
||||||
auto const forwardingCacheTimeout = config.get<float>("forwarding.cache_timeout");
|
auto const forwardingCacheTimeout = config.get<float>("forwarding.cache_timeout");
|
||||||
if (forwardingCacheTimeout > 0.f) {
|
if (forwardingCacheTimeout > 0.f) {
|
||||||
@@ -266,11 +268,11 @@ LoadBalancer::forwardToRippled(
|
|||||||
auto const cmd = boost::json::value_to<std::string>(request.at("command"));
|
auto const cmd = boost::json::value_to<std::string>(request.at("command"));
|
||||||
|
|
||||||
if (forwardingCache_ and forwardingCache_->shouldCache(cmd)) {
|
if (forwardingCache_ and forwardingCache_->shouldCache(cmd)) {
|
||||||
++cacheTriedCounter_.get();
|
bool servedFromCache = true;
|
||||||
|
|
||||||
auto updater =
|
auto updater =
|
||||||
[this, &request, &clientIp, isAdmin](boost::asio::yield_context yield
|
[this, &request, &clientIp, &servedFromCache, isAdmin](boost::asio::yield_context yield
|
||||||
) -> std::expected<util::ResponseExpirationCache::EntryData, util::ResponseExpirationCache::Error> {
|
) -> std::expected<util::ResponseExpirationCache::EntryData, util::ResponseExpirationCache::Error> {
|
||||||
|
servedFromCache = false;
|
||||||
auto result = forwardToRippledImpl(request, clientIp, isAdmin, yield);
|
auto result = forwardToRippledImpl(request, clientIp, isAdmin, yield);
|
||||||
if (result.has_value()) {
|
if (result.has_value()) {
|
||||||
return util::ResponseExpirationCache::EntryData{
|
return util::ResponseExpirationCache::EntryData{
|
||||||
@@ -288,6 +290,9 @@ LoadBalancer::forwardToRippled(
|
|||||||
std::move(updater),
|
std::move(updater),
|
||||||
[](util::ResponseExpirationCache::EntryData const& entry) { return not entry.response.contains("error"); }
|
[](util::ResponseExpirationCache::EntryData const& entry) { return not entry.response.contains("error"); }
|
||||||
);
|
);
|
||||||
|
if (servedFromCache) {
|
||||||
|
++forwardingCounters_.cacheHit.get();
|
||||||
|
}
|
||||||
if (result.has_value()) {
|
if (result.has_value()) {
|
||||||
return std::move(result).value();
|
return std::move(result).value();
|
||||||
}
|
}
|
||||||
@@ -395,7 +400,7 @@ LoadBalancer::forwardToRippledImpl(
|
|||||||
boost::asio::yield_context yield
|
boost::asio::yield_context yield
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
++cacheMissCounter_.get();
|
++forwardingCounters_.cacheMiss.get();
|
||||||
|
|
||||||
ASSERT(not sources_.empty(), "ETL sources must be configured to forward requests.");
|
ASSERT(not sources_.empty(), "ETL sources must be configured to forward requests.");
|
||||||
std::size_t sourceIdx = util::Random::uniform(0ul, sources_.size() - 1);
|
std::size_t sourceIdx = util::Random::uniform(0ul, sources_.size() - 1);
|
||||||
@@ -409,13 +414,14 @@ LoadBalancer::forwardToRippledImpl(
|
|||||||
while (numAttempts < sources_.size()) {
|
while (numAttempts < sources_.size()) {
|
||||||
auto [res, duration] =
|
auto [res, duration] =
|
||||||
util::timed([&]() { return sources_[sourceIdx]->forwardToRippled(request, clientIp, xUserValue, yield); });
|
util::timed([&]() { return sources_[sourceIdx]->forwardToRippled(request, clientIp, xUserValue, yield); });
|
||||||
forwardedDurationHistogram_.get().observe(duration);
|
|
||||||
|
|
||||||
if (res) {
|
if (res) {
|
||||||
|
forwardingCounters_.successDuration.get() += duration;
|
||||||
response = std::move(res).value();
|
response = std::move(res).value();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
++forwardedRetryCounter_.get();
|
forwardingCounters_.failDuration.get() += duration;
|
||||||
|
++forwardingCounters_.retries.get();
|
||||||
error = std::max(error, res.error()); // Choose the best result between all sources
|
error = std::max(error, res.error()); // Choose the best result between all sources
|
||||||
|
|
||||||
sourceIdx = (sourceIdx + 1) % sources_.size();
|
sourceIdx = (sourceIdx + 1) % sources_.size();
|
||||||
|
|||||||
@@ -33,7 +33,6 @@
|
|||||||
#include "util/log/Logger.hpp"
|
#include "util/log/Logger.hpp"
|
||||||
#include "util/newconfig/ConfigDefinition.hpp"
|
#include "util/newconfig/ConfigDefinition.hpp"
|
||||||
#include "util/prometheus/Counter.hpp"
|
#include "util/prometheus/Counter.hpp"
|
||||||
#include "util/prometheus/Histogram.hpp"
|
|
||||||
|
|
||||||
#include <boost/asio.hpp>
|
#include <boost/asio.hpp>
|
||||||
#include <boost/asio/io_context.hpp>
|
#include <boost/asio/io_context.hpp>
|
||||||
@@ -94,10 +93,13 @@ private:
|
|||||||
std::uint32_t downloadRanges_ =
|
std::uint32_t downloadRanges_ =
|
||||||
kDEFAULT_DOWNLOAD_RANGES; /*< The number of markers to use when downloading initial ledger */
|
kDEFAULT_DOWNLOAD_RANGES; /*< The number of markers to use when downloading initial ledger */
|
||||||
|
|
||||||
std::reference_wrapper<util::prometheus::HistogramInt> forwardedDurationHistogram_;
|
struct ForwardingCounters {
|
||||||
std::reference_wrapper<util::prometheus::CounterInt> forwardedRetryCounter_;
|
std::reference_wrapper<util::prometheus::CounterInt> successDuration;
|
||||||
std::reference_wrapper<util::prometheus::CounterInt> cacheTriedCounter_;
|
std::reference_wrapper<util::prometheus::CounterInt> failDuration;
|
||||||
std::reference_wrapper<util::prometheus::CounterInt> cacheMissCounter_;
|
std::reference_wrapper<util::prometheus::CounterInt> retries;
|
||||||
|
std::reference_wrapper<util::prometheus::CounterInt> cacheHit;
|
||||||
|
std::reference_wrapper<util::prometheus::CounterInt> cacheMiss;
|
||||||
|
} forwardingCounters_;
|
||||||
|
|
||||||
// Using mutex instead of atomic_bool because choosing a new source to
|
// Using mutex instead of atomic_bool because choosing a new source to
|
||||||
// forward messages should be done with a mutual exclusion otherwise there will be a race condition
|
// forward messages should be done with a mutual exclusion otherwise there will be a race condition
|
||||||
|
|||||||
@@ -59,6 +59,7 @@
|
|||||||
using namespace etl;
|
using namespace etl;
|
||||||
using namespace util::config;
|
using namespace util::config;
|
||||||
using testing::Return;
|
using testing::Return;
|
||||||
|
using namespace util::prometheus;
|
||||||
|
|
||||||
constexpr static auto const kTWO_SOURCES_LEDGER_RESPONSE = R"({
|
constexpr static auto const kTWO_SOURCES_LEDGER_RESPONSE = R"({
|
||||||
"etl_sources": [
|
"etl_sources": [
|
||||||
@@ -641,6 +642,71 @@ TEST_F(LoadBalancerForwardToRippledTests, source0Fails)
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fixture for the forwarding-metrics tests below: combines the existing
// LoadBalancerForwardToRippledTests fixture with WithMockPrometheus.
// NOTE(review): presumably WithMockPrometheus is what provides makeMock<>() and
// swaps real counters for mocks - confirm against its definition.
struct LoadBalancerForwardToRippledPrometheusTests : LoadBalancerForwardToRippledTests, WithMockPrometheus {};
|
||||||
|
|
||||||
|
// Checks the cache hit/miss metrics when the same cacheable request is forwarded
// four times: exactly one request may reach a source (one cache miss, one
// success-duration sample) and the other three must be counted as cache hits.
TEST_F(LoadBalancerForwardToRippledPrometheusTests, forwardingCacheEnabled)
|
||||||
|
{
|
||||||
|
// A positive forwarding.cache_timeout; the LoadBalancer constructor reads this
// key and branches on it being > 0 (presumably creating the forwarding cache).
configJson_.as_object()["forwarding"] = boost::json::object{{"cache_timeout", 10.}};
|
||||||
|
EXPECT_CALL(sourceFactory_, makeSource).Times(2);
|
||||||
|
auto loadBalancer = makeLoadBalancer();
|
||||||
|
|
||||||
|
// NOTE(review): assumes "server_info" is a command forwardingCache_->shouldCache()
// accepts - confirm against the cache's command list.
auto const request = boost::json::object{{"command", "server_info"}};
|
||||||
|
|
||||||
|
// Mock the counters by the same names (and label string) the LoadBalancer
// constructor registers them under.
auto& cacheHitCounter = makeMock<CounterInt>("forwarding_cache_hit_counter", "");
|
||||||
|
auto& cacheMissCounter = makeMock<CounterInt>("forwarding_cache_miss_counter", "");
|
||||||
|
auto& successDurationCounter =
|
||||||
|
makeMock<CounterInt>("forwarding_duration_milliseconds_counter", "{status=\"success\"}");
|
||||||
|
|
||||||
|
// One miss: forwardToRippledImpl increments cacheMiss each time it is entered.
EXPECT_CALL(cacheMissCounter, add(1));
|
||||||
|
// Three hits: forwardToRippled increments cacheHit when the updater lambda was not run.
EXPECT_CALL(cacheHitCounter, add(1)).Times(3);
|
||||||
|
// The measured duration is not deterministic, so any value is accepted.
EXPECT_CALL(successDurationCounter, add(testing::_));
|
||||||
|
|
||||||
|
// Only source 0 is expected to be called, and only once.
// NOTE(review): forwardToRippledImpl picks the first source with
// util::Random::uniform - presumably the fixture pins that choice; confirm.
EXPECT_CALL(
|
||||||
|
sourceFactory_.sourceAt(0),
|
||||||
|
forwardToRippled(request, clientIP_, LoadBalancer::kUSER_FORWARDING_X_USER_VALUE, testing::_)
|
||||||
|
)
|
||||||
|
.WillOnce(Return(response_));
|
||||||
|
|
||||||
|
// Four identical requests; every one must return the same response.
runSpawn([&](boost::asio::yield_context yield) {
|
||||||
|
EXPECT_EQ(loadBalancer->forwardToRippled(request, clientIP_, false, yield), response_);
|
||||||
|
EXPECT_EQ(loadBalancer->forwardToRippled(request, clientIP_, false, yield), response_);
|
||||||
|
EXPECT_EQ(loadBalancer->forwardToRippled(request, clientIP_, false, yield), response_);
|
||||||
|
EXPECT_EQ(loadBalancer->forwardToRippled(request, clientIP_, false, yield), response_);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Checks the retry/duration metrics when the first source fails and the second
// succeeds: one cache miss, one retry, one fail-duration sample and one
// success-duration sample are expected for a single forwarded request.
TEST_F(LoadBalancerForwardToRippledPrometheusTests, source0Fails)
|
||||||
|
{
|
||||||
|
EXPECT_CALL(sourceFactory_, makeSource).Times(2);
|
||||||
|
auto loadBalancer = makeLoadBalancer();
|
||||||
|
|
||||||
|
// Mock the counters by the same names (and label strings) the LoadBalancer
// constructor registers them under.
auto& cacheMissCounter = makeMock<CounterInt>("forwarding_cache_miss_counter", "");
|
||||||
|
auto& retriesCounter = makeMock<CounterInt>("forwarding_retries_counter", "");
|
||||||
|
auto& successDurationCounter =
|
||||||
|
makeMock<CounterInt>("forwarding_duration_milliseconds_counter", "{status=\"success\"}");
|
||||||
|
auto& failDurationCounter = makeMock<CounterInt>("forwarding_duration_milliseconds_counter", "{status=\"fail\"}");
|
||||||
|
|
||||||
|
// forwardToRippledImpl increments cacheMiss once on entry.
EXPECT_CALL(cacheMissCounter, add(1));
|
||||||
|
// The failed attempt on source 0 increments retries and adds to failDuration.
EXPECT_CALL(retriesCounter, add(1));
|
||||||
|
// Durations are not deterministic, so any value is accepted.
EXPECT_CALL(successDurationCounter, add(testing::_));
|
||||||
|
EXPECT_CALL(failDurationCounter, add(testing::_));
|
||||||
|
|
||||||
|
// Source 0 returns an error, so the load balancer must move on to source 1.
// NOTE(review): forwardToRippledImpl picks the first source with
// util::Random::uniform - this test relies on source 0 being tried first;
// presumably the fixture pins that choice, confirm.
EXPECT_CALL(
|
||||||
|
sourceFactory_.sourceAt(0),
|
||||||
|
forwardToRippled(request_, clientIP_, LoadBalancer::kUSER_FORWARDING_X_USER_VALUE, testing::_)
|
||||||
|
)
|
||||||
|
.WillOnce(Return(std::unexpected{rpc::ClioError::EtlConnectionError}));
|
||||||
|
// Source 1 answers successfully; its response is what the caller must receive.
EXPECT_CALL(
|
||||||
|
sourceFactory_.sourceAt(1),
|
||||||
|
forwardToRippled(request_, clientIP_, LoadBalancer::kUSER_FORWARDING_X_USER_VALUE, testing::_)
|
||||||
|
)
|
||||||
|
.WillOnce(Return(response_));
|
||||||
|
|
||||||
|
runSpawn([&](boost::asio::yield_context yield) {
|
||||||
|
EXPECT_EQ(loadBalancer->forwardToRippled(request_, clientIP_, false, yield), response_);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
struct LoadBalancerForwardToRippledErrorTestBundle {
|
struct LoadBalancerForwardToRippledErrorTestBundle {
|
||||||
std::string testName;
|
std::string testName;
|
||||||
rpc::ClioError firstSourceError;
|
rpc::ClioError firstSourceError;
|
||||||
|
|||||||
Reference in New Issue
Block a user