Export etl metrics to prometheus (#1256)

Fixes #1248.
This commit is contained in:
Sergey Kuznetsov
2024-03-14 11:37:31 +00:00
committed by GitHub
parent 010538d6fe
commit e83dfcbcc3
28 changed files with 708 additions and 51 deletions

View File

@@ -276,7 +276,7 @@ ETLService::ETLService(
{
startSequence_ = config.maybeValue<uint32_t>("start_sequence");
finishSequence_ = config.maybeValue<uint32_t>("finish_sequence");
state_.isReadOnly = config.valueOr("read_only", state_.isReadOnly);
state_.isReadOnly = config.valueOr("read_only", static_cast<bool>(state_.isReadOnly));
extractorThreads_ = config.valueOr<uint32_t>("extractor_threads", extractorThreads_);
txnThreshold_ = config.valueOr<size_t>("txn_threshold", txnThreshold_);
}

View File

@@ -212,8 +212,8 @@ public:
boost::json::object result;
result["etl_sources"] = loadBalancer_->toJson();
result["is_writer"] = state_.isWriting.load();
result["read_only"] = state_.isReadOnly;
result["is_writer"] = static_cast<int>(state_.isWriting);
result["read_only"] = static_cast<int>(state_.isReadOnly);
auto last = ledgerPublisher_.getLastPublish();
if (last.time_since_epoch().count() != 0)
result["last_publish_age_seconds"] = std::to_string(ledgerPublisher_.lastPublishAgeSeconds());

View File

@@ -19,6 +19,10 @@
#pragma once
#include "util/prometheus/Bool.hpp"
#include "util/prometheus/Label.hpp"
#include "util/prometheus/Prometheus.hpp"
#include <atomic>
namespace etl {
@@ -33,9 +37,19 @@ struct SystemState {
* In strict read-only mode, the process will never attempt to become the ETL writer, and will only publish ledgers
* as they are written to the database.
*/
bool isReadOnly = false;
util::prometheus::Bool isReadOnly = PrometheusService::boolMetric(
"read_only",
util::prometheus::Labels{},
"Whether the process is in strict read-only mode"
);
/** @brief Whether the process is writing to the database. */
util::prometheus::Bool isWriting = PrometheusService::boolMetric(
"etl_writing",
util::prometheus::Labels{},
"Whether the process is writing to the database"
);
std::atomic_bool isWriting = false; /**< @brief Whether the process is writing to the database. */
std::atomic_bool isStopping = false; /**< @brief Whether the software is stopping. */
std::atomic_bool writeConflict = false; /**< @brief Whether a write conflict was detected. */
@@ -46,7 +60,11 @@ struct SystemState {
* arrived from rippled and therefore can't extract the ledger diff. When this happens, Clio can't proceed with ETL
* and should log this error and only handle RPC requests.
*/
std::atomic_bool isAmendmentBlocked = false;
util::prometheus::Bool isAmendmentBlocked = PrometheusService::boolMetric(
"etl_amendment_blocked",
util::prometheus::Labels{},
"Whether clio detected an amendment block"
);
};
} // namespace etl

View File

@@ -25,6 +25,8 @@
#include "etl/SystemState.hpp"
#include "util/Assert.hpp"
#include "util/log/Logger.hpp"
#include "util/prometheus/Counter.hpp"
#include "util/prometheus/Prometheus.hpp"
#include <boost/asio/io_context.hpp>
#include <boost/asio/strand.hpp>
@@ -75,8 +77,11 @@ class LedgerPublisher {
std::chrono::time_point<ripple::NetClock> lastCloseTime_;
mutable std::shared_mutex closeTimeMtx_;
std::chrono::time_point<std::chrono::system_clock> lastPublish_;
mutable std::shared_mutex publishTimeMtx_;
std::reference_wrapper<util::prometheus::CounterInt> lastPublishSeconds_ = PrometheusService::counterInt(
"etl_last_publish_seconds",
{},
"Seconds since epoch of the last published ledger"
);
std::optional<uint32_t> lastPublishedSequence_;
mutable std::shared_mutex lastPublishedSeqMtx_;
@@ -232,8 +237,8 @@ public:
std::chrono::time_point<std::chrono::system_clock>
getLastPublish() const
{
std::shared_lock const lck(publishTimeMtx_);
return lastPublish_;
return std::chrono::time_point<std::chrono::system_clock>{std::chrono::seconds{lastPublishSeconds_.get().value()
}};
}
/**
@@ -273,8 +278,9 @@ private:
void
setLastPublishTime()
{
std::scoped_lock const lck(publishTimeMtx_);
lastPublish_ = std::chrono::system_clock::now();
using namespace std::chrono;
auto const nowSeconds = duration_cast<seconds>(system_clock::now().time_since_epoch()).count();
lastPublishSeconds_.get().set(nowSeconds);
}
void

View File

@@ -0,0 +1,89 @@
//------------------------------------------------------------------------------
/*
This file is part of clio: https://github.com/XRPLF/clio
Copyright (c) 2024, the clio developers.
Permission to use, copy, modify, and distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
//==============================================================================
#pragma once
#include "util/Assert.hpp"
#include "util/prometheus/Gauge.hpp"
#include <concepts>
#include <cstdint>
#include <functional>
namespace util::prometheus {
/**
 * @brief Requirements for a metric type that can back a bool-like metric
 *
 * The type must provide `set(0)` returning `void` and `value()` returning exactly `int64_t`.
 */
template <typename T>
concept SomeBoolImpl = requires(T impl) {
    { impl.set(0) } -> std::same_as<void>;
    { impl.value() } -> std::same_as<std::int64_t>;
};
/**
* @brief A wrapped to provide bool interface for a Prometheus metric
* @note Prometheus does not have a native bool type, so we use a counter with a value of 0 or 1
*/
template <SomeBoolImpl ImplType>
class AnyBool {
std::reference_wrapper<ImplType> impl_;
public:
/**
* @brief Construct a bool metric
*
* @param impl The implementation of the metric
*/
explicit AnyBool(ImplType& impl) : impl_(impl)
{
}
/**
* @brief Set the value of the bool metric
*
* @param value The value to set
* @return A reference to the metric
*/
AnyBool&
operator=(bool value)
{
impl_.get().set(value ? 1 : 0);
return *this;
}
/**
* @brief Get the value of the bool metric
*
* @return The value of the metric
*/
operator bool() const
{
auto const value = impl_.get().value();
ASSERT(value == 0 || value == 1, "Invalid value for bool: {}", value);
return value == 1;
}
};
/**
* @brief Alias for Prometheus bool metric with GaugeInt implementation
*
* The flag is kept in the wrapped GaugeInt as 0 (false) or 1 (true).
*/
using Bool = AnyBool<GaugeInt>;
} // namespace util::prometheus

View File

@@ -83,6 +83,18 @@ struct AnyCounter : MetricBase, impl::AnyCounterBase<NumberType> {
return *this;
}
/**
* @brief Set the value of the counter
*
* @note The new value is asserted to be greater than or equal to the current value, so this method cannot be used
* to decrease the counter
*
* @param value The value to set the counter to
*/
void
set(ValueType value)
{
// A counter must never go down: check the new value against the current one before storing it
ASSERT(value >= this->value(), "Cannot decrease a counter {}", this->name());
this->pimpl_->set(value);
}
/**
* @brief Reset the counter to zero
*/

View File

@@ -21,6 +21,7 @@
#include "util/Assert.hpp"
#include "util/config/Config.hpp"
#include "util/prometheus/Bool.hpp"
#include "util/prometheus/Counter.hpp"
#include "util/prometheus/Gauge.hpp"
#include "util/prometheus/Histogram.hpp"
@@ -52,6 +53,13 @@ convertBaseTo(MetricBase& metricBase)
} // namespace
// Builds a Bool wrapper around the integer gauge registered under the given name and labels
Bool
PrometheusImpl::boolMetric(std::string name, Labels labels, std::optional<std::string> description)
{
    return Bool{gaugeInt(std::move(name), std::move(labels), std::move(description))};
}
CounterInt&
PrometheusImpl::counterInt(std::string name, Labels labels, std::optional<std::string> description)
{
@@ -175,6 +183,12 @@ PrometheusService::init(util::Config const& config)
instance_ = std::make_unique<util::prometheus::PrometheusImpl>(enabled, compressReply);
}
// Forwards the request to the PrometheusService instance
util::prometheus::Bool
PrometheusService::boolMetric(std::string name, util::prometheus::Labels labels, std::optional<std::string> description)
{
    auto&& prometheus = instance();
    return prometheus.boolMetric(std::move(name), std::move(labels), std::move(description));
}
util::prometheus::CounterInt&
PrometheusService::counterInt(std::string name, util::prometheus::Labels labels, std::optional<std::string> description)
{

View File

@@ -20,6 +20,7 @@
#pragma once
#include "util/config/Config.hpp"
#include "util/prometheus/Bool.hpp"
#include "util/prometheus/Counter.hpp"
#include "util/prometheus/Gauge.hpp"
#include "util/prometheus/Histogram.hpp"
@@ -54,6 +55,18 @@ public:
virtual ~PrometheusInterface() = default;
/**
* @brief Get a bool based metric. It will be created if it doesn't exist
* @note Prometheus does not have a native bool type, so an integer gauge with a value of 0 or 1 is used
*
* @param name The name of the metric
* @param labels The labels of the metric
* @param description The description of the metric
* @return The bool object
*/
virtual Bool
boolMetric(std::string name, Labels labels, std::optional<std::string> description = std::nullopt) = 0;
/**
* @brief Get an integer based counter metric. It will be created if it doesn't exist
*
@@ -176,6 +189,9 @@ class PrometheusImpl : public PrometheusInterface {
public:
using PrometheusInterface::PrometheusInterface;
Bool
boolMetric(std::string name, Labels labels, std::optional<std::string> description = std::nullopt) override;
CounterInt&
counterInt(std::string name, Labels labels, std::optional<std::string> description) override;
@@ -242,6 +258,22 @@ public:
*/
void static init(util::Config const& config = util::Config{});
/**
* @brief Get a bool based metric. It will be created if it doesn't exist
* @note Prometheus does not have a native bool type, so an integer gauge with a value of 0 or 1 is used
*
* @param name The name of the metric
* @param labels The labels of the metric
* @param description The description of the metric
* @return The bool object
*/
static util::prometheus::Bool
boolMetric(
std::string name,
util::prometheus::Labels labels,
std::optional<std::string> description = std::nullopt
);
/**
* @brief Get an integer based counter metric. It will be created if it doesn't exist
*