fix(telemetry): StatsD gauge and io_latency first-sample emit

Two fixes so gauges register in Prometheus (via StatsD) even when their
initial/steady-state value is 0:

1. StatsDGaugeImpl m_dirty: change the default member initializer from
   false to true so the initial value (0) is emitted on the first flush.
   Previously, gauges whose value never changed from 0 were never
   flushed and never appeared downstream.

2. io_latency_sampler firstSample_: new atomic<bool>, init true.
   m_event.notify now fires when either firstSample_ is true (exchanged
   to false) or lastSample >= 10 ms. This guarantees the io_latency
   metric is registered on startup; subsequent sub-10 ms samples are
   still suppressed to avoid flooding.
This commit is contained in:
Pratik Mankawde
2026-05-13 14:40:58 +01:00
parent beaf01ae4d
commit 580ee5ede7
2 changed files with 10 additions and 3 deletions

View File

@@ -166,7 +166,7 @@ private:
std::string m_name;
GaugeImpl::value_type m_last_value{0};
GaugeImpl::value_type m_value{0};
bool m_dirty{false};
bool m_dirty{true};
};
//------------------------------------------------------------------------------
@@ -583,6 +583,9 @@ StatsDEventImpl::do_notify(EventImpl::value_type const& value)
// Constructs a gauge called `name` that is bound to the collector
// implementation `impl`, then hands the new gauge to that collector.
StatsDGaugeImpl::StatsDGaugeImpl(std::string name, std::shared_ptr<StatsDCollectorImp> const& impl)
: m_impl(impl), m_name(std::move(name))
{
// The gauge starts dirty (m_dirty's default member initializer is true;
// nothing in this body sets it), so the initial value (0) is emitted on
// the first flush.
// Without this, gauges whose value never changes from 0 would never
// appear in downstream metric stores (e.g. Prometheus via StatsD).
m_impl->add(*this);
}

View File

@@ -149,6 +149,7 @@ private:
beast::Journal m_journal;
beast::io_latency_probe<std::chrono::steady_clock> m_probe;
std::atomic<std::chrono::milliseconds> lastSample_;
std::atomic<bool> firstSample_;
public:
io_latency_sampler(
@@ -156,7 +157,7 @@ private:
beast::Journal journal,
std::chrono::milliseconds interval,
boost::asio::io_context& ios)
: m_event(std::move(ev)), m_journal(journal), m_probe(interval, ios)
: m_event(std::move(ev)), m_journal(journal), m_probe(interval, ios), firstSample_(true)
{
}
@@ -175,7 +176,10 @@ private:
lastSample_ = lastSample;
if (lastSample >= 10ms)
// Always emit the first sample so the metric is registered in
// downstream stores (Prometheus via StatsD). After that, only
// report latency >= 10 ms to avoid flooding with sub-10 ms values.
if (firstSample_.exchange(false) || lastSample >= 10ms)
m_event.notify(lastSample);
if (lastSample >= 500ms)
{