From 694abe2004c299dae81ed97ebcbf75e79d605092 Mon Sep 17 00:00:00 2001 From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> Date: Wed, 29 Apr 2026 13:00:39 +0100 Subject: [PATCH] docs(telemetry): add thread-safety comments to stop() and sdkProvider_.reset() Co-Authored-By: Claude Opus 4.6 --- src/libxrpl/telemetry/Telemetry.cpp | 7 +++++++ src/xrpld/app/main/Application.cpp | 4 ++++ 2 files changed, 11 insertions(+) diff --git a/src/libxrpl/telemetry/Telemetry.cpp b/src/libxrpl/telemetry/Telemetry.cpp index f7fb64d5dd..3b212dc4fa 100644 --- a/src/libxrpl/telemetry/Telemetry.cpp +++ b/src/libxrpl/telemetry/Telemetry.cpp @@ -315,6 +315,13 @@ public: // Force flush with timeout to avoid blocking indefinitely // when the OTLP endpoint is unreachable. sdkProvider_->ForceFlush(std::chrono::milliseconds(5000)); + // TODO: sdkProvider_ is not thread-safe. This reset() races with + // getTracer() if any thread is still calling startSpan(). + // Currently safe because Application::stop() shuts down + // serverHandler_, overlay_, and jobQueue_ before calling + // telemetry_->stop() — so no callers should remain. If the + // shutdown order ever changes, add an std::atomic<bool> stopped_ + // flag checked in getTracer() to make this robust. sdkProvider_.reset(); trace_api::Provider::SetTracerProvider( opentelemetry::nostd::shared_ptr( diff --git a/src/xrpld/app/main/Application.cpp b/src/xrpld/app/main/Application.cpp index e222660c39..cad96f382b 100644 --- a/src/xrpld/app/main/Application.cpp +++ b/src/xrpld/app/main/Application.cpp @@ -1661,6 +1661,10 @@ ApplicationImp::run() ledgerCleaner_->stop(); m_nodeStore->stop(); perfLog_->stop(); + // Telemetry must stop last among trace-producing components. + // serverHandler_, overlay_, and jobQueue_ are already stopped above, + // so no threads should be calling startSpan() at this point. + // See TODO in TelemetryImpl::stop() re: thread-safety of sdkProvider_. telemetry_->stop(); JLOG(m_journal.info()) << "Done.";